Extended Gossipsub metrics (#1577)

## Issue Addressed

N/A

## Proposed Changes

Adds extended metrics to get a better idea of what is happening at the gossipsub layer of lighthouse. This provides information about mesh statistics per topics, subscriptions and peer scores. 

## Additional Info
This commit is contained in:
Age Manning
2020-09-01 06:59:14 +00:00
parent 8301a984eb
commit fb9d828e5e
11 changed files with 445 additions and 89 deletions

View File

@@ -41,7 +41,7 @@ rand = "0.7.3"
[dependencies.libp2p]
#version = "0.23.0"
git = "https://github.com/sigp/rust-libp2p"
rev = "d0f9d6b9b3fef9616026f3ddf11d75fe9f7a41df"
rev = "03f998022ce2f566a6c6e6c4206bc0ce4d45109f"
default-features = false
features = ["websocket", "identify", "mplex", "noise", "gossipsub", "dns", "tcp-tokio"]

View File

@@ -249,8 +249,28 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
for topic in message.topics(GossipEncoding::default(), self.enr_fork_id.fork_digest) {
match message.encode(GossipEncoding::default()) {
Ok(message_data) => {
if let Err(e) = self.gossipsub.publish(topic.into(), message_data) {
if let Err(e) = self.gossipsub.publish(topic.clone().into(), message_data) {
slog::warn!(self.log, "Could not publish message"; "error" => format!("{:?}", e));
// add to metrics
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
if let Some(v) = metrics::get_int_gauge(
&metrics::FAILED_ATTESTATION_PUBLISHES_PER_SUBNET,
&[&subnet_id.to_string()],
) {
v.inc()
};
}
kind => {
if let Some(v) = metrics::get_int_gauge(
&metrics::FAILED_PUBLISHES_PER_MAIN_TOPIC,
&[&format!("{:?}", kind)],
) {
v.inc()
};
}
}
}
}
Err(e) => crit!(self.log, "Could not publish message"; "error" => e),
@@ -471,23 +491,9 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
}
}
GossipsubEvent::Subscribed { peer_id, topic } => {
if let Some(topic_metric) = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_COUNT,
&[topic.as_str()],
) {
topic_metric.inc()
}
self.add_event(BehaviourEvent::PeerSubscribed(peer_id, topic));
}
GossipsubEvent::Unsubscribed { peer_id: _, topic } => {
if let Some(topic_metric) = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_COUNT,
&[topic.as_str()],
) {
topic_metric.dec()
}
}
GossipsubEvent::Unsubscribed { .. } => {}
}
}

View File

@@ -19,7 +19,7 @@ pub use behaviour::{BehaviourEvent, PeerRequestId, Request, Response};
pub use config::Config as NetworkConfig;
pub use discovery::{CombinedKeyExt, EnrExt, Eth2Enr};
pub use discv5;
pub use libp2p::gossipsub::{MessageAcceptance, MessageId, Topic, TopicHash};
pub use libp2p::gossipsub::{Gossipsub, MessageAcceptance, MessageId, Topic, TopicHash};
pub use libp2p::{core::ConnectedPoint, PeerId, Swarm};
pub use libp2p::{multiaddr, Multiaddr};
pub use metrics::scrape_discovery_metrics;

View File

@@ -34,9 +34,20 @@ lazy_static! {
"Unsolicited discovery requests per ip per second",
&["Addresses"]
);
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_peers_per_topic_count",
"Peers subscribed per topic",
pub static ref PEERS_PER_CLIENT: Result<IntGaugeVec> = try_create_int_gauge_vec(
"libp2p_peers_per_client",
"The connected peers via client implementation",
&["Client"]
);
pub static ref FAILED_ATTESTATION_PUBLISHES_PER_SUBNET: Result<IntGaugeVec> =
try_create_int_gauge_vec(
"gossipsub_failed_attestation_publishes_per_subnet",
"Failed attestation publishes per subnet",
&["subnet"]
);
pub static ref FAILED_PUBLISHES_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_failed_publishes_per_main_topic",
"Failed gossip publishes",
&["topic_hash"]
);
}

View File

@@ -20,7 +20,7 @@ pub struct Client {
pub agent_string: Option<String>,
}
#[derive(Clone, Debug, Serialize)]
#[derive(Clone, Debug, Serialize, PartialEq)]
pub enum ClientKind {
/// A lighthouse node (the best kind).
Lighthouse,
@@ -98,6 +98,12 @@ impl std::fmt::Display for Client {
}
}
impl std::fmt::Display for ClientKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}
// helper function to identify clients from their agent_version. Returns the client
// kind and it's associated version and the OS kind.
fn client_from_agent_version(agent_version: &str) -> (ClientKind, String, String) {

View File

@@ -239,6 +239,27 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
///
/// This is also called when dialing a peer fails.
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
// Decrement the PEERS_PER_CLIENT metric
if let Some(kind) = self
.network_globals
.peers
.read()
.peer_info(peer_id)
.and_then(|peer_info| {
if let Connected { .. } = peer_info.connection_status {
Some(peer_info.client.kind.clone())
} else {
None
}
})
{
if let Some(v) =
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
{
v.dec()
};
}
self.network_globals.peers.write().disconnect(peer_id);
// remove the ping and status timer for the peer
@@ -296,8 +317,25 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
/// Updates `PeerInfo` with `identify` information.
pub fn identify(&mut self, peer_id: &PeerId, info: &IdentifyInfo) {
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
let previous_kind = peer_info.client.kind.clone();
peer_info.client = client::Client::from_identify_info(info);
peer_info.listening_addresses = info.listen_addrs.clone();
if previous_kind != peer_info.client.kind {
// update the peer client kind metric
if let Some(v) = metrics::get_int_gauge(
&metrics::PEERS_PER_CLIENT,
&[&peer_info.client.kind.to_string()],
) {
v.inc()
};
if let Some(v) = metrics::get_int_gauge(
&metrics::PEERS_PER_CLIENT,
&[&previous_kind.to_string()],
) {
v.dec()
};
}
} else {
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
}
@@ -551,7 +589,10 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
}
match connection {
ConnectingType::Dialing => peerdb.dialing_peer(peer_id),
ConnectingType::Dialing => {
peerdb.dialing_peer(peer_id);
return true;
}
ConnectingType::IngoingConnected => peerdb.connect_outgoing(peer_id),
ConnectingType::OutgoingConnected => peerdb.connect_ingoing(peer_id),
}
@@ -568,6 +609,21 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.network_globals.connected_peers() as i64,
);
// Increment the PEERS_PER_CLIENT metric
if let Some(kind) = self
.network_globals
.peers
.read()
.peer_info(peer_id)
.map(|peer_info| peer_info.client.kind.clone())
{
if let Some(v) =
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
{
v.inc()
};
}
true
}

View File

@@ -4,7 +4,7 @@ use super::PeerSyncStatus;
use crate::rpc::MetaData;
use crate::Multiaddr;
use serde::{
ser::{SerializeStructVariant, Serializer},
ser::{SerializeStruct, Serializer},
Serialize,
};
use std::net::IpAddr;
@@ -120,29 +120,51 @@ pub enum PeerConnectionStatus {
/// Serialization for http requests.
impl Serialize for PeerConnectionStatus {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut s = serializer.serialize_struct("connection_status", 5)?;
match self {
Connected { n_in, n_out } => {
let mut s = serializer.serialize_struct_variant("", 0, "Connected", 2)?;
s.serialize_field("in", n_in)?;
s.serialize_field("out", n_out)?;
s.serialize_field("status", "connected")?;
s.serialize_field("connections_in", n_in)?;
s.serialize_field("connections_out", n_out)?;
s.serialize_field("last_seen", &0)?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Disconnected { since } => {
let mut s = serializer.serialize_struct_variant("", 1, "Disconnected", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
s.serialize_field("status", "disconnected")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Banned { since, .. } => {
let mut s = serializer.serialize_struct_variant("", 2, "Banned", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
Banned {
since,
ip_addresses,
} => {
s.serialize_field("status", "banned")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &ip_addresses)?;
s.end()
}
Dialing { since } => {
let mut s = serializer.serialize_struct_variant("", 3, "Dialing", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
s.serialize_field("status", "dialing")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Unknown => {
s.serialize_field("status", "unknown")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &0)?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Unknown => serializer.serialize_unit_variant("", 4, "Unknown"),
}
}
}