Extended Gossipsub metrics (#1577)

## Issue Addressed

N/A

## Proposed Changes

Adds extended metrics to get a better idea of what is happening at the gossipsub layer of lighthouse. This provides information about mesh statistics per topics, subscriptions and peer scores. 

## Additional Info
This commit is contained in:
Age Manning
2020-09-01 06:59:14 +00:00
parent 8301a984eb
commit fb9d828e5e
11 changed files with 445 additions and 89 deletions

View File

@@ -20,7 +20,7 @@ pub struct Client {
pub agent_string: Option<String>,
}
#[derive(Clone, Debug, Serialize)]
#[derive(Clone, Debug, Serialize, PartialEq)]
pub enum ClientKind {
/// A lighthouse node (the best kind).
Lighthouse,
@@ -98,6 +98,12 @@ impl std::fmt::Display for Client {
}
}
impl std::fmt::Display for ClientKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}
// helper function to identify clients from their agent_version. Returns the client
// kind and it's associated version and the OS kind.
fn client_from_agent_version(agent_version: &str) -> (ClientKind, String, String) {

View File

@@ -239,6 +239,27 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
///
/// This is also called when dialing a peer fails.
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
// Decrement the PEERS_PER_CLIENT metric
if let Some(kind) = self
.network_globals
.peers
.read()
.peer_info(peer_id)
.and_then(|peer_info| {
if let Connected { .. } = peer_info.connection_status {
Some(peer_info.client.kind.clone())
} else {
None
}
})
{
if let Some(v) =
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
{
v.dec()
};
}
self.network_globals.peers.write().disconnect(peer_id);
// remove the ping and status timer for the peer
@@ -296,8 +317,25 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
/// Updates `PeerInfo` with `identify` information.
pub fn identify(&mut self, peer_id: &PeerId, info: &IdentifyInfo) {
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
let previous_kind = peer_info.client.kind.clone();
peer_info.client = client::Client::from_identify_info(info);
peer_info.listening_addresses = info.listen_addrs.clone();
if previous_kind != peer_info.client.kind {
// update the peer client kind metric
if let Some(v) = metrics::get_int_gauge(
&metrics::PEERS_PER_CLIENT,
&[&peer_info.client.kind.to_string()],
) {
v.inc()
};
if let Some(v) = metrics::get_int_gauge(
&metrics::PEERS_PER_CLIENT,
&[&previous_kind.to_string()],
) {
v.dec()
};
}
} else {
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
}
@@ -551,7 +589,10 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
}
match connection {
ConnectingType::Dialing => peerdb.dialing_peer(peer_id),
ConnectingType::Dialing => {
peerdb.dialing_peer(peer_id);
return true;
}
ConnectingType::IngoingConnected => peerdb.connect_outgoing(peer_id),
ConnectingType::OutgoingConnected => peerdb.connect_ingoing(peer_id),
}
@@ -568,6 +609,21 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.network_globals.connected_peers() as i64,
);
// Increment the PEERS_PER_CLIENT metric
if let Some(kind) = self
.network_globals
.peers
.read()
.peer_info(peer_id)
.map(|peer_info| peer_info.client.kind.clone())
{
if let Some(v) =
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
{
v.inc()
};
}
true
}

View File

@@ -4,7 +4,7 @@ use super::PeerSyncStatus;
use crate::rpc::MetaData;
use crate::Multiaddr;
use serde::{
ser::{SerializeStructVariant, Serializer},
ser::{SerializeStruct, Serializer},
Serialize,
};
use std::net::IpAddr;
@@ -120,29 +120,51 @@ pub enum PeerConnectionStatus {
/// Serialization for http requests.
impl Serialize for PeerConnectionStatus {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut s = serializer.serialize_struct("connection_status", 5)?;
match self {
Connected { n_in, n_out } => {
let mut s = serializer.serialize_struct_variant("", 0, "Connected", 2)?;
s.serialize_field("in", n_in)?;
s.serialize_field("out", n_out)?;
s.serialize_field("status", "connected")?;
s.serialize_field("connections_in", n_in)?;
s.serialize_field("connections_out", n_out)?;
s.serialize_field("last_seen", &0)?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Disconnected { since } => {
let mut s = serializer.serialize_struct_variant("", 1, "Disconnected", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
s.serialize_field("status", "disconnected")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Banned { since, .. } => {
let mut s = serializer.serialize_struct_variant("", 2, "Banned", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
Banned {
since,
ip_addresses,
} => {
s.serialize_field("status", "banned")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &ip_addresses)?;
s.end()
}
Dialing { since } => {
let mut s = serializer.serialize_struct_variant("", 3, "Dialing", 1)?;
s.serialize_field("since", &since.elapsed().as_secs())?;
s.serialize_field("status", "dialing")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Unknown => {
s.serialize_field("status", "unknown")?;
s.serialize_field("connections_in", &0)?;
s.serialize_field("connections_out", &0)?;
s.serialize_field("last_seen", &0)?;
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
s.end()
}
Unknown => serializer.serialize_unit_variant("", 4, "Unknown"),
}
}
}