mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Extended Gossipsub metrics (#1577)
## Issue Addressed

N/A

## Proposed Changes

Adds extended metrics to give a better idea of what is happening at the gossipsub layer of Lighthouse. This provides information about mesh statistics per topic, subscriptions, and peer scores.

## Additional Info
This commit is contained in:
@@ -41,7 +41,7 @@ rand = "0.7.3"
|
||||
[dependencies.libp2p]
|
||||
#version = "0.23.0"
|
||||
git = "https://github.com/sigp/rust-libp2p"
|
||||
rev = "d0f9d6b9b3fef9616026f3ddf11d75fe9f7a41df"
|
||||
rev = "03f998022ce2f566a6c6e6c4206bc0ce4d45109f"
|
||||
default-features = false
|
||||
features = ["websocket", "identify", "mplex", "noise", "gossipsub", "dns", "tcp-tokio"]
|
||||
|
||||
|
||||
@@ -249,8 +249,28 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
for topic in message.topics(GossipEncoding::default(), self.enr_fork_id.fork_digest) {
|
||||
match message.encode(GossipEncoding::default()) {
|
||||
Ok(message_data) => {
|
||||
if let Err(e) = self.gossipsub.publish(topic.into(), message_data) {
|
||||
if let Err(e) = self.gossipsub.publish(topic.clone().into(), message_data) {
|
||||
slog::warn!(self.log, "Could not publish message"; "error" => format!("{:?}", e));
|
||||
|
||||
// add to metrics
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::FAILED_ATTESTATION_PUBLISHES_PER_SUBNET,
|
||||
&[&subnet_id.to_string()],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
}
|
||||
kind => {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::FAILED_PUBLISHES_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", kind)],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => crit!(self.log, "Could not publish message"; "error" => e),
|
||||
@@ -471,23 +491,9 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
}
|
||||
}
|
||||
GossipsubEvent::Subscribed { peer_id, topic } => {
|
||||
if let Some(topic_metric) = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_COUNT,
|
||||
&[topic.as_str()],
|
||||
) {
|
||||
topic_metric.inc()
|
||||
}
|
||||
|
||||
self.add_event(BehaviourEvent::PeerSubscribed(peer_id, topic));
|
||||
}
|
||||
GossipsubEvent::Unsubscribed { peer_id: _, topic } => {
|
||||
if let Some(topic_metric) = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_COUNT,
|
||||
&[topic.as_str()],
|
||||
) {
|
||||
topic_metric.dec()
|
||||
}
|
||||
}
|
||||
GossipsubEvent::Unsubscribed { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ pub use behaviour::{BehaviourEvent, PeerRequestId, Request, Response};
|
||||
pub use config::Config as NetworkConfig;
|
||||
pub use discovery::{CombinedKeyExt, EnrExt, Eth2Enr};
|
||||
pub use discv5;
|
||||
pub use libp2p::gossipsub::{MessageAcceptance, MessageId, Topic, TopicHash};
|
||||
pub use libp2p::gossipsub::{Gossipsub, MessageAcceptance, MessageId, Topic, TopicHash};
|
||||
pub use libp2p::{core::ConnectedPoint, PeerId, Swarm};
|
||||
pub use libp2p::{multiaddr, Multiaddr};
|
||||
pub use metrics::scrape_discovery_metrics;
|
||||
|
||||
@@ -34,9 +34,20 @@ lazy_static! {
|
||||
"Unsolicited discovery requests per ip per second",
|
||||
&["Addresses"]
|
||||
);
|
||||
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_peers_per_topic_count",
|
||||
"Peers subscribed per topic",
|
||||
pub static ref PEERS_PER_CLIENT: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"libp2p_peers_per_client",
|
||||
"The connected peers via client implementation",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref FAILED_ATTESTATION_PUBLISHES_PER_SUBNET: Result<IntGaugeVec> =
|
||||
try_create_int_gauge_vec(
|
||||
"gossipsub_failed_attestation_publishes_per_subnet",
|
||||
"Failed attestation publishes per subnet",
|
||||
&["subnet"]
|
||||
);
|
||||
pub static ref FAILED_PUBLISHES_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_failed_publishes_per_main_topic",
|
||||
"Failed gossip publishes",
|
||||
&["topic_hash"]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ pub struct Client {
|
||||
pub agent_string: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
#[derive(Clone, Debug, Serialize, PartialEq)]
|
||||
pub enum ClientKind {
|
||||
/// A lighthouse node (the best kind).
|
||||
Lighthouse,
|
||||
@@ -98,6 +98,12 @@ impl std::fmt::Display for Client {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ClientKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
// helper function to identify clients from their agent_version. Returns the client
|
||||
// kind and its associated version and the OS kind.
|
||||
fn client_from_agent_version(agent_version: &str) -> (ClientKind, String, String) {
|
||||
|
||||
@@ -239,6 +239,27 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
///
|
||||
/// This is also called when dialing a peer fails.
|
||||
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
|
||||
// Decrement the PEERS_PER_CLIENT metric
|
||||
if let Some(kind) = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.and_then(|peer_info| {
|
||||
if let Connected { .. } = peer_info.connection_status {
|
||||
Some(peer_info.client.kind.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
{
|
||||
if let Some(v) =
|
||||
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
|
||||
{
|
||||
v.dec()
|
||||
};
|
||||
}
|
||||
|
||||
self.network_globals.peers.write().disconnect(peer_id);
|
||||
|
||||
// remove the ping and status timer for the peer
|
||||
@@ -296,8 +317,25 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
/// Updates `PeerInfo` with `identify` information.
|
||||
pub fn identify(&mut self, peer_id: &PeerId, info: &IdentifyInfo) {
|
||||
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
let previous_kind = peer_info.client.kind.clone();
|
||||
peer_info.client = client::Client::from_identify_info(info);
|
||||
peer_info.listening_addresses = info.listen_addrs.clone();
|
||||
|
||||
if previous_kind != peer_info.client.kind {
|
||||
// update the peer client kind metric
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::PEERS_PER_CLIENT,
|
||||
&[&peer_info.client.kind.to_string()],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::PEERS_PER_CLIENT,
|
||||
&[&previous_kind.to_string()],
|
||||
) {
|
||||
v.dec()
|
||||
};
|
||||
}
|
||||
} else {
|
||||
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
|
||||
}
|
||||
@@ -551,7 +589,10 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
}
|
||||
|
||||
match connection {
|
||||
ConnectingType::Dialing => peerdb.dialing_peer(peer_id),
|
||||
ConnectingType::Dialing => {
|
||||
peerdb.dialing_peer(peer_id);
|
||||
return true;
|
||||
}
|
||||
ConnectingType::IngoingConnected => peerdb.connect_outgoing(peer_id),
|
||||
ConnectingType::OutgoingConnected => peerdb.connect_ingoing(peer_id),
|
||||
}
|
||||
@@ -568,6 +609,21 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.network_globals.connected_peers() as i64,
|
||||
);
|
||||
|
||||
// Increment the PEERS_PER_CLIENT metric
|
||||
if let Some(kind) = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.map(|peer_info| peer_info.client.kind.clone())
|
||||
{
|
||||
if let Some(v) =
|
||||
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
|
||||
{
|
||||
v.inc()
|
||||
};
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use super::PeerSyncStatus;
|
||||
use crate::rpc::MetaData;
|
||||
use crate::Multiaddr;
|
||||
use serde::{
|
||||
ser::{SerializeStructVariant, Serializer},
|
||||
ser::{SerializeStruct, Serializer},
|
||||
Serialize,
|
||||
};
|
||||
use std::net::IpAddr;
|
||||
@@ -120,29 +120,51 @@ pub enum PeerConnectionStatus {
|
||||
/// Serialization for http requests.
|
||||
impl Serialize for PeerConnectionStatus {
|
||||
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||
let mut s = serializer.serialize_struct("connection_status", 5)?;
|
||||
match self {
|
||||
Connected { n_in, n_out } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 0, "Connected", 2)?;
|
||||
s.serialize_field("in", n_in)?;
|
||||
s.serialize_field("out", n_out)?;
|
||||
s.serialize_field("status", "connected")?;
|
||||
s.serialize_field("connections_in", n_in)?;
|
||||
s.serialize_field("connections_out", n_out)?;
|
||||
s.serialize_field("last_seen", &0)?;
|
||||
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
|
||||
s.end()
|
||||
}
|
||||
Disconnected { since } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 1, "Disconnected", 1)?;
|
||||
s.serialize_field("since", &since.elapsed().as_secs())?;
|
||||
s.serialize_field("status", "disconnected")?;
|
||||
s.serialize_field("connections_in", &0)?;
|
||||
s.serialize_field("connections_out", &0)?;
|
||||
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
|
||||
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
|
||||
s.end()
|
||||
}
|
||||
Banned { since, .. } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 2, "Banned", 1)?;
|
||||
s.serialize_field("since", &since.elapsed().as_secs())?;
|
||||
Banned {
|
||||
since,
|
||||
ip_addresses,
|
||||
} => {
|
||||
s.serialize_field("status", "banned")?;
|
||||
s.serialize_field("connections_in", &0)?;
|
||||
s.serialize_field("connections_out", &0)?;
|
||||
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
|
||||
s.serialize_field("banned_ips", &ip_addresses)?;
|
||||
s.end()
|
||||
}
|
||||
Dialing { since } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 3, "Dialing", 1)?;
|
||||
s.serialize_field("since", &since.elapsed().as_secs())?;
|
||||
s.serialize_field("status", "dialing")?;
|
||||
s.serialize_field("connections_in", &0)?;
|
||||
s.serialize_field("connections_out", &0)?;
|
||||
s.serialize_field("last_seen", &since.elapsed().as_secs())?;
|
||||
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
|
||||
s.end()
|
||||
}
|
||||
Unknown => {
|
||||
s.serialize_field("status", "unknown")?;
|
||||
s.serialize_field("connections_in", &0)?;
|
||||
s.serialize_field("connections_out", &0)?;
|
||||
s.serialize_field("last_seen", &0)?;
|
||||
s.serialize_field("banned_ips", &Vec::<IpAddr>::new())?;
|
||||
s.end()
|
||||
}
|
||||
Unknown => serializer.serialize_unit_variant("", 4, "Unknown"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user