mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 10:22:38 +00:00
Extended Gossipsub metrics (#1577)
## Issue Addressed

N/A

## Proposed Changes

Adds extended metrics to give a better idea of what is happening at the gossipsub layer of lighthouse. This provides information about mesh statistics per topic, subscriptions and peer scores.

## Additional Info
This commit is contained in:
@@ -10,13 +10,14 @@ use eth2_libp2p::{
|
||||
rpc::{GoodbyeReason, RPCResponseErrorCode, RequestId},
|
||||
Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, Request, Response,
|
||||
};
|
||||
use eth2_libp2p::{BehaviourEvent, MessageId, NetworkGlobals, PeerId};
|
||||
use eth2_libp2p::{
|
||||
types::GossipKind, BehaviourEvent, GossipTopic, MessageId, NetworkGlobals, PeerId, TopicHash,
|
||||
};
|
||||
use eth2_libp2p::{MessageAcceptance, Service as LibP2PService};
|
||||
use futures::prelude::*;
|
||||
use rest_types::ValidatorSubscription;
|
||||
use slog::{debug, error, info, o, trace, warn};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
use store::HotColdDB;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::Delay;
|
||||
@@ -24,6 +25,9 @@ use types::EthSpec;
|
||||
|
||||
mod tests;
|
||||
|
||||
/// The interval (in seconds) that various network metrics will update.
|
||||
const METRIC_UPDATE_INTERVAL: u64 = 1;
|
||||
|
||||
/// Types of messages that the network service can receive.
|
||||
#[derive(Debug)]
|
||||
pub enum NetworkMessage<T: EthSpec> {
|
||||
@@ -91,6 +95,8 @@ pub struct NetworkService<T: BeaconChainTypes> {
|
||||
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
|
||||
/// A delay that expires when a new fork takes place.
|
||||
next_fork_update: Option<Delay>,
|
||||
/// A timer for updating various network metrics.
|
||||
metrics_update: tokio::time::Interval,
|
||||
/// The logger for the network service.
|
||||
log: slog::Logger,
|
||||
}
|
||||
@@ -146,6 +152,9 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
||||
let attestation_service =
|
||||
AttestationService::new(beacon_chain.clone(), network_globals.clone(), &network_log);
|
||||
|
||||
// create a timer for updating network metrics
|
||||
let metrics_update = tokio::time::interval(Duration::from_secs(METRIC_UPDATE_INTERVAL));
|
||||
|
||||
// create the network service and spawn the task
|
||||
let network_log = network_log.new(o!("service" => "network"));
|
||||
let network_service = NetworkService {
|
||||
@@ -157,6 +166,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
||||
store,
|
||||
network_globals: network_globals.clone(),
|
||||
next_fork_update,
|
||||
metrics_update,
|
||||
log: network_log,
|
||||
};
|
||||
|
||||
@@ -175,9 +185,8 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
|
||||
// spawn on the current executor
|
||||
executor.spawn_without_exit(async move {
|
||||
// TODO: there is something with this code that prevents cargo fmt from doing anything at
|
||||
// all. Ok, it is worse, the compiler doesn't show errors over this code beyond ast
|
||||
// checking
|
||||
|
||||
let mut metric_update_counter = 0;
|
||||
loop {
|
||||
// build the futures to check simultaneously
|
||||
tokio::select! {
|
||||
@@ -206,6 +215,17 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
info!(service.log, "Network service shutdown");
|
||||
return;
|
||||
}
|
||||
_ = service.metrics_update.next() => {
|
||||
// update various network metrics
|
||||
metric_update_counter +=1;
|
||||
if metric_update_counter* 1000 % T::EthSpec::default_spec().milliseconds_per_slot == 0 {
|
||||
// if a slot has occurred, reset the metrics
|
||||
let _ = metrics::ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
}
|
||||
update_gossip_metrics::<T::EthSpec>(&service.libp2p.swarm.gs());
|
||||
}
|
||||
// handle a message sent to the network
|
||||
Some(message) = service.network_recv.recv() => {
|
||||
match message {
|
||||
@@ -424,7 +444,11 @@ fn expose_publish_metrics<T: EthSpec>(messages: &[PubsubMessage<T>]) {
|
||||
for message in messages {
|
||||
match message {
|
||||
PubsubMessage::BeaconBlock(_) => metrics::inc_counter(&metrics::GOSSIP_BLOCKS_TX),
|
||||
PubsubMessage::Attestation(_) => {
|
||||
PubsubMessage::Attestation(subnet_id) => {
|
||||
metrics::inc_counter_vec(
|
||||
&metrics::ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT,
|
||||
&[&subnet_id.0.to_string()],
|
||||
);
|
||||
metrics::inc_counter(&metrics::GOSSIP_UNAGGREGATED_ATTESTATIONS_TX)
|
||||
}
|
||||
PubsubMessage::AggregateAndProofAttestation(_) => {
|
||||
@@ -448,3 +472,163 @@ fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn update_gossip_metrics<T: EthSpec>(gossipsub: ð2_libp2p::Gossipsub) {
|
||||
// Clear the metrics
|
||||
let _ = metrics::PEERS_PER_PROTOCOL
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = metrics::PEERS_PER_PROTOCOL
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = metrics::MESH_PEERS_PER_MAIN_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
|
||||
// reset the mesh peers, showing all subnets
|
||||
for subnet_id in 0..T::default_spec().attestation_subnet_count {
|
||||
let _ = metrics::get_int_gauge(
|
||||
&metrics::MESH_PEERS_PER_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
}
|
||||
|
||||
// Subnet topics subscribed to
|
||||
for topic_hash in gossipsub.topics() {
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
if let GossipKind::Attestation(subnet_id) = topic.kind() {
|
||||
let _ = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
)
|
||||
.map(|v| v.set(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Peers per subscribed subnet
|
||||
let mut peers_per_topic: HashMap<TopicHash, usize> = HashMap::new();
|
||||
for (peer_id, topics) in gossipsub.all_peers() {
|
||||
for topic_hash in topics {
|
||||
*peers_per_topic.entry(topic_hash.clone()).or_default() += 1;
|
||||
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
|
||||
// average peer scores
|
||||
if let Some(score) = gossipsub.peer_score(peer_id) {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
) {
|
||||
v.add(score as i64)
|
||||
};
|
||||
}
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(score) = gossipsub.peer_score(peer_id) {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", kind)],
|
||||
) {
|
||||
v.add(score as i64)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// adjust to average scores by dividing by number of peers
|
||||
for (topic_hash, peers) in peers_per_topic.iter() {
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
// average peer scores
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
) {
|
||||
v.set(v.get() / (*peers as i64))
|
||||
};
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", kind)],
|
||||
) {
|
||||
v.set(v.get() / (*peers as i64))
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mesh peers
|
||||
for topic_hash in gossipsub.topics() {
|
||||
let peers = gossipsub.mesh_peers(&topic_hash).count();
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::MESH_PEERS_PER_SUBNET_TOPIC,
|
||||
&[&subnet_id.to_string()],
|
||||
) {
|
||||
v.set(peers as i64)
|
||||
};
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::MESH_PEERS_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", kind)],
|
||||
) {
|
||||
v.set(peers as i64)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// protocol peers
|
||||
let mut peers_per_protocol: HashMap<String, i64> = HashMap::new();
|
||||
for (_peer, protocol) in gossipsub.peer_protocol() {
|
||||
*peers_per_protocol.entry(protocol.to_string()).or_default() += 1;
|
||||
}
|
||||
|
||||
for (protocol, peers) in peers_per_protocol.iter() {
|
||||
if let Some(v) =
|
||||
metrics::get_int_gauge(&metrics::PEERS_PER_PROTOCOL, &[&protocol.to_string()])
|
||||
{
|
||||
v.set(*peers)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user