Additional networking metrics (#2549)

Adds additional metrics for network monitoring and evaluation.


Co-authored-by: Mark Mackey <mark@sigmaprime.io>
This commit is contained in:
Age Manning
2021-12-22 06:17:14 +00:00
parent 60d917d9e9
commit 81c667b58e
29 changed files with 877 additions and 1158 deletions

View File

@@ -4,216 +4,42 @@ use beacon_chain::{
};
use fnv::FnvHashMap;
pub use lighthouse_metrics::*;
use lighthouse_network::PubsubMessage;
use lighthouse_network::{
types::GossipKind, BandwidthSinks, GossipTopic, Gossipsub, NetworkGlobals, TopicHash,
types::GossipKind, BandwidthSinks, GossipTopic, Gossipsub, NetworkGlobals,
};
use std::{collections::HashMap, sync::Arc};
use std::sync::Arc;
use strum::AsStaticRef;
use types::{
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, subnet_id::subnet_id_to_string,
sync_subnet_id::sync_subnet_id_to_string, EthSpec,
};
use types::EthSpec;
lazy_static! {
/*
* Gossip subnets and scoring
*/
pub static ref PEERS_PER_PROTOCOL: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_peers_per_protocol",
"Peers via supported protocol",
&["protocol"]
);
pub static ref GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_subscribed_attestation_subnets",
"Attestation subnets currently subscribed to",
&["subnet"]
);
pub static ref GOSSIPSUB_SUBSCRIBED_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_subscribed_sync_subnets",
"Sync subnets currently subscribed to",
&["subnet"]
);
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_peers_per_attestation_subnet_topic_count",
"Peers subscribed per attestation subnet topic",
&["subnet"]
);
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_peers_per_sync_subnet_topic_count",
"Peers subscribed per sync subnet topic",
&["subnet"]
);
pub static ref MESH_PEERS_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_mesh_peers_per_main_topic",
"Mesh peers per main topic",
&["topic_hash"]
);
pub static ref MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_mesh_peers_per_subnet_topic",
"Mesh peers per subnet topic",
&["subnet"]
);
pub static ref MESH_PEERS_PER_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
"gossipsub_mesh_peers_per_subnet_topic",
"Mesh peers per subnet topic",
&["subnet"]
);
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_avg_peer_score_per_topic",
"Average peer's score per topic",
&["topic_hash"]
);
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_avg_peer_score_per_attestation_subnet_topic",
"Average peer's score per attestation subnet topic",
&["subnet"]
);
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_avg_peer_score_per_sync_subnet_topic",
"Average peer's score per sync committee subnet topic",
&["subnet"]
);
pub static ref ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT: Result<IntCounterVec> = try_create_int_counter_vec(
"gossipsub_attestations_published_per_subnet_per_slot",
"Failed attestation publishes per subnet",
&["subnet"]
);
pub static ref SCORES_BELOW_ZERO_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_zero_per_client",
"Relative number of scores below zero per client",
&["Client"]
);
pub static ref SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_gossip_threshold_per_client",
"Relative number of scores below gossip threshold per client",
&["Client"]
);
pub static ref SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_publish_threshold_per_client",
"Relative number of scores below publish threshold per client",
&["Client"]
);
pub static ref SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_greylist_threshold_per_client",
"Relative number of scores below greylist threshold per client",
&["Client"]
);
pub static ref MIN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_min_scores_per_client",
"Minimum scores per client",
&["Client"]
);
pub static ref MEDIAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_median_scores_per_client",
"Median scores per client",
&["Client"]
);
pub static ref MEAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_mean_scores_per_client",
"Mean scores per client",
&["Client"]
);
pub static ref MAX_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_max_scores_per_client",
"Max scores per client",
&["Client"]
);
pub static ref BEACON_BLOCK_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
try_create_int_gauge_vec(
"block_mesh_peers_per_client",
"Number of mesh peers for BeaconBlock topic per client",
&["Client"]
);
try_create_int_gauge_vec(
"block_mesh_peers_per_client",
"Number of mesh peers for BeaconBlock topic per client",
&["Client"]
);
pub static ref BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
try_create_int_gauge_vec(
"beacon_aggregate_and_proof_mesh_peers_per_client",
"Number of mesh peers for BeaconAggregateAndProof topic per client",
&["Client"]
);
}
lazy_static! {
/*
* Gossip Rx
*/
pub static ref GOSSIP_BLOCKS_RX: Result<IntCounter> = try_create_int_counter(
"gossipsub_blocks_rx_total",
"Count of gossip blocks received"
);
pub static ref GOSSIP_UNAGGREGATED_ATTESTATIONS_RX: Result<IntCounter> = try_create_int_counter(
"gossipsub_unaggregated_attestations_rx_total",
"Count of gossip unaggregated attestations received"
);
pub static ref GOSSIP_AGGREGATED_ATTESTATIONS_RX: Result<IntCounter> = try_create_int_counter(
"gossipsub_aggregated_attestations_rx_total",
"Count of gossip aggregated attestations received"
);
pub static ref GOSSIP_SYNC_COMMITTEE_MESSAGE_RX: Result<IntCounter> = try_create_int_counter(
"gossipsub_sync_committee_message_rx_total",
"Count of gossip sync committee messages received"
);
pub static ref GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_RX: Result<IntCounter> = try_create_int_counter(
"gossipsub_sync_committee_contribution_received_total",
"Count of gossip sync committee contributions received"
);
/*
* Gossip Tx
*/
pub static ref GOSSIP_BLOCKS_TX: Result<IntCounter> = try_create_int_counter(
"gossipsub_blocks_tx_total",
"Count of gossip blocks transmitted"
);
pub static ref GOSSIP_UNAGGREGATED_ATTESTATIONS_TX: Result<IntCounter> = try_create_int_counter(
"gossipsub_unaggregated_attestations_tx_total",
"Count of gossip unaggregated attestations transmitted"
);
pub static ref GOSSIP_AGGREGATED_ATTESTATIONS_TX: Result<IntCounter> = try_create_int_counter(
"gossipsub_aggregated_attestations_tx_total",
"Count of gossip aggregated attestations transmitted"
);
pub static ref GOSSIP_SYNC_COMMITTEE_MESSAGE_TX: Result<IntCounter> = try_create_int_counter(
"gossipsub_sync_committee_message_tx_total",
"Count of gossip sync committee messages transmitted"
);
pub static ref GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_TX: Result<IntCounter> = try_create_int_counter(
"gossipsub_sync_committee_contribution_tx_total",
"Count of gossip sync committee contributions transmitted"
);
/*
* Attestation subnet subscriptions
*/
pub static ref SUBNET_SUBSCRIPTION_REQUESTS: Result<IntCounter> = try_create_int_counter(
"gossipsub_attestation_subnet_subscriptions_total",
"validator_attestation_subnet_subscriptions_total",
"Count of validator attestation subscription requests."
);
pub static ref SUBNET_SUBSCRIPTION_AGGREGATOR_REQUESTS: Result<IntCounter> = try_create_int_counter(
"gossipsub_subnet_subscriptions_aggregator_total",
"validator_subnet_subscriptions_aggregator_total",
"Count of validator subscription requests where the subscriber is an aggregator."
);
/*
* Sync committee subnet subscriptions
*/
pub static ref SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS: Result<IntCounter> = try_create_int_counter(
"gossipsub_sync_committee_subnet_subscriptions_total",
pub static ref SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS: Result<IntCounter> = try_create_int_counter(
"validator_sync_committee_subnet_subscriptions_total",
"Count of validator sync committee subscription requests."
);
@@ -406,14 +232,13 @@ lazy_static! {
"beacon_processor_sync_contribution_verified_total",
"Total number of sync committee contributions verified for gossip."
);
pub static ref BEACON_PROCESSOR_SYNC_CONTRIBUTION_IMPORTED_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_processor_sync_contribution_imported_total",
"Total number of sync committee contributions imported to fork choice, etc."
);
}
lazy_static! {
/// Errors and Debugging Stats
pub static ref GOSSIP_ATTESTATION_ERRORS_PER_TYPE: Result<IntCounterVec> =
try_create_int_counter_vec(
"gossipsub_attestation_errors_per_type",
@@ -426,8 +251,16 @@ lazy_static! {
"Gossipsub sync_committee errors per error type",
&["type"]
);
}
lazy_static! {
/*
* Bandwidth metrics
*/
pub static ref INBOUND_LIBP2P_BYTES: Result<IntGauge> =
try_create_int_gauge("libp2p_inbound_bytes", "The inbound bandwidth over libp2p");
pub static ref OUTBOUND_LIBP2P_BYTES: Result<IntGauge> = try_create_int_gauge(
"libp2p_outbound_bytes",
"The outbound bandwidth over libp2p"
@@ -436,18 +269,8 @@ lazy_static! {
"libp2p_total_bandwidth",
"The total inbound/outbound bandwidth over libp2p"
);
}
/// Copies the inbound/outbound byte totals reported by `bandwidth` into the libp2p
/// bandwidth gauges.
///
/// Each total is sampled exactly once, so the value written to `TOTAL_LIBP2P_BANDWIDTH`
/// is always the sum of the values written to `INBOUND_LIBP2P_BYTES` and
/// `OUTBOUND_LIBP2P_BYTES` in the same update (barring saturation).
pub fn update_bandwidth_metrics(bandwidth: Arc<BandwidthSinks>) {
    // The gauges store `i64`; clamp instead of letting an `as` cast wrap to a negative value.
    let to_gauge = |bytes: u64| bytes.min(i64::MAX as u64) as i64;

    let inbound = bandwidth.total_inbound();
    let outbound = bandwidth.total_outbound();

    set_gauge(&INBOUND_LIBP2P_BYTES, to_gauge(inbound));
    set_gauge(&OUTBOUND_LIBP2P_BYTES, to_gauge(outbound));
    // `saturating_add` avoids an overflow panic (debug) or wrap-around (release).
    set_gauge(
        &TOTAL_LIBP2P_BANDWIDTH,
        to_gauge(inbound.saturating_add(outbound)),
    );
}
lazy_static! {
/*
* Sync related metrics
*/
@@ -489,11 +312,21 @@ lazy_static! {
);
pub static ref BEACON_PROCESSOR_REPROCESSING_QUEUE_EXPIRED_ATTESTATIONS: Result<IntCounter> = try_create_int_counter(
"beacon_processor_reprocessing_queue_expired_attestations",
"Number of queued attestations which have expired before a matching block has been found"
"Number of queued attestations which have expired before a matching block has been found."
);
pub static ref BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_ATTESTATIONS: Result<IntCounter> = try_create_int_counter(
"beacon_processor_reprocessing_queue_matched_attestations",
"Number of queued attestations where as matching block has been imported"
"Number of queued attestations where as matching block has been imported."
);
}
/// Copies the inbound/outbound byte totals reported by `bandwidth` into the libp2p
/// bandwidth gauges.
///
/// Each total is sampled exactly once, so the value written to `TOTAL_LIBP2P_BANDWIDTH`
/// is always the sum of the values written to `INBOUND_LIBP2P_BYTES` and
/// `OUTBOUND_LIBP2P_BYTES` in the same update (barring saturation).
pub fn update_bandwidth_metrics(bandwidth: Arc<BandwidthSinks>) {
    // The gauges store `i64`; clamp instead of letting an `as` cast wrap to a negative value.
    let to_gauge = |bytes: u64| bytes.min(i64::MAX as u64) as i64;

    let inbound = bandwidth.total_inbound();
    let outbound = bandwidth.total_outbound();

    set_gauge(&INBOUND_LIBP2P_BYTES, to_gauge(inbound));
    set_gauge(&OUTBOUND_LIBP2P_BYTES, to_gauge(outbound));
    // `saturating_add` avoids an overflow panic (debug) or wrap-around (release).
    set_gauge(
        &TOTAL_LIBP2P_BANDWIDTH,
        to_gauge(inbound.saturating_add(outbound)),
    );
}
@@ -505,402 +338,51 @@ pub fn register_sync_committee_error(error: &SyncCommitteeError) {
inc_counter_vec(&GOSSIP_SYNC_COMMITTEE_ERRORS_PER_TYPE, &[error.as_ref()]);
}
/// Updates the Prometheus transmit counters for a batch of outgoing pubsub `messages`.
///
/// Every block, unaggregated attestation, aggregate, sync committee message and signed
/// contribution bumps its own `*_TX` counter. Unaggregated attestations also bump the
/// per-subnet published counter, labelled with the first element of the message payload.
/// All other message kinds are ignored.
pub fn expose_publish_metrics<T: EthSpec>(messages: &[PubsubMessage<T>]) {
    messages.iter().for_each(|outgoing| match outgoing {
        PubsubMessage::BeaconBlock(_) => inc_counter(&GOSSIP_BLOCKS_TX),
        PubsubMessage::Attestation(attestation) => {
            // The subnet identifier is used as the label of the per-subnet counter.
            let subnet_label: &str = attestation.0.as_ref();
            inc_counter_vec(&ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT, &[subnet_label]);
            inc_counter(&GOSSIP_UNAGGREGATED_ATTESTATIONS_TX)
        }
        PubsubMessage::AggregateAndProofAttestation(_) => {
            inc_counter(&GOSSIP_AGGREGATED_ATTESTATIONS_TX)
        }
        PubsubMessage::SyncCommitteeMessage(_) => inc_counter(&GOSSIP_SYNC_COMMITTEE_MESSAGE_TX),
        PubsubMessage::SignedContributionAndProof(_) => {
            inc_counter(&GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_TX)
        }
        _ => {}
    });
}
/// Updates the Prometheus receive counter that corresponds to an incoming pubsub `message`.
///
/// Blocks, unaggregated attestations, aggregates, sync committee messages and signed
/// contributions each have a dedicated `*_RX` counter; any other message kind leaves the
/// metrics untouched.
pub fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
    // Pick the counter first, then increment it in a single place.
    let rx_counter = match message {
        PubsubMessage::BeaconBlock(_) => &*GOSSIP_BLOCKS_RX,
        PubsubMessage::Attestation(_) => &*GOSSIP_UNAGGREGATED_ATTESTATIONS_RX,
        PubsubMessage::AggregateAndProofAttestation(_) => &*GOSSIP_AGGREGATED_ATTESTATIONS_RX,
        PubsubMessage::SyncCommitteeMessage(_) => &*GOSSIP_SYNC_COMMITTEE_MESSAGE_RX,
        PubsubMessage::SignedContributionAndProof(_) => &*GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_RX,
        // Nothing is recorded for the remaining message kinds.
        _ => return,
    };
    inc_counter(rx_counter);
}
pub fn update_gossip_metrics<T: EthSpec>(
gossipsub: &Gossipsub,
network_globals: &Arc<NetworkGlobals<T>>,
) {
// Clear the metrics
let _ = PEERS_PER_PROTOCOL.as_ref().map(|gauge| gauge.reset());
let _ = PEERS_PER_PROTOCOL.as_ref().map(|gauge| gauge.reset());
let _ = MESH_PEERS_PER_MAIN_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = SCORES_BELOW_ZERO_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = MIN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
let _ = MEDIAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
let _ = MEAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
let _ = MAX_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
let _ = BEACON_BLOCK_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
// reset the mesh peers, showing all subnets
for subnet_id in 0..T::default_spec().attestation_subnet_count {
let _ = get_int_gauge(
&MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
}
for subnet_id in 0..SYNC_COMMITTEE_SUBNET_COUNT {
let _ = get_int_gauge(
&MESH_PEERS_PER_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
}
// Subnet topics subscribed to
for topic_hash in gossipsub.topics() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
if let GossipKind::Attestation(subnet_id) = topic.kind() {
let _ = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
)
.map(|v| v.set(1));
}
}
}
// Peers per subscribed subnet
let mut peers_per_topic: HashMap<TopicHash, usize> = HashMap::new();
for (peer_id, topics) in gossipsub.all_peers() {
for topic_hash in topics {
*peers_per_topic.entry(topic_hash.clone()).or_default() += 1;
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
if let Some(v) = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.inc()
};
// average peer scores
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = get_gauge(
&AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.add(score)
};
}
}
GossipKind::SyncCommitteeMessage(subnet_id) => {
if let Some(v) = get_int_gauge(
&GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id.into())],
) {
v.inc()
};
// average peer scores
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = get_gauge(
&AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id.into())],
) {
v.add(score)
};
}
}
kind => {
// main topics
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = get_gauge(
&AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
&[kind.as_ref()],
) {
v.add(score)
};
}
}
}
}
}
}
// adjust to average scores by dividing by number of peers
for (topic_hash, peers) in peers_per_topic.iter() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
// average peer scores
if let Some(v) = get_gauge(
&AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.set(v.get() / (*peers as f64))
};
}
GossipKind::SyncCommitteeMessage(subnet_id) => {
// average peer scores
if let Some(v) = get_gauge(
&AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id.into())],
) {
v.set(v.get() / (*peers as f64))
};
}
kind => {
// main topics
if let Some(v) =
get_gauge(&AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC, &[kind.as_ref()])
{
v.set(v.get() / (*peers as f64))
};
}
}
}
}
// mesh peers
for topic_hash in gossipsub.topics() {
let peers = gossipsub.mesh_peers(topic_hash).count();
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
if let Some(v) = get_int_gauge(
&MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.set(peers as i64)
};
}
GossipKind::SyncCommitteeMessage(subnet_id) => {
if let Some(v) = get_int_gauge(
&MESH_PEERS_PER_SYNC_SUBNET_TOPIC,
&[sync_subnet_id_to_string(subnet_id.into())],
) {
v.set(peers as i64)
};
}
kind => {
// main topics
if let Some(v) = get_int_gauge(&MESH_PEERS_PER_MAIN_TOPIC, &[kind.as_ref()]) {
v.set(peers as i64)
};
}
}
}
}
// protocol peers
let mut peers_per_protocol: HashMap<&'static str, i64> = HashMap::new();
for (_peer, protocol) in gossipsub.peer_protocol() {
*peers_per_protocol
.entry(protocol.as_static_ref())
.or_default() += 1;
}
for (protocol, peers) in peers_per_protocol.iter() {
if let Some(v) = get_int_gauge(&PEERS_PER_PROTOCOL, &[protocol]) {
v.set(*peers)
};
}
let mut peer_to_client = HashMap::new();
let mut scores_per_client: HashMap<&'static str, Vec<f64>> = HashMap::new();
{
let peers = network_globals.peers.read();
for (peer_id, _) in gossipsub.all_peers() {
let client = peers
.peer_info(peer_id)
.map(|peer_info| peer_info.client().kind.as_static())
.unwrap_or_else(|| "Unknown");
peer_to_client.insert(peer_id, client);
let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
scores_per_client.entry(client).or_default().push(score);
}
}
// mesh peers per client
// Mesh peers per client
for topic_hash in gossipsub.topics() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(_subnet_id) => {}
GossipKind::BeaconBlock => {
for peer in gossipsub.mesh_peers(topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) =
get_int_gauge(&BEACON_BLOCK_MESH_PEERS_PER_CLIENT, &[client])
{
v.inc()
};
}
for peer_id in gossipsub.mesh_peers(topic_hash) {
let client = network_globals
.peers
.read()
.peer_info(peer_id)
.map(|peer_info| peer_info.client().kind.as_static())
.unwrap_or_else(|| "Unknown");
if let Some(v) =
get_int_gauge(&BEACON_BLOCK_MESH_PEERS_PER_CLIENT, &[client])
{
v.inc()
};
}
}
GossipKind::BeaconAggregateAndProof => {
for peer in gossipsub.mesh_peers(topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) = get_int_gauge(
&BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
for peer_id in gossipsub.mesh_peers(topic_hash) {
let client = network_globals
.peers
.read()
.peer_info(peer_id)
.map(|peer_info| peer_info.client().kind.as_static())
.unwrap_or_else(|| "Unknown");
if let Some(v) = get_int_gauge(
&BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
}
_ => (),
GossipKind::SyncCommitteeMessage(_subnet_id) => {}
_kind => {}
}
}
}
for (client, scores) in scores_per_client.into_iter() {
let c = &[client];
let len = scores.len();
if len > 0 {
let mut below0 = 0;
let mut below_gossip_threshold = 0;
let mut below_publish_threshold = 0;
let mut below_greylist_threshold = 0;
let mut min = f64::INFINITY;
let mut sum = 0.0;
let mut max = f64::NEG_INFINITY;
let count = scores.len() as f64;
for &score in &scores {
if score < 0.0 {
below0 += 1;
}
if score < -4000.0 {
//TODO not hardcode
below_gossip_threshold += 1;
}
if score < -8000.0 {
//TODO not hardcode
below_publish_threshold += 1;
}
if score < -16000.0 {
//TODO not hardcode
below_greylist_threshold += 1;
}
if score < min {
min = score;
}
if score > max {
max = score;
}
sum += score;
}
let median = if len == 0 {
0.0
} else if len % 2 == 0 {
(scores[len / 2 - 1] + scores[len / 2]) / 2.0
} else {
scores[len / 2]
};
set_gauge_entry(&SCORES_BELOW_ZERO_PER_CLIENT, c, below0 as f64 / count);
set_gauge_entry(
&SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
c,
below_gossip_threshold as f64 / count,
);
set_gauge_entry(
&SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
c,
below_publish_threshold as f64 / count,
);
set_gauge_entry(
&SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
c,
below_greylist_threshold as f64 / count,
);
set_gauge_entry(&MIN_SCORES_PER_CLIENT, c, min);
set_gauge_entry(&MEDIAN_SCORES_PER_CLIENT, c, median);
set_gauge_entry(&MEAN_SCORES_PER_CLIENT, c, sum / count);
set_gauge_entry(&MAX_SCORES_PER_CLIENT, c, max);
}
}
}
pub fn update_sync_metrics<T: EthSpec>(network_globals: &Arc<NetworkGlobals<T>>) {

View File

@@ -9,15 +9,18 @@ use crate::{
use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes};
use futures::future::OptionFuture;
use futures::prelude::*;
use lighthouse_network::{
open_metrics_client::registry::Registry, MessageAcceptance, Service as LibP2PService,
};
use lighthouse_network::{
rpc::{GoodbyeReason, RPCResponseErrorCode, RequestId},
Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, ReportSource, Request, Response, Subnet,
Context, Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, ReportSource, Request,
Response, Subnet,
};
use lighthouse_network::{
types::{GossipEncoding, GossipTopic},
BehaviourEvent, MessageId, NetworkGlobals, PeerId,
};
use lighthouse_network::{MessageAcceptance, Service as LibP2PService};
use slog::{crit, debug, error, info, o, trace, warn};
use std::{net::SocketAddr, pin::Pin, sync::Arc, time::Duration};
use store::HotColdDB;
@@ -32,7 +35,7 @@ use types::{
mod tests;
/// The interval (in seconds) at which various network metrics are updated.
const METRIC_UPDATE_INTERVAL: u64 = 1;
const METRIC_UPDATE_INTERVAL: u64 = 5;
/// Number of slots before the fork when we should subscribe to the new fork topics.
const SUBSCRIBE_DELAY_SLOTS: u64 = 2;
/// Delay after a fork where we unsubscribe from pre-fork topics.
@@ -154,6 +157,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
beacon_chain: Arc<BeaconChain<T>>,
config: &NetworkConfig,
executor: task_executor::TaskExecutor,
gossipsub_registry: Option<&'_ mut Registry>,
) -> error::Result<(
Arc<NetworkGlobals<T::EthSpec>>,
mpsc::UnboundedSender<NetworkMessage<T::EthSpec>>,
@@ -199,16 +203,18 @@ impl<T: BeaconChainTypes> NetworkService<T> {
debug!(network_log, "Current fork"; "fork_name" => ?fork_context.current_fork());
// launch libp2p service
let (network_globals, mut libp2p) = LibP2PService::new(
executor.clone(),
// construct the libp2p service context
let service_context = Context {
config,
enr_fork_id,
&network_log,
fork_context.clone(),
&beacon_chain.spec,
)
.await?;
fork_context: fork_context.clone(),
chain_spec: &beacon_chain.spec,
gossipsub_registry,
};
// launch libp2p service
let (network_globals, mut libp2p) =
LibP2PService::new(executor.clone(), service_context, &network_log).await?;
// Repopulate the DHT with stored ENRs if discovery is not disabled.
if !config.disable_discovery {
@@ -324,21 +330,13 @@ fn spawn_service<T: BeaconChainTypes>(
// spawn on the current executor
executor.spawn(async move {
let mut metric_update_counter = 0;
loop {
// build the futures to check simultaneously
tokio::select! {
_ = service.metrics_update.tick(), if service.metrics_enabled => {
// update various network metrics
metric_update_counter +=1;
if metric_update_counter % T::EthSpec::default_spec().seconds_per_slot == 0 {
// if a slot has occurred, reset the metrics
let _ = metrics::ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT
.as_ref()
.map(|gauge| gauge.reset());
}
metrics::update_gossip_metrics::<T::EthSpec>(
service.libp2p.swarm.behaviour_mut().gs(),
service.libp2p.swarm.behaviour().gs(),
&service.network_globals,
);
// update sync metrics
@@ -445,7 +443,6 @@ fn spawn_service<T: BeaconChainTypes>(
"count" => messages.len(),
"topics" => ?topic_kinds
);
metrics::expose_publish_metrics(&messages);
service.libp2p.swarm.behaviour_mut().publish(messages);
}
NetworkMessage::ReportPeer { peer_id, action, source } => service.libp2p.report_peer(&peer_id, action, source),
@@ -643,9 +640,6 @@ fn spawn_service<T: BeaconChainTypes>(
message,
..
} => {
// Update prometheus metrics.
metrics::expose_receive_metrics(&message);
match message {
// attestation information gets processed in the attestation service
PubsubMessage::Attestation(ref subnet_and_attestation) => {

View File

@@ -67,9 +67,10 @@ mod tests {
// Create a new network service which implicitly gets dropped at the
// end of the block.
let _network_service = NetworkService::start(beacon_chain.clone(), &config, executor)
.await
.unwrap();
let _network_service =
NetworkService::start(beacon_chain.clone(), &config, executor, None)
.await
.unwrap();
drop(signal);
});