Gossipsub scoring (#1668)

## Issue Addressed #1606 ## Proposed Changes Uses dynamic gossipsub scoring parameters depending on the number of active validators as specified in https://gist.github.com/blacktemplar/5c1862cb3f0e32a1a7fb0b25e79e6e2c. ## Additional Info Although the parameters got tested on Medalla, extensive testing using simulations on larger networks is still to be done and we expect that we need to change the parameters, although this might only affect constants within the dynamic parameter framework.
2026-04-29 10:43:42 +00:00 · 2020-11-12 01:48:28 +00:00
parent f0c9339153
commit 7404f1ce54
18 changed files with 1061 additions and 166 deletions
--- a/beacon_node/network/src/metrics.rs
+++ b/beacon_node/network/src/metrics.rs
@@ -36,13 +36,13 @@ lazy_static! {
        &["subnet"]
    );

-    pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
+    pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
        "gossipsub_avg_peer_score_per_topic",
        "Average peer's score per topic",
        &["topic_hash"]
    );

-    pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
+    pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
        "gossipsub_avg_peer_score_per_subnet_topic",
        "Average peer's score per subnet topic",
        &["subnet"]
@@ -53,6 +53,60 @@ lazy_static! {
        "Failed attestation publishes per subnet",
        &["subnet"]
    );
+
+    pub static ref SCORES_BELOW_ZERO_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_scores_below_zero_per_client",
+        "Relative number of scores below zero per client",
+        &["Client"]
+    );
+    pub static ref SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_scores_below_gossip_threshold_per_client",
+        "Relative number of scores below gossip threshold per client",
+        &["Client"]
+    );
+    pub static ref SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_scores_below_publish_threshold_per_client",
+        "Relative number of scores below publish threshold per client",
+        &["Client"]
+    );
+    pub static ref SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_scores_below_greylist_threshold_per_client",
+        "Relative number of scores below greylist threshold per client",
+        &["Client"]
+    );
+
+    pub static ref MIN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_min_scores_per_client",
+        "Minimum scores per client",
+        &["Client"]
+    );
+    pub static ref MEDIAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_median_scores_per_client",
+        "Median scores per client",
+        &["Client"]
+    );
+    pub static ref MEAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_mean_scores_per_client",
+        "Mean scores per client",
+        &["Client"]
+    );
+    pub static ref MAX_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
+        "gossipsub_max_scores_per_client",
+        "Max scores per client",
+        &["Client"]
+    );
+    pub static ref BEACON_BLOCK_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
+        try_create_int_gauge_vec(
+            "block_mesh_peers_per_client",
+            "Number of mesh peers for BeaconBlock topic per client",
+            &["Client"]
+        );
+    pub static ref BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
+        try_create_int_gauge_vec(
+            "beacon_aggregate_and_proof_mesh_peers_per_client",
+            "Number of mesh peers for BeaconAggregateAndProof topic per client",
+            &["Client"]
+        );
 }

 lazy_static! {
--- a/beacon_node/network/src/service.rs
+++ b/beacon_node/network/src/service.rs
@@ -20,7 +20,7 @@ use std::{collections::HashMap, net::SocketAddr, sync::Arc, time::Duration};
 use store::HotColdDB;
 use tokio::sync::mpsc;
 use tokio::time::Delay;
-use types::{EthSpec, ValidatorSubscription};
+use types::{EthSpec, RelativeEpoch, ValidatorSubscription};

 mod tests;

@@ -112,6 +112,8 @@ pub struct NetworkService<T: BeaconChainTypes> {
    next_fork_update: Option<Delay>,
    /// A timer for updating various network metrics.
    metrics_update: tokio::time::Interval,
+    /// gossipsub_parameter_update timer
+    gossipsub_parameter_update: tokio::time::Interval,
    /// The logger for the network service.
    log: slog::Logger,
 }
@@ -153,8 +155,14 @@ impl<T: BeaconChainTypes> NetworkService<T> {
        let next_fork_update = next_fork_delay(&beacon_chain);

        // launch libp2p service
-        let (network_globals, mut libp2p) =
-            LibP2PService::new(executor.clone(), config, enr_fork_id, &network_log).await?;
+        let (network_globals, mut libp2p) = LibP2PService::new(
+            executor.clone(),
+            config,
+            enr_fork_id,
+            &network_log,
+            &beacon_chain.spec,
+        )
+        .await?;

        // Repopulate the DHT with stored ENR's.
        let enrs_to_load = load_dht::<T::EthSpec, T::HotStore, T::ColdStore>(store.clone());
@@ -183,6 +191,9 @@ impl<T: BeaconChainTypes> NetworkService<T> {
        // create a timer for updating network metrics
        let metrics_update = tokio::time::interval(Duration::from_secs(METRIC_UPDATE_INTERVAL));

+        // create a timer for updating gossipsub parameters
+        let gossipsub_parameter_update = tokio::time::interval(Duration::from_secs(60));
+
        // create the network service and spawn the task
        let network_log = network_log.new(o!("service" => "network"));
        let network_service = NetworkService {
@@ -197,6 +208,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
            discovery_auto_update: config.discv5_config.enr_update,
            next_fork_update,
            metrics_update,
+            gossipsub_parameter_update,
            log: network_log,
        };

@@ -256,7 +268,51 @@ fn spawn_service<T: BeaconChainTypes>(
                            .as_ref()
                            .map(|gauge| gauge.reset());
                    }
-                    update_gossip_metrics::<T::EthSpec>(&service.libp2p.swarm.gs());
+                    update_gossip_metrics::<T::EthSpec>(
+                        &service.libp2p.swarm.gs(),
+                        &service.network_globals,
+                        &service.log
+                    );
+                }
+                _ = service.gossipsub_parameter_update.next() => {
+                    if let Ok(slot) = service.beacon_chain.slot() {
+                        if let Some(active_validators) = service.beacon_chain.with_head(|head| {
+                                Ok(
+                                    head
+                                    .beacon_state
+                                    .get_cached_active_validator_indices(RelativeEpoch::Current)
+                                    .map(|indices| indices.len())
+                                    .ok()
+                                    .or_else(|| {
+                                        // if active validator cached was not build we count the
+                                        // active validators
+                                        service
+                                            .beacon_chain
+                                            .epoch()
+                                            .ok()
+                                            .map(|current_epoch| {
+                                                head
+                                                .beacon_state
+                                                .validators
+                                                .iter()
+                                                .filter(|validator|
+                                                    validator.is_active_at(current_epoch)
+                                                )
+                                                .count()
+                                            })
+                                    })
+                                )
+                            }).unwrap_or(None) {
+                            if (*service.libp2p.swarm)
+                                .update_gossipsub_parameters(active_validators, slot).is_err() {
+                                error!(
+                                    service.log,
+                                    "Failed to update gossipsub parameters";
+                                    "active_validators" => active_validators
+                                );
+                            }
+                        }
+                    }
                }
                // handle a message sent to the network
                Some(message) = service.network_recv.recv() => {
@@ -296,6 +352,7 @@ fn spawn_service<T: BeaconChainTypes>(
                                trace!(service.log, "Propagating gossipsub message";
                                    "propagation_peer" => format!("{:?}", propagation_source),
                                    "message_id" => message_id.to_string(),
+                                    "validation_result" => format!("{:?}", validation_result)
                                );
                                service
                                    .libp2p
@@ -537,7 +594,11 @@ fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
    }
 }

-fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
+fn update_gossip_metrics<T: EthSpec>(
+    gossipsub: &Gossipsub,
+    network_globals: &Arc<NetworkGlobals<T>>,
+    logger: &slog::Logger,
+) {
    // Clear the metrics
    let _ = metrics::PEERS_PER_PROTOCOL
        .as_ref()
@@ -555,6 +616,38 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
        .as_ref()
        .map(|gauge| gauge.reset());

+    let _ = metrics::SCORES_BELOW_ZERO_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::MIN_SCORES_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::MEDIAN_SCORES_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::MEAN_SCORES_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::MAX_SCORES_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+
+    let _ = metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+    let _ = metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
+        .as_ref()
+        .map(|gauge| gauge.reset());
+
    // reset the mesh peers, showing all subnets
    for subnet_id in 0..T::default_spec().attestation_subnet_count {
        let _ = metrics::get_int_gauge(
@@ -607,22 +700,22 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {

                        // average peer scores
                        if let Some(score) = gossipsub.peer_score(peer_id) {
-                            if let Some(v) = metrics::get_int_gauge(
+                            if let Some(v) = metrics::get_gauge(
                                &metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
                                &[&subnet_id.to_string()],
                            ) {
-                                v.add(score as i64)
+                                v.add(score)
                            };
                        }
                    }
                    kind => {
                        // main topics
                        if let Some(score) = gossipsub.peer_score(peer_id) {
-                            if let Some(v) = metrics::get_int_gauge(
+                            if let Some(v) = metrics::get_gauge(
                                &metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
                                &[&format!("{:?}", kind)],
                            ) {
-                                v.add(score as i64)
+                                v.add(score)
                            };
                        }
                    }
@@ -636,20 +729,20 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
            match topic.kind() {
                GossipKind::Attestation(subnet_id) => {
                    // average peer scores
-                    if let Some(v) = metrics::get_int_gauge(
+                    if let Some(v) = metrics::get_gauge(
                        &metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
                        &[&subnet_id.to_string()],
                    ) {
-                        v.set(v.get() / (*peers as i64))
+                        v.set(v.get() / (*peers as f64))
                    };
                }
                kind => {
                    // main topics
-                    if let Some(v) = metrics::get_int_gauge(
+                    if let Some(v) = metrics::get_gauge(
                        &metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
                        &[&format!("{:?}", kind)],
                    ) {
-                        v.set(v.get() / (*peers as i64))
+                        v.set(v.get() / (*peers as f64))
                    };
                }
            }
@@ -695,4 +788,132 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
            v.set(*peers)
        };
    }
+
+    let mut peer_to_client = HashMap::new();
+    let mut scores_per_client: HashMap<String, Vec<f64>> = HashMap::new();
+    {
+        let peers = network_globals.peers.read();
+        for (peer_id, _) in gossipsub.all_peers() {
+            let client = peers
+                .peer_info(peer_id)
+                .map_or("Unknown".to_string(), |peer_info| {
+                    peer_info.client.kind.to_string()
+                });
+            peer_to_client.insert(peer_id, client.clone());
+            let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
+            if (client == "Prysm" || client == "Lighthouse") && score < 0.0 {
+                trace!(logger, "Peer has negative score"; "peer" => format!("{:?}", peer_id),
+                       "client" => &client, "score" => score);
+            }
+            scores_per_client.entry(client).or_default().push(score);
+        }
+    }
+
+    // mesh peers per client
+    for topic_hash in gossipsub.topics() {
+        if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
+            match topic.kind() {
+                GossipKind::BeaconBlock => {
+                    for peer in gossipsub.mesh_peers(&topic_hash) {
+                        if let Some(client) = peer_to_client.get(peer) {
+                            if let Some(v) = metrics::get_int_gauge(
+                                &metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT,
+                                &[client],
+                            ) {
+                                v.inc()
+                            };
+                        }
+                    }
+                }
+                GossipKind::BeaconAggregateAndProof => {
+                    for peer in gossipsub.mesh_peers(&topic_hash) {
+                        if let Some(client) = peer_to_client.get(peer) {
+                            if let Some(v) = metrics::get_int_gauge(
+                                &metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
+                                &[client],
+                            ) {
+                                v.inc()
+                            };
+                        }
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+
+    for (client, scores) in scores_per_client.into_iter() {
+        let c = &[client.as_ref()];
+        let len = scores.len();
+        if len > 0 {
+            let mut below0 = 0;
+            let mut below_gossip_threshold = 0;
+            let mut below_publish_threshold = 0;
+            let mut below_greylist_threshold = 0;
+            let mut min = f64::INFINITY;
+            let mut sum = 0.0;
+            let mut max = f64::NEG_INFINITY;
+
+            let count = scores.len() as f64;
+
+            for &score in &scores {
+                if score < 0.0 {
+                    below0 += 1;
+                }
+                if score < -4000.0 {
+                    //TODO not hardcode
+                    below_gossip_threshold += 1;
+                }
+                if score < -8000.0 {
+                    //TODO not hardcode
+                    below_publish_threshold += 1;
+                }
+                if score < -16000.0 {
+                    //TODO not hardcode
+                    below_greylist_threshold += 1;
+                }
+                if score < min {
+                    min = score;
+                }
+                if score > max {
+                    max = score;
+                }
+                sum += score;
+            }
+
+            let median = if len == 0 {
+                0.0
+            } else if len % 2 == 0 {
+                (scores[len / 2 - 1] + scores[len / 2]) / 2.0
+            } else {
+                scores[len / 2]
+            };
+
+            metrics::set_gauge_entry(
+                &metrics::SCORES_BELOW_ZERO_PER_CLIENT,
+                c,
+                below0 as f64 / count,
+            );
+            metrics::set_gauge_entry(
+                &metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
+                c,
+                below_gossip_threshold as f64 / count,
+            );
+            metrics::set_gauge_entry(
+                &metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
+                c,
+                below_publish_threshold as f64 / count,
+            );
+            metrics::set_gauge_entry(
+                &metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
+                c,
+                below_greylist_threshold as f64 / count,
+            );
+
+            metrics::set_gauge_entry(&metrics::MIN_SCORES_PER_CLIENT, c, min);
+            metrics::set_gauge_entry(&metrics::MEDIAN_SCORES_PER_CLIENT, c, median);
+            metrics::set_gauge_entry(&metrics::MEAN_SCORES_PER_CLIENT, c, sum / count);
+            metrics::set_gauge_entry(&metrics::MAX_SCORES_PER_CLIENT, c, max);
+        }
+    }
 }