Gossipsub scoring (#1668)

## Issue Addressed

#1606 

## Proposed Changes

Uses dynamic gossipsub scoring parameters depending on the number of active validators as specified in https://gist.github.com/blacktemplar/5c1862cb3f0e32a1a7fb0b25e79e6e2c.

## Additional Info

Although the parameters got tested on Medalla, extensive testing using simulations on larger networks is still to be done and we expect that we need to change the parameters, although this might only affect constants within the dynamic parameter framework.
This commit is contained in:
blacktemplar
2020-11-12 01:48:28 +00:00
parent f0c9339153
commit 7404f1ce54
18 changed files with 1061 additions and 166 deletions

View File

@@ -36,13 +36,13 @@ lazy_static! {
&["subnet"]
);
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_avg_peer_score_per_topic",
"Average peer's score per topic",
&["topic_hash"]
);
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_avg_peer_score_per_subnet_topic",
"Average peer's score per subnet topic",
&["subnet"]
@@ -53,6 +53,60 @@ lazy_static! {
"Failed attestation publishes per subnet",
&["subnet"]
);
pub static ref SCORES_BELOW_ZERO_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_zero_per_client",
"Relative number of scores below zero per client",
&["Client"]
);
pub static ref SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_gossip_threshold_per_client",
"Relative number of scores below gossip threshold per client",
&["Client"]
);
pub static ref SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_publish_threshold_per_client",
"Relative number of scores below publish threshold per client",
&["Client"]
);
pub static ref SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_scores_below_greylist_threshold_per_client",
"Relative number of scores below greylist threshold per client",
&["Client"]
);
pub static ref MIN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_min_scores_per_client",
"Minimum scores per client",
&["Client"]
);
pub static ref MEDIAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_median_scores_per_client",
"Median scores per client",
&["Client"]
);
pub static ref MEAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_mean_scores_per_client",
"Mean scores per client",
&["Client"]
);
pub static ref MAX_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
"gossipsub_max_scores_per_client",
"Max scores per client",
&["Client"]
);
pub static ref BEACON_BLOCK_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
try_create_int_gauge_vec(
"block_mesh_peers_per_client",
"Number of mesh peers for BeaconBlock topic per client",
&["Client"]
);
pub static ref BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
try_create_int_gauge_vec(
"beacon_aggregate_and_proof_mesh_peers_per_client",
"Number of mesh peers for BeaconAggregateAndProof topic per client",
&["Client"]
);
}
lazy_static! {

View File

@@ -20,7 +20,7 @@ use std::{collections::HashMap, net::SocketAddr, sync::Arc, time::Duration};
use store::HotColdDB;
use tokio::sync::mpsc;
use tokio::time::Delay;
use types::{EthSpec, ValidatorSubscription};
use types::{EthSpec, RelativeEpoch, ValidatorSubscription};
mod tests;
@@ -112,6 +112,8 @@ pub struct NetworkService<T: BeaconChainTypes> {
next_fork_update: Option<Delay>,
/// A timer for updating various network metrics.
metrics_update: tokio::time::Interval,
/// gossipsub_parameter_update timer
gossipsub_parameter_update: tokio::time::Interval,
/// The logger for the network service.
log: slog::Logger,
}
@@ -153,8 +155,14 @@ impl<T: BeaconChainTypes> NetworkService<T> {
let next_fork_update = next_fork_delay(&beacon_chain);
// launch libp2p service
let (network_globals, mut libp2p) =
LibP2PService::new(executor.clone(), config, enr_fork_id, &network_log).await?;
let (network_globals, mut libp2p) = LibP2PService::new(
executor.clone(),
config,
enr_fork_id,
&network_log,
&beacon_chain.spec,
)
.await?;
// Repopulate the DHT with stored ENR's.
let enrs_to_load = load_dht::<T::EthSpec, T::HotStore, T::ColdStore>(store.clone());
@@ -183,6 +191,9 @@ impl<T: BeaconChainTypes> NetworkService<T> {
// create a timer for updating network metrics
let metrics_update = tokio::time::interval(Duration::from_secs(METRIC_UPDATE_INTERVAL));
// create a timer for updating gossipsub parameters
let gossipsub_parameter_update = tokio::time::interval(Duration::from_secs(60));
// create the network service and spawn the task
let network_log = network_log.new(o!("service" => "network"));
let network_service = NetworkService {
@@ -197,6 +208,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
discovery_auto_update: config.discv5_config.enr_update,
next_fork_update,
metrics_update,
gossipsub_parameter_update,
log: network_log,
};
@@ -256,7 +268,51 @@ fn spawn_service<T: BeaconChainTypes>(
.as_ref()
.map(|gauge| gauge.reset());
}
update_gossip_metrics::<T::EthSpec>(&service.libp2p.swarm.gs());
update_gossip_metrics::<T::EthSpec>(
&service.libp2p.swarm.gs(),
&service.network_globals,
&service.log
);
}
_ = service.gossipsub_parameter_update.next() => {
if let Ok(slot) = service.beacon_chain.slot() {
if let Some(active_validators) = service.beacon_chain.with_head(|head| {
Ok(
head
.beacon_state
.get_cached_active_validator_indices(RelativeEpoch::Current)
.map(|indices| indices.len())
.ok()
.or_else(|| {
// if active validator cached was not build we count the
// active validators
service
.beacon_chain
.epoch()
.ok()
.map(|current_epoch| {
head
.beacon_state
.validators
.iter()
.filter(|validator|
validator.is_active_at(current_epoch)
)
.count()
})
})
)
}).unwrap_or(None) {
if (*service.libp2p.swarm)
.update_gossipsub_parameters(active_validators, slot).is_err() {
error!(
service.log,
"Failed to update gossipsub parameters";
"active_validators" => active_validators
);
}
}
}
}
// handle a message sent to the network
Some(message) = service.network_recv.recv() => {
@@ -296,6 +352,7 @@ fn spawn_service<T: BeaconChainTypes>(
trace!(service.log, "Propagating gossipsub message";
"propagation_peer" => format!("{:?}", propagation_source),
"message_id" => message_id.to_string(),
"validation_result" => format!("{:?}", validation_result)
);
service
.libp2p
@@ -537,7 +594,11 @@ fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
}
}
fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
fn update_gossip_metrics<T: EthSpec>(
gossipsub: &Gossipsub,
network_globals: &Arc<NetworkGlobals<T>>,
logger: &slog::Logger,
) {
// Clear the metrics
let _ = metrics::PEERS_PER_PROTOCOL
.as_ref()
@@ -555,6 +616,38 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_ZERO_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MIN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MEDIAN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MEAN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MAX_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
// reset the mesh peers, showing all subnets
for subnet_id in 0..T::default_spec().attestation_subnet_count {
let _ = metrics::get_int_gauge(
@@ -607,22 +700,22 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
// average peer scores
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = metrics::get_int_gauge(
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
&[&subnet_id.to_string()],
) {
v.add(score as i64)
v.add(score)
};
}
}
kind => {
// main topics
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = metrics::get_int_gauge(
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
&[&format!("{:?}", kind)],
) {
v.add(score as i64)
v.add(score)
};
}
}
@@ -636,20 +729,20 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
// average peer scores
if let Some(v) = metrics::get_int_gauge(
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
&[&subnet_id.to_string()],
) {
v.set(v.get() / (*peers as i64))
v.set(v.get() / (*peers as f64))
};
}
kind => {
// main topics
if let Some(v) = metrics::get_int_gauge(
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
&[&format!("{:?}", kind)],
) {
v.set(v.get() / (*peers as i64))
v.set(v.get() / (*peers as f64))
};
}
}
@@ -695,4 +788,132 @@ fn update_gossip_metrics<T: EthSpec>(gossipsub: &Gossipsub) {
v.set(*peers)
};
}
let mut peer_to_client = HashMap::new();
let mut scores_per_client: HashMap<String, Vec<f64>> = HashMap::new();
{
let peers = network_globals.peers.read();
for (peer_id, _) in gossipsub.all_peers() {
let client = peers
.peer_info(peer_id)
.map_or("Unknown".to_string(), |peer_info| {
peer_info.client.kind.to_string()
});
peer_to_client.insert(peer_id, client.clone());
let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
if (client == "Prysm" || client == "Lighthouse") && score < 0.0 {
trace!(logger, "Peer has negative score"; "peer" => format!("{:?}", peer_id),
"client" => &client, "score" => score);
}
scores_per_client.entry(client).or_default().push(score);
}
}
// mesh peers per client
for topic_hash in gossipsub.topics() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::BeaconBlock => {
for peer in gossipsub.mesh_peers(&topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) = metrics::get_int_gauge(
&metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
}
}
GossipKind::BeaconAggregateAndProof => {
for peer in gossipsub.mesh_peers(&topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) = metrics::get_int_gauge(
&metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
}
}
_ => (),
}
}
}
for (client, scores) in scores_per_client.into_iter() {
let c = &[client.as_ref()];
let len = scores.len();
if len > 0 {
let mut below0 = 0;
let mut below_gossip_threshold = 0;
let mut below_publish_threshold = 0;
let mut below_greylist_threshold = 0;
let mut min = f64::INFINITY;
let mut sum = 0.0;
let mut max = f64::NEG_INFINITY;
let count = scores.len() as f64;
for &score in &scores {
if score < 0.0 {
below0 += 1;
}
if score < -4000.0 {
//TODO not hardcode
below_gossip_threshold += 1;
}
if score < -8000.0 {
//TODO not hardcode
below_publish_threshold += 1;
}
if score < -16000.0 {
//TODO not hardcode
below_greylist_threshold += 1;
}
if score < min {
min = score;
}
if score > max {
max = score;
}
sum += score;
}
let median = if len == 0 {
0.0
} else if len % 2 == 0 {
(scores[len / 2 - 1] + scores[len / 2]) / 2.0
} else {
scores[len / 2]
};
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_ZERO_PER_CLIENT,
c,
below0 as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
c,
below_gossip_threshold as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
c,
below_publish_threshold as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
c,
below_greylist_threshold as f64 / count,
);
metrics::set_gauge_entry(&metrics::MIN_SCORES_PER_CLIENT, c, min);
metrics::set_gauge_entry(&metrics::MEDIAN_SCORES_PER_CLIENT, c, median);
metrics::set_gauge_entry(&metrics::MEAN_SCORES_PER_CLIENT, c, sum / count);
metrics::set_gauge_entry(&metrics::MAX_SCORES_PER_CLIENT, c, max);
}
}
}