Upgrade to tokio 0.3 (#1839)

## Description

This PR updates Lighthouse to tokio 0.3. It includes a number of dependency updates and some structural changes to how we create and spawn tasks.

This also brings with it a number of improvements:

- Discv5 update
- Libp2p update
- Fix for recompilation issues
- Improved UPnP port mapping handling
- Futures dependency update
- Logs for rejecting peers when we've reached our max are downgraded to trace level



Co-authored-by: blacktemplar <blacktemplar@a1.net>
This commit is contained in:
Age Manning
2020-11-28 05:30:57 +00:00
parent 5a3b94cbb4
commit a567f788bd
81 changed files with 3666 additions and 2762 deletions

View File

@@ -11,6 +11,8 @@ lazy_static = "1.4.0"
matches = "0.1.8"
tempfile = "3.1.0"
exit-future = "0.2.0"
slog-term = "2.6.0"
slog-async = "2.5.0"
[dependencies]
beacon_chain = { path = "../beacon_chain" }
@@ -25,9 +27,9 @@ hex = "0.4.2"
eth2_ssz = "0.1.2"
eth2_ssz_types = { path = "../../consensus/ssz_types" }
tree_hash = "0.1.1"
futures = "0.3.5"
futures = "0.3.7"
error-chain = "0.12.4"
tokio = { version = "0.2.22", features = ["full"] }
tokio = { version = "0.3.2", features = ["full"] }
parking_lot = "0.11.0"
smallvec = "1.4.2"
rand = "0.7.3"

View File

@@ -156,7 +156,7 @@ mod tests {
tokio::select! {
_ = collect_stream_fut => {return events}
_ = tokio::time::delay_for(
_ = tokio::time::sleep(
Duration::from_millis(SLOT_DURATION_MILLIS) * num_slots_before_timeout,
) => { return events; }
}

View File

@@ -233,6 +233,8 @@ impl<T: BeaconChainTypes> Worker<T> {
| Err(e @ BlockError::BeaconChainError(_)) => {
debug!(self.log, "Could not verify block for gossip, ignoring the block";
"error" => e.to_string());
// Prevent recurring behaviour by penalizing the peer slightly.
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return;
}
@@ -511,6 +513,12 @@ impl<T: BeaconChainTypes> Worker<T> {
"block" => %beacon_block_root,
"type" => ?attestation_type,
);
// Peers that are slow or not to spec can spam us with these messages draining our
// bandwidth. We therefore penalize these peers when they do this.
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
// Do not propagate these messages.
self.propagate_validation_result(
message_id,
peer_id.clone(),
@@ -618,7 +626,12 @@ impl<T: BeaconChainTypes> Worker<T> {
"block" => %beacon_block_root,
"type" => ?attestation_type,
);
// We still penalize the peer slightly. We don't want this to be a recurring
// behaviour.
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return;
}
AttnError::PriorAttestationKnown { .. } => {
@@ -634,7 +647,12 @@ impl<T: BeaconChainTypes> Worker<T> {
"block" => %beacon_block_root,
"type" => ?attestation_type,
);
// We still penalize the peer slightly. We don't want this to be a recurring
// behaviour.
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return;
}
AttnError::ValidatorIndexTooHigh(_) => {
@@ -677,6 +695,10 @@ impl<T: BeaconChainTypes> Worker<T> {
"msg" => "UnknownBlockHash"
)
});
// We still penalize the peer slightly. We don't want this to be a recurring
// behaviour.
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return;
}

View File

@@ -3,15 +3,18 @@ extern crate lazy_static;
/// This crate provides the network server for Lighthouse.
pub mod error;
#[allow(clippy::mutable_key_type)] // PeerId in hashmaps are no longer permitted by clippy
pub mod service;
mod attestation_service;
mod beacon_processor;
#[allow(clippy::mutable_key_type)] // PeerId in hashmaps are no longer permitted by clippy
mod metrics;
mod nat;
mod persisted_dht;
mod router;
mod status;
#[allow(clippy::mutable_key_type)] // PeerId in hashmaps are no longer permitted by clippy
mod sync;
pub use eth2_libp2p::NetworkConfig;

View File

@@ -1,5 +1,12 @@
use beacon_chain::attestation_verification::Error as AttnError;
use eth2_libp2p::PubsubMessage;
use eth2_libp2p::{
types::GossipKind, BandwidthSinks, GossipTopic, Gossipsub, NetworkGlobals, TopicHash,
};
use fnv::FnvHashMap;
pub use lighthouse_metrics::*;
use std::{collections::HashMap, sync::Arc};
use types::{subnet_id::subnet_id_to_string, EthSpec};
lazy_static! {
@@ -404,6 +411,27 @@ lazy_static! {
"gossipsub_attestation_error_beacon_chain_error",
"Count of a specific error type (see metric name)"
);
pub static ref INBOUND_LIBP2P_BYTES: Result<IntGauge> =
try_create_int_gauge("libp2p_inbound_bytes", "The inbound bandwidth over libp2p");
pub static ref OUTBOUND_LIBP2P_BYTES: Result<IntGauge> = try_create_int_gauge(
"libp2p_outbound_bytes",
"The outbound bandwidth over libp2p"
);
pub static ref TOTAL_LIBP2P_BANDWIDTH: Result<IntGauge> = try_create_int_gauge(
"libp2p_total_bandwidth",
"The total inbound/outbound bandwidth over libp2p"
);
}
/// Publishes the libp2p bandwidth counters held by `bandwidth` to the Prometheus gauges
/// `INBOUND_LIBP2P_BYTES`, `OUTBOUND_LIBP2P_BYTES` and `TOTAL_LIBP2P_BANDWIDTH`.
///
/// Each counter is sampled exactly once so that the total gauge always equals the sum of
/// the inbound and outbound gauges published by the same call.
pub fn update_bandwidth_metrics(bandwidth: Arc<BandwidthSinks>) {
    let inbound = bandwidth.total_inbound();
    let outbound = bandwidth.total_outbound();

    set_gauge(&INBOUND_LIBP2P_BYTES, inbound as i64);
    set_gauge(&OUTBOUND_LIBP2P_BYTES, outbound as i64);
    set_gauge(&TOTAL_LIBP2P_BANDWIDTH, (inbound + outbound) as i64);
}
lazy_static! {
@@ -486,3 +514,359 @@ pub fn register_attestation_error(error: &AttnError) {
AttnError::BeaconChainError(_) => inc_counter(&GOSSIP_ATTESTATION_ERROR_BEACON_CHAIN_ERROR),
}
}
/// Bumps the Prometheus "TX" counters for every message in `messages` that is about to be
/// published to the network.
///
/// Blocks, unaggregated attestations (also counted per subnet) and aggregated attestations
/// are tracked; every other message kind is ignored.
pub fn expose_publish_metrics<T: EthSpec>(messages: &[PubsubMessage<T>]) {
    messages.iter().for_each(|outgoing| match outgoing {
        PubsubMessage::BeaconBlock(_) => inc_counter(&GOSSIP_BLOCKS_TX),
        PubsubMessage::AggregateAndProofAttestation(_) => {
            inc_counter(&GOSSIP_AGGREGATED_ATTESTATIONS_TX)
        }
        PubsubMessage::Attestation(subnet_and_attestation) => {
            // The first tuple element identifies the subnet and is used as the label.
            inc_counter_vec(
                &ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT,
                &[&subnet_and_attestation.0.as_ref()],
            );
            inc_counter(&GOSSIP_UNAGGREGATED_ATTESTATIONS_TX)
        }
        _ => {}
    });
}
/// Bumps the Prometheus "RX" counter matching the kind of `message` received from the network.
///
/// Only blocks, unaggregated attestations and aggregated attestations are counted; any
/// other message kind leaves the metrics untouched.
pub fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
    if let PubsubMessage::BeaconBlock(_) = message {
        inc_counter(&GOSSIP_BLOCKS_RX);
    } else if let PubsubMessage::Attestation(_) = message {
        inc_counter(&GOSSIP_UNAGGREGATED_ATTESTATIONS_RX);
    } else if let PubsubMessage::AggregateAndProofAttestation(_) = message {
        inc_counter(&GOSSIP_AGGREGATED_ATTESTATIONS_RX);
    }
}
/// Refreshes the gossipsub-related Prometheus gauges from the current state of `gossipsub`
/// and the peer database held in `network_globals`.
///
/// Every call first resets the affected gauges and then recomputes:
/// - which attestation subnets we are subscribed to and how many peers subscribe to each,
/// - the average gossipsub peer score per topic,
/// - the number of mesh peers per topic and, for the block and aggregate topics, per client,
/// - the number of peers per gossipsub protocol,
/// - per-client score statistics: min, median, mean, max and the fraction of peers whose
///   score is below zero and below each of the (currently hard-coded) score thresholds.
pub fn update_gossip_metrics<T: EthSpec>(
    gossipsub: &Gossipsub,
    network_globals: &Arc<NetworkGlobals<T>>,
) {
    // Clear the metrics
    let _ = PEERS_PER_PROTOCOL.as_ref().map(|gauge| gauge.reset());
    let _ = MESH_PEERS_PER_MAIN_TOPIC
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = SCORES_BELOW_ZERO_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = MIN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
    let _ = MEDIAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
    let _ = MEAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
    let _ = MAX_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
    let _ = BEACON_BLOCK_MESH_PEERS_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());
    let _ = BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
        .as_ref()
        .map(|gauge| gauge.reset());

    // reset the mesh peers, showing all subnets
    for subnet_id in 0..T::default_spec().attestation_subnet_count {
        let _ = get_int_gauge(
            &MESH_PEERS_PER_SUBNET_TOPIC,
            &[subnet_id_to_string(subnet_id)],
        )
        .map(|v| v.set(0));

        let _ = get_int_gauge(
            &GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
            &[subnet_id_to_string(subnet_id)],
        )
        .map(|v| v.set(0));

        let _ = get_int_gauge(
            &GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
            &[subnet_id_to_string(subnet_id)],
        )
        .map(|v| v.set(0));
    }

    // Subnet topics subscribed to
    for topic_hash in gossipsub.topics() {
        if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
            if let GossipKind::Attestation(subnet_id) = topic.kind() {
                let _ = get_int_gauge(
                    &GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
                    &[subnet_id_to_string(subnet_id.into())],
                )
                .map(|v| v.set(1));
            }
        }
    }

    // Peers per subscribed subnet
    //
    // While counting, the per-topic score gauges are used as accumulators: they hold the
    // *sum* of the peer scores here and are turned into averages in the next loop.
    let mut peers_per_topic: HashMap<TopicHash, usize> = HashMap::new();
    for (peer_id, topics) in gossipsub.all_peers() {
        for topic_hash in topics {
            *peers_per_topic.entry(topic_hash.clone()).or_default() += 1;

            if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
                match topic.kind() {
                    GossipKind::Attestation(subnet_id) => {
                        if let Some(v) = get_int_gauge(
                            &GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
                            &[subnet_id_to_string(subnet_id.into())],
                        ) {
                            v.inc()
                        };

                        // average peer scores
                        if let Some(score) = gossipsub.peer_score(peer_id) {
                            if let Some(v) = get_gauge(
                                &AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
                                &[subnet_id_to_string(subnet_id.into())],
                            ) {
                                v.add(score)
                            };
                        }
                    }
                    kind => {
                        // main topics
                        if let Some(score) = gossipsub.peer_score(peer_id) {
                            if let Some(v) = get_gauge(
                                &AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
                                &[kind.as_ref()],
                            ) {
                                v.add(score)
                            };
                        }
                    }
                }
            }
        }
    }

    // adjust to average scores by dividing by number of peers
    // NOTE: the divisor counts every peer subscribed to the topic, including peers for
    // which `peer_score` returned `None` and which therefore did not contribute to the sum.
    for (topic_hash, peers) in peers_per_topic.iter() {
        if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
            match topic.kind() {
                GossipKind::Attestation(subnet_id) => {
                    // average peer scores
                    if let Some(v) = get_gauge(
                        &AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
                        &[subnet_id_to_string(subnet_id.into())],
                    ) {
                        v.set(v.get() / (*peers as f64))
                    };
                }
                kind => {
                    // main topics
                    if let Some(v) =
                        get_gauge(&AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC, &[kind.as_ref()])
                    {
                        v.set(v.get() / (*peers as f64))
                    };
                }
            }
        }
    }

    // mesh peers
    for topic_hash in gossipsub.topics() {
        let peers = gossipsub.mesh_peers(&topic_hash).count();
        if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
            match topic.kind() {
                GossipKind::Attestation(subnet_id) => {
                    if let Some(v) = get_int_gauge(
                        &MESH_PEERS_PER_SUBNET_TOPIC,
                        &[subnet_id_to_string(subnet_id.into())],
                    ) {
                        v.set(peers as i64)
                    };
                }
                kind => {
                    // main topics
                    if let Some(v) = get_int_gauge(&MESH_PEERS_PER_MAIN_TOPIC, &[kind.as_ref()]) {
                        v.set(peers as i64)
                    };
                }
            }
        }
    }

    // protocol peers
    let mut peers_per_protocol: HashMap<&'static str, i64> = HashMap::new();
    for (_peer, protocol) in gossipsub.peer_protocol() {
        *peers_per_protocol
            .entry(protocol.as_static_ref())
            .or_default() += 1;
    }

    for (protocol, peers) in peers_per_protocol.iter() {
        if let Some(v) = get_int_gauge(&PEERS_PER_PROTOCOL, &[protocol]) {
            v.set(*peers)
        };
    }

    // Map every gossipsub peer to its client kind and collect the scores per client. The
    // peer database read lock is only held for the duration of this block.
    let mut peer_to_client = HashMap::new();
    let mut scores_per_client: HashMap<&'static str, Vec<f64>> = HashMap::new();
    {
        let peers = network_globals.peers.read();
        for (peer_id, _) in gossipsub.all_peers() {
            let client = peers
                .peer_info(peer_id)
                .map(|peer_info| peer_info.client.kind.as_static_ref())
                .unwrap_or_else(|| "Unknown");

            peer_to_client.insert(peer_id, client);
            // Peers without a score are counted with a score of zero.
            let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
            scores_per_client.entry(client).or_default().push(score);
        }
    }

    // mesh peers per client
    for topic_hash in gossipsub.topics() {
        if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
            match topic.kind() {
                GossipKind::BeaconBlock => {
                    for peer in gossipsub.mesh_peers(&topic_hash) {
                        if let Some(client) = peer_to_client.get(peer) {
                            if let Some(v) =
                                get_int_gauge(&BEACON_BLOCK_MESH_PEERS_PER_CLIENT, &[client])
                            {
                                v.inc()
                            };
                        }
                    }
                }
                GossipKind::BeaconAggregateAndProof => {
                    for peer in gossipsub.mesh_peers(&topic_hash) {
                        if let Some(client) = peer_to_client.get(peer) {
                            if let Some(v) = get_int_gauge(
                                &BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
                                &[client],
                            ) {
                                v.inc()
                            };
                        }
                    }
                }
                _ => (),
            }
        }
    }

    // per-client score statistics
    for (client, mut scores) in scores_per_client {
        if scores.is_empty() {
            continue;
        }

        let c = &[client];
        let len = scores.len();
        let count = len as f64;

        // The median is read by position below, which is only correct on ordered data.
        // Incomparable values (NaN) are treated as equal so the sort cannot panic.
        scores.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let mut below0 = 0;
        let mut below_gossip_threshold = 0;
        let mut below_publish_threshold = 0;
        let mut below_greylist_threshold = 0;
        let mut min = f64::INFINITY;
        let mut sum = 0.0;
        let mut max = f64::NEG_INFINITY;

        for &score in &scores {
            if score < 0.0 {
                below0 += 1;
            }
            if score < -4000.0 {
                //TODO not hardcode
                below_gossip_threshold += 1;
            }
            if score < -8000.0 {
                //TODO not hardcode
                below_publish_threshold += 1;
            }
            if score < -16000.0 {
                //TODO not hardcode
                below_greylist_threshold += 1;
            }
            if score < min {
                min = score;
            }
            if score > max {
                max = score;
            }
            sum += score;
        }

        // `len > 0` is guaranteed by the guard above.
        let median = if len % 2 == 0 {
            (scores[len / 2 - 1] + scores[len / 2]) / 2.0
        } else {
            scores[len / 2]
        };

        set_gauge_entry(&SCORES_BELOW_ZERO_PER_CLIENT, c, below0 as f64 / count);
        set_gauge_entry(
            &SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
            c,
            below_gossip_threshold as f64 / count,
        );
        set_gauge_entry(
            &SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
            c,
            below_publish_threshold as f64 / count,
        );
        set_gauge_entry(
            &SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
            c,
            below_greylist_threshold as f64 / count,
        );

        set_gauge_entry(&MIN_SCORES_PER_CLIENT, c, min);
        set_gauge_entry(&MEDIAN_SCORES_PER_CLIENT, c, median);
        set_gauge_entry(&MEAN_SCORES_PER_CLIENT, c, sum / count);
        set_gauge_entry(&MAX_SCORES_PER_CLIENT, c, max);
    }
}
/// Recomputes the `PEERS_PER_SYNC_TYPE` gauge: the number of currently connected peers for
/// each sync status string.
///
/// Does nothing if the metric failed to initialise.
pub fn update_sync_metrics<T: EthSpec>(network_globals: &Arc<NetworkGlobals<T>>) {
    // Start from a clean slate; bail out early when the metric is unavailable.
    match PEERS_PER_SYNC_TYPE.as_ref() {
        Ok(metric) => metric.reset(),
        Err(_) => return,
    }

    // Tally the connected peers by sync status while holding the peer database read lock.
    let mut tally = FnvHashMap::default();
    {
        let peer_db = network_globals.peers.read();
        for (_peer_id, info) in peer_db.connected_peers() {
            *tally.entry(info.sync_status.as_str()).or_default() += 1;
        }
    }

    // Publish one gauge entry per observed sync status.
    for (sync_type, peer_count) in tally {
        set_gauge_entry(&PEERS_PER_SYNC_TYPE, &[sync_type], peer_count);
    }
}

View File

@@ -5,7 +5,7 @@
use crate::{NetworkConfig, NetworkMessage};
use if_addrs::get_if_addrs;
use slog::{debug, info, warn};
use slog::{debug, info};
use std::net::{IpAddr, SocketAddr, SocketAddrV4};
use tokio::sync::mpsc;
use types::EthSpec;
@@ -70,6 +70,8 @@ pub fn construct_upnp_mappings<T: EthSpec>(
Some(v) => v,
};
debug!(log, "UPnP Local IP Discovered"; "ip" => ?local_ip);
match local_ip {
IpAddr::V4(address) => {
let libp2p_socket = SocketAddrV4::new(address, config.tcp_port);
@@ -78,53 +80,39 @@ pub fn construct_upnp_mappings<T: EthSpec>(
// one.
// I've found this to be more reliable. If multiple users are behind a single
// router, they should ideally try to set different port numbers.
let tcp_socket = match gateway.add_port(
let tcp_socket = add_port_mapping(
&gateway,
igd::PortMappingProtocol::TCP,
libp2p_socket.port(),
libp2p_socket,
0,
"lighthouse-tcp",
) {
Err(e) => {
info!(log, "UPnP TCP route not set"; "error" => %e);
None
}
Ok(_) => {
info!(log, "UPnP TCP route established"; "external_socket" => format!("{}:{}", external_ip.as_ref().map(|ip| ip.to_string()).unwrap_or_else(|_| "".into()), config.tcp_port));
external_ip
.as_ref()
.map(|ip| SocketAddr::new(ip.clone().into(), config.tcp_port))
.ok()
}
};
"tcp",
&log,
).and_then(|_| {
let external_socket = external_ip.as_ref().map(|ip| SocketAddr::new(ip.clone().into(), config.tcp_port)).map_err(|_| ());
info!(log, "UPnP TCP route established"; "external_socket" => format!("{}:{}", external_socket.as_ref().map(|ip| ip.to_string()).unwrap_or_else(|_| "".into()), config.tcp_port));
external_socket
}).ok();
let udp_socket = if !config.disable_discovery {
let discovery_socket = SocketAddrV4::new(address, config.udp_port);
match gateway.add_port(
add_port_mapping(
&gateway,
igd::PortMappingProtocol::UDP,
discovery_socket.port(),
discovery_socket,
0,
"lighthouse-udp",
) {
Err(e) => {
info!(log, "UPnP UDP route not set"; "error" => %e);
None
}
Ok(_) => {
info!(log, "UPnP UDP route established"; "external_socket" => format!("{}:{}", external_ip.as_ref().map(|ip| ip.to_string()).unwrap_or_else(|_| "".into()), config.tcp_port));
external_ip
.map(|ip| SocketAddr::new(ip.into(), config.tcp_port))
.ok()
}
}
"udp",
&log,
).and_then(|_| {
let external_socket = external_ip
.map(|ip| SocketAddr::new(ip.into(), config.udp_port)).map_err(|_| ());
info!(log, "UPnP UDP route established"; "external_socket" => format!("{}:{}", external_socket.as_ref().map(|ip| ip.to_string()).unwrap_or_else(|_| "".into()), config.udp_port));
external_socket
}).ok()
} else {
None
};
// report any updates to the network service.
network_send.send(NetworkMessage::UPnPMappingEstablished{ tcp_socket, udp_socket })
.unwrap_or_else(|e| warn!(log, "Could not send message to the network service"; "error" => %e));
.unwrap_or_else(|e| debug!(log, "Could not send message to the network service"; "error" => %e));
}
_ => debug!(log, "UPnP no routes constructed. IPv6 not supported"),
}
@@ -132,6 +120,50 @@ pub fn construct_upnp_mappings<T: EthSpec>(
};
}
/// Sets up a port mapping for a protocol returning the mapped port if successful.
fn add_port_mapping(
gateway: &igd::Gateway,
protocol: igd::PortMappingProtocol,
socket: SocketAddrV4,
protocol_string: &'static str,
log: &slog::Logger,
) -> Result<(), ()> {
// We add specific port mappings rather than getting the router to arbitrary assign
// one.
// I've found this to be more reliable. If multiple users are behind a single
// router, they should ideally try to set different port numbers.
let mapping_string = &format!("lighthouse-{}", protocol_string);
for _ in 0..2 {
match gateway.add_port(protocol, socket.port(), socket, 0, mapping_string) {
Err(e) => {
match e {
igd::AddPortError::PortInUse => {
// Try and remove and re-create
debug!(log, "UPnP port in use, attempting to remap"; "protocol" => protocol_string, "port" => socket.port());
match gateway.remove_port(protocol, socket.port()) {
Ok(()) => {
debug!(log, "UPnP Removed port mapping"; "protocol" => protocol_string, "port" => socket.port())
}
Err(e) => {
debug!(log, "UPnP Port remove failure"; "protocol" => protocol_string, "port" => socket.port(), "error" => %e);
return Err(());
}
}
}
e => {
info!(log, "UPnP TCP route not set"; "error" => %e);
return Err(());
}
}
}
Ok(_) => {
return Ok(());
}
}
}
Err(())
}
/// Removes the specified TCP and UDP port mappings.
pub fn remove_mappings(tcp_port: Option<u16>, udp_port: Option<u16>, log: &slog::Logger) {
if tcp_port.is_some() || udp_port.is_some() {

View File

@@ -8,20 +8,16 @@ use crate::{error, metrics};
use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes};
use eth2_libp2p::{
rpc::{GoodbyeReason, RPCResponseErrorCode, RequestId},
Gossipsub, Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, Request, Response,
};
use eth2_libp2p::{
types::GossipKind, BehaviourEvent, GossipTopic, MessageId, NetworkGlobals, PeerId, TopicHash,
Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, Request, Response,
};
use eth2_libp2p::{types::GossipKind, BehaviourEvent, MessageId, NetworkGlobals, PeerId};
use eth2_libp2p::{MessageAcceptance, Service as LibP2PService};
use fnv::FnvHashMap;
use futures::prelude::*;
use slog::{debug, error, info, o, trace, warn};
use std::{collections::HashMap, net::SocketAddr, sync::Arc, time::Duration};
use std::{net::SocketAddr, sync::Arc, time::Duration};
use store::HotColdDB;
use tokio::sync::mpsc;
use tokio::time::Delay;
use types::subnet_id::subnet_id_to_string;
use tokio::time::Sleep;
use types::{EthSpec, RelativeEpoch, SubnetId, Unsigned, ValidatorSubscription};
mod tests;
@@ -111,7 +107,7 @@ pub struct NetworkService<T: BeaconChainTypes> {
/// update the UDP socket of discovery if the UPnP mappings get established.
discovery_auto_update: bool,
/// A delay that expires when a new fork takes place.
next_fork_update: Option<Delay>,
next_fork_update: Option<Sleep>,
/// Subscribe to all the subnets once synced.
subscribe_all_subnets: bool,
/// A timer for updating various network metrics.
@@ -274,12 +270,12 @@ fn spawn_service<T: BeaconChainTypes>(
.as_ref()
.map(|gauge| gauge.reset());
}
update_gossip_metrics::<T::EthSpec>(
metrics::update_gossip_metrics::<T::EthSpec>(
&service.libp2p.swarm.gs(),
&service.network_globals,
);
// update sync metrics
update_sync_metrics(&service.network_globals);
metrics::update_sync_metrics(&service.network_globals);
}
_ = service.gossipsub_parameter_update.next() => {
@@ -382,7 +378,7 @@ fn spawn_service<T: BeaconChainTypes>(
"count" => messages.len(),
"topics" => format!("{:?}", topic_kinds)
);
expose_publish_metrics(&messages);
metrics::expose_publish_metrics(&messages);
service.libp2p.swarm.publish(messages);
}
NetworkMessage::ReportPeer { peer_id, action } => service.libp2p.report_peer(&peer_id, action),
@@ -512,7 +508,7 @@ fn spawn_service<T: BeaconChainTypes>(
..
} => {
// Update prometheus metrics.
expose_receive_metrics(&message);
metrics::expose_receive_metrics(&message);
match message {
// attestation information gets processed in the attestation service
PubsubMessage::Attestation(ref subnet_and_attestation) => {
@@ -566,399 +562,22 @@ fn spawn_service<T: BeaconChainTypes>(
service.next_fork_update = next_fork_delay(&service.beacon_chain);
}
}
metrics::update_bandwidth_metrics(service.libp2p.bandwidth.clone());
}
}, "network");
Ok(())
}
/// Returns a `Delay` that triggers shortly after the next change in the beacon chain fork version.
/// Returns a `Sleep` that triggers shortly after the next change in the beacon chain fork version.
/// If there is no scheduled fork, `None` is returned.
fn next_fork_delay<T: BeaconChainTypes>(
beacon_chain: &BeaconChain<T>,
) -> Option<tokio::time::Delay> {
) -> Option<tokio::time::Sleep> {
beacon_chain.duration_to_next_fork().map(|until_fork| {
// Add a short time-out to start within the new fork period.
let delay = Duration::from_millis(200);
tokio::time::delay_until(tokio::time::Instant::now() + until_fork + delay)
tokio::time::sleep_until(tokio::time::Instant::now() + until_fork + delay)
})
}
/// Inspects the `messages` that were being sent to the network and updates Prometheus metrics.
fn expose_publish_metrics<T: EthSpec>(messages: &[PubsubMessage<T>]) {
for message in messages {
match message {
PubsubMessage::BeaconBlock(_) => metrics::inc_counter(&metrics::GOSSIP_BLOCKS_TX),
PubsubMessage::Attestation(subnet_id) => {
metrics::inc_counter_vec(
&metrics::ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT,
&[&subnet_id.0.as_ref()],
);
metrics::inc_counter(&metrics::GOSSIP_UNAGGREGATED_ATTESTATIONS_TX)
}
PubsubMessage::AggregateAndProofAttestation(_) => {
metrics::inc_counter(&metrics::GOSSIP_AGGREGATED_ATTESTATIONS_TX)
}
_ => {}
}
}
}
/// Inspects a `message` received from the network and updates Prometheus metrics.
fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
match message {
PubsubMessage::BeaconBlock(_) => metrics::inc_counter(&metrics::GOSSIP_BLOCKS_RX),
PubsubMessage::Attestation(_) => {
metrics::inc_counter(&metrics::GOSSIP_UNAGGREGATED_ATTESTATIONS_RX)
}
PubsubMessage::AggregateAndProofAttestation(_) => {
metrics::inc_counter(&metrics::GOSSIP_AGGREGATED_ATTESTATIONS_RX)
}
_ => {}
}
}
fn update_gossip_metrics<T: EthSpec>(
gossipsub: &Gossipsub,
network_globals: &Arc<NetworkGlobals<T>>,
) {
// Clear the metrics
let _ = metrics::PEERS_PER_PROTOCOL
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::PEERS_PER_PROTOCOL
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MESH_PEERS_PER_MAIN_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_ZERO_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MIN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MEDIAN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MEAN_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::MAX_SCORES_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
let _ = metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
.as_ref()
.map(|gauge| gauge.reset());
// reset the mesh peers, showing all subnets
for subnet_id in 0..T::default_spec().attestation_subnet_count {
let _ = metrics::get_int_gauge(
&metrics::MESH_PEERS_PER_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
let _ = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id)],
)
.map(|v| v.set(0));
}
// Subnet topics subscribed to
for topic_hash in gossipsub.topics() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
if let GossipKind::Attestation(subnet_id) = topic.kind() {
let _ = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
)
.map(|v| v.set(1));
}
}
}
// Peers per subscribed subnet
let mut peers_per_topic: HashMap<TopicHash, usize> = HashMap::new();
for (peer_id, topics) in gossipsub.all_peers() {
for topic_hash in topics {
*peers_per_topic.entry(topic_hash.clone()).or_default() += 1;
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
if let Some(v) = metrics::get_int_gauge(
&metrics::GOSSIPSUB_SUBSCRIBED_PEERS_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.inc()
};
// average peer scores
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.add(score)
};
}
}
kind => {
// main topics
if let Some(score) = gossipsub.peer_score(peer_id) {
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
&[kind.as_ref()],
) {
v.add(score)
};
}
}
}
}
}
}
// adjust to average scores by dividing by number of peers
for (topic_hash, peers) in peers_per_topic.iter() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
// average peer scores
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.set(v.get() / (*peers as f64))
};
}
kind => {
// main topics
if let Some(v) = metrics::get_gauge(
&metrics::AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
&[kind.as_ref()],
) {
v.set(v.get() / (*peers as f64))
};
}
}
}
}
// mesh peers
for topic_hash in gossipsub.topics() {
let peers = gossipsub.mesh_peers(&topic_hash).count();
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::Attestation(subnet_id) => {
if let Some(v) = metrics::get_int_gauge(
&metrics::MESH_PEERS_PER_SUBNET_TOPIC,
&[subnet_id_to_string(subnet_id.into())],
) {
v.set(peers as i64)
};
}
kind => {
// main topics
if let Some(v) = metrics::get_int_gauge(
&metrics::MESH_PEERS_PER_MAIN_TOPIC,
&[kind.as_ref()],
) {
v.set(peers as i64)
};
}
}
}
}
// protocol peers
let mut peers_per_protocol: HashMap<&'static str, i64> = HashMap::new();
for (_peer, protocol) in gossipsub.peer_protocol() {
*peers_per_protocol
.entry(protocol.as_static_ref())
.or_default() += 1;
}
for (protocol, peers) in peers_per_protocol.iter() {
if let Some(v) = metrics::get_int_gauge(&metrics::PEERS_PER_PROTOCOL, &[protocol]) {
v.set(*peers)
};
}
let mut peer_to_client = HashMap::new();
let mut scores_per_client: HashMap<&'static str, Vec<f64>> = HashMap::new();
{
let peers = network_globals.peers.read();
for (peer_id, _) in gossipsub.all_peers() {
let client = peers
.peer_info(peer_id)
.map(|peer_info| peer_info.client.kind.as_static_ref())
.unwrap_or_else(|| "Unknown");
peer_to_client.insert(peer_id, client);
let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
scores_per_client.entry(client).or_default().push(score);
}
}
// mesh peers per client
for topic_hash in gossipsub.topics() {
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
match topic.kind() {
GossipKind::BeaconBlock => {
for peer in gossipsub.mesh_peers(&topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) = metrics::get_int_gauge(
&metrics::BEACON_BLOCK_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
}
}
GossipKind::BeaconAggregateAndProof => {
for peer in gossipsub.mesh_peers(&topic_hash) {
if let Some(client) = peer_to_client.get(peer) {
if let Some(v) = metrics::get_int_gauge(
&metrics::BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
&[client],
) {
v.inc()
};
}
}
}
_ => (),
}
}
}
for (client, scores) in scores_per_client.into_iter() {
let c = &[client];
let len = scores.len();
if len > 0 {
let mut below0 = 0;
let mut below_gossip_threshold = 0;
let mut below_publish_threshold = 0;
let mut below_greylist_threshold = 0;
let mut min = f64::INFINITY;
let mut sum = 0.0;
let mut max = f64::NEG_INFINITY;
let count = scores.len() as f64;
for &score in &scores {
if score < 0.0 {
below0 += 1;
}
if score < -4000.0 {
//TODO not hardcode
below_gossip_threshold += 1;
}
if score < -8000.0 {
//TODO not hardcode
below_publish_threshold += 1;
}
if score < -16000.0 {
//TODO not hardcode
below_greylist_threshold += 1;
}
if score < min {
min = score;
}
if score > max {
max = score;
}
sum += score;
}
let median = if len == 0 {
0.0
} else if len % 2 == 0 {
(scores[len / 2 - 1] + scores[len / 2]) / 2.0
} else {
scores[len / 2]
};
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_ZERO_PER_CLIENT,
c,
below0 as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
c,
below_gossip_threshold as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
c,
below_publish_threshold as f64 / count,
);
metrics::set_gauge_entry(
&metrics::SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
c,
below_greylist_threshold as f64 / count,
);
metrics::set_gauge_entry(&metrics::MIN_SCORES_PER_CLIENT, c, min);
metrics::set_gauge_entry(&metrics::MEDIAN_SCORES_PER_CLIENT, c, median);
metrics::set_gauge_entry(&metrics::MEAN_SCORES_PER_CLIENT, c, sum / count);
metrics::set_gauge_entry(&metrics::MAX_SCORES_PER_CLIENT, c, max);
}
}
}
fn update_sync_metrics<T: EthSpec>(network_globals: &Arc<NetworkGlobals<T>>) {
// reset the counts
if metrics::PEERS_PER_SYNC_TYPE
.as_ref()
.map(|metric| metric.reset())
.is_err()
{
return;
};
// count per sync status, the number of connected peers
let mut peers_per_sync_type = FnvHashMap::default();
for sync_type in network_globals
.peers
.read()
.connected_peers()
.map(|(_peer_id, info)| info.sync_status.as_str())
{
*peers_per_sync_type.entry(sync_type).or_default() += 1;
}
for (sync_type, peer_count) in peers_per_sync_type {
metrics::set_gauge_entry(&metrics::PEERS_PER_SYNC_TYPE, &[sync_type], peer_count);
}
}

View File

@@ -5,6 +5,7 @@ mod tests {
use crate::{NetworkConfig, NetworkService};
use beacon_chain::test_utils::BeaconChainHarness;
use eth2_libp2p::Enr;
//use slog::{o, Drain, Level, Logger};
use slog::Logger;
use sloggers::{null::NullLoggerBuilder, Build};
use std::str::FromStr;
@@ -14,6 +15,18 @@ mod tests {
use types::{test_utils::generate_deterministic_keypairs, MinimalEthSpec};
/// Builds the logger handed to the components under test.
///
/// By default this is the logger produced by `NullLoggerBuilder`. To see log output while
/// debugging a test, swap in the commented-out terminal drain below (and re-enable the
/// matching `slog` import).
fn get_logger() -> Logger {
    /* For emitting logs during the tests
    let drain = {
        let decorator = slog_term::TermDecorator::new().build();
        let decorator =
            logging::AlignedTermDecorator::new(decorator, logging::MAX_MESSAGE_WIDTH);
        let drain = slog_term::FullFormat::new(decorator).build().fuse();
        let drain = slog_async::Async::new(drain).chan_size(2048).build();
        drain.filter_level(Level::Debug)
    };

    Logger::root(drain.fuse(), o!())
    */
    NullLoggerBuilder
        .build()
        .expect("should build logger")
}
@@ -37,12 +50,12 @@ mod tests {
let enr2 = Enr::from_str("enr:-IS4QJ2d11eu6dC7E7LoXeLMgMP3kom1u3SE8esFSWvaHoo0dP1jg8O3-nx9ht-EO3CmG7L6OkHcMmoIh00IYWB92QABgmlkgnY0gmlwhH8AAAGJc2VjcDI1NmsxoQIB_c-jQMOXsbjWkbN-Oj99H57gfId5pfb4wa1qxwV4CIN1ZHCCIyk").unwrap();
let enrs = vec![enr1, enr2];
let runtime = Runtime::new().unwrap();
let runtime = Arc::new(Runtime::new().unwrap());
let (signal, exit) = exit_future::signal();
let (shutdown_tx, _) = futures::channel::mpsc::channel(1);
let executor = task_executor::TaskExecutor::new(
runtime.handle().clone(),
Arc::downgrade(&runtime),
exit,
log.clone(),
shutdown_tx,
@@ -50,9 +63,10 @@ mod tests {
let mut config = NetworkConfig::default();
config.libp2p_port = 21212;
config.upnp_enabled = false;
config.discovery_port = 21212;
config.boot_nodes_enr = enrs.clone();
runtime.spawn(async move {
runtime.block_on(async move {
// Create a new network service which implicitly gets dropped at the
// end of the block.
@@ -61,7 +75,9 @@ mod tests {
.unwrap();
drop(signal);
});
runtime.shutdown_timeout(tokio::time::Duration::from_millis(300));
let raw_runtime = Arc::try_unwrap(runtime).unwrap();
raw_runtime.shutdown_timeout(tokio::time::Duration::from_secs(10));
// Load the persisted dht from the store
let persisted_enrs = load_dht(store);