mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-02 12:13:46 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -11,12 +11,12 @@ use libp2p::identify::Info as IdentifyInfo;
|
||||
use lru_cache::LRUTimeCache;
|
||||
use peerdb::{BanOperation, BanResult, ScoreUpdateResult};
|
||||
use rand::seq::SliceRandom;
|
||||
use slog::{debug, error, trace, warn};
|
||||
use smallvec::SmallVec;
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use tracing::{debug, error, trace, warn};
|
||||
use types::{DataColumnSubnetId, EthSpec, SyncSubnetId};
|
||||
|
||||
pub use libp2p::core::Multiaddr;
|
||||
@@ -114,8 +114,6 @@ pub struct PeerManager<E: EthSpec> {
|
||||
metrics_enabled: bool,
|
||||
/// Keeps track of whether the QUIC protocol is enabled or not.
|
||||
quic_enabled: bool,
|
||||
/// The logger associated with the `PeerManager`.
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
/// The events that the `PeerManager` outputs (requests).
|
||||
@@ -150,7 +148,6 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
pub fn new(
|
||||
cfg: config::Config,
|
||||
network_globals: Arc<NetworkGlobals<E>>,
|
||||
log: &slog::Logger,
|
||||
) -> Result<Self, String> {
|
||||
let config::Config {
|
||||
discovery_enabled,
|
||||
@@ -195,7 +192,6 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
discovery_enabled,
|
||||
metrics_enabled,
|
||||
quic_enabled,
|
||||
log: log.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -209,7 +205,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
pub fn goodbye_peer(&mut self, peer_id: &PeerId, reason: GoodbyeReason, source: ReportSource) {
|
||||
// Update the sync status if required
|
||||
if let Some(info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
debug!(self.log, "Sending goodbye to peer"; "peer_id" => %peer_id, "reason" => %reason, "score" => %info.score());
|
||||
debug!(%peer_id, %reason, score = %info.score(), "Sending goodbye to peer");
|
||||
if matches!(reason, GoodbyeReason::IrrelevantNetwork) {
|
||||
info.update_sync_status(SyncStatus::IrrelevantPeer);
|
||||
}
|
||||
@@ -369,7 +365,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
.update_min_ttl(&peer_id, min_ttl);
|
||||
}
|
||||
if self.dial_peer(enr) {
|
||||
debug!(self.log, "Added discovered ENR peer to dial queue"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Added discovered ENR peer to dial queue");
|
||||
to_dial_peers += 1;
|
||||
}
|
||||
}
|
||||
@@ -382,7 +378,10 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
// reach out target. To prevent the infinite loop, if a query returns no useful peers, we
|
||||
// will cancel the recursiveness and wait for the heartbeat to trigger another query later.
|
||||
if results_count > 0 && to_dial_peers == 0 {
|
||||
debug!(self.log, "Skipping recursive discovery query after finding no useful results"; "results" => results_count);
|
||||
debug!(
|
||||
results = results_count,
|
||||
"Skipping recursive discovery query after finding no useful results"
|
||||
);
|
||||
metrics::inc_counter(&metrics::DISCOVERY_NO_USEFUL_ENRS);
|
||||
} else {
|
||||
// Queue another discovery if we need to
|
||||
@@ -481,16 +480,21 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
if previous_kind != peer_info.client().kind
|
||||
|| *peer_info.listening_addresses() != previous_listening_addresses
|
||||
{
|
||||
debug!(self.log, "Identified Peer"; "peer" => %peer_id,
|
||||
"protocol_version" => &info.protocol_version,
|
||||
"agent_version" => &info.agent_version,
|
||||
"listening_addresses" => ?info.listen_addrs,
|
||||
"observed_address" => ?info.observed_addr,
|
||||
"protocols" => ?info.protocols
|
||||
debug!(
|
||||
%peer_id,
|
||||
protocol_version = &info.protocol_version,
|
||||
agent_version = &info.agent_version,
|
||||
listening_addresses = ?info.listen_addrs,
|
||||
observed_address = ?info.observed_addr,
|
||||
protocols = ?info.protocols,
|
||||
"Identified Peer"
|
||||
);
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
|
||||
error!(
|
||||
peer_id = peer_id.to_string(),
|
||||
"Received an Identify response from an unknown peer"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -506,8 +510,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
) {
|
||||
let client = self.network_globals.client(peer_id);
|
||||
let score = self.network_globals.peers.read().score(peer_id);
|
||||
debug!(self.log, "RPC Error"; "protocol" => %protocol, "err" => %err, "client" => %client,
|
||||
"peer_id" => %peer_id, "score" => %score, "direction" => ?direction);
|
||||
debug!(%protocol, %err, %client, %peer_id, %score, ?direction, "RPC Error");
|
||||
metrics::inc_counter_vec(
|
||||
&metrics::TOTAL_RPC_ERRORS_PER_CLIENT,
|
||||
&[
|
||||
@@ -524,7 +527,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
PeerAction::MidToleranceError
|
||||
}
|
||||
RPCError::InternalError(e) => {
|
||||
debug!(self.log, "Internal RPC Error"; "error" => %e, "peer_id" => %peer_id);
|
||||
debug!(error = %e, %peer_id, "Internal RPC Error");
|
||||
return;
|
||||
}
|
||||
RPCError::HandlerRejected => PeerAction::Fatal,
|
||||
@@ -617,7 +620,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
RPCError::StreamTimeout => match direction {
|
||||
ConnectionDirection::Incoming => {
|
||||
// There was a timeout responding to a peer.
|
||||
debug!(self.log, "Timed out responding to RPC Request"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Timed out responding to RPC Request");
|
||||
return;
|
||||
}
|
||||
ConnectionDirection::Outgoing => match protocol {
|
||||
@@ -656,7 +659,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
if let Some(peer_info) = self.network_globals.peers.read().peer_info(peer_id) {
|
||||
// received a ping
|
||||
// reset the to-ping timer for this peer
|
||||
trace!(self.log, "Received a ping request"; "peer_id" => %peer_id, "seq_no" => seq);
|
||||
trace!(%peer_id, seq_no = seq, "Received a ping request");
|
||||
match peer_info.connection_direction() {
|
||||
Some(ConnectionDirection::Incoming) => {
|
||||
self.inbound_ping_peers.insert(*peer_id);
|
||||
@@ -665,26 +668,23 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
self.outbound_ping_peers.insert(*peer_id);
|
||||
}
|
||||
None => {
|
||||
warn!(self.log, "Received a ping from a peer with an unknown connection direction"; "peer_id" => %peer_id);
|
||||
warn!(%peer_id, "Received a ping from a peer with an unknown connection direction");
|
||||
}
|
||||
}
|
||||
|
||||
// if the peer's sequence number is newer than the one we know, request an update of its meta data.
|
||||
if let Some(meta_data) = &peer_info.meta_data() {
|
||||
if *meta_data.seq_number() < seq {
|
||||
trace!(self.log, "Requesting new metadata from peer";
|
||||
"peer_id" => %peer_id, "known_seq_no" => meta_data.seq_number(), "ping_seq_no" => seq);
|
||||
trace!(%peer_id, known_seq_no = meta_data.seq_number(), ping_seq_no = seq, "Requesting new metadata from peer");
|
||||
self.events.push(PeerManagerEvent::MetaData(*peer_id));
|
||||
}
|
||||
} else {
|
||||
// if we don't know the meta-data, request it
|
||||
debug!(self.log, "Requesting first metadata from peer";
|
||||
"peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Requesting first metadata from peer");
|
||||
self.events.push(PeerManagerEvent::MetaData(*peer_id));
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received a PING from an unknown peer";
|
||||
"peer_id" => %peer_id);
|
||||
error!(%peer_id, "Received a PING from an unknown peer");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -696,18 +696,16 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
// if the peer's sequence number is newer than the one we know, request an update of its meta data.
|
||||
if let Some(meta_data) = &peer_info.meta_data() {
|
||||
if *meta_data.seq_number() < seq {
|
||||
trace!(self.log, "Requesting new metadata from peer";
|
||||
"peer_id" => %peer_id, "known_seq_no" => meta_data.seq_number(), "pong_seq_no" => seq);
|
||||
trace!(%peer_id, known_seq_no = meta_data.seq_number(), pong_seq_no = seq, "Requesting new metadata from peer");
|
||||
self.events.push(PeerManagerEvent::MetaData(*peer_id));
|
||||
}
|
||||
} else {
|
||||
// if we don't know the meta-data, request it
|
||||
trace!(self.log, "Requesting first metadata from peer";
|
||||
"peer_id" => %peer_id);
|
||||
trace!(%peer_id, "Requesting first metadata from peer");
|
||||
self.events.push(PeerManagerEvent::MetaData(*peer_id));
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received a PONG from an unknown peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Received a PONG from an unknown peer");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -718,18 +716,15 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
if let Some(known_meta_data) = &peer_info.meta_data() {
|
||||
if *known_meta_data.seq_number() < *meta_data.seq_number() {
|
||||
trace!(self.log, "Updating peer's metadata";
|
||||
"peer_id" => %peer_id, "known_seq_no" => known_meta_data.seq_number(), "new_seq_no" => meta_data.seq_number());
|
||||
trace!(%peer_id, known_seq_no = known_meta_data.seq_number(), new_seq_no = meta_data.seq_number(), "Updating peer's metadata");
|
||||
} else {
|
||||
trace!(self.log, "Received old metadata";
|
||||
"peer_id" => %peer_id, "known_seq_no" => known_meta_data.seq_number(), "new_seq_no" => meta_data.seq_number());
|
||||
trace!(%peer_id, known_seq_no = known_meta_data.seq_number(), new_seq_no = meta_data.seq_number(), "Received old metadata");
|
||||
// Updating metadata even in this case to prevent storing
|
||||
// incorrect `attnets/syncnets` for a peer
|
||||
}
|
||||
} else {
|
||||
// we have no meta-data for this peer, update
|
||||
debug!(self.log, "Obtained peer's metadata";
|
||||
"peer_id" => %peer_id, "new_seq_no" => meta_data.seq_number());
|
||||
debug!(%peer_id, new_seq_no = meta_data.seq_number(), "Obtained peer's metadata");
|
||||
}
|
||||
|
||||
let custody_group_count_opt = meta_data.custody_group_count().copied().ok();
|
||||
@@ -749,10 +744,9 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
.cloned()
|
||||
.unwrap_or_else(|| {
|
||||
warn!(
|
||||
self.log,
|
||||
"Custody group not found in subnet mapping";
|
||||
"custody_index" => custody_index,
|
||||
"peer_id" => %peer_id
|
||||
%custody_index,
|
||||
%peer_id,
|
||||
"Custody group not found in subnet mapping"
|
||||
);
|
||||
vec![]
|
||||
})
|
||||
@@ -761,11 +755,12 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
peer_info.set_custody_subnets(custody_subnets);
|
||||
}
|
||||
Err(err) => {
|
||||
debug!(self.log, "Unable to compute peer custody groups from metadata";
|
||||
"info" => "Sending goodbye to peer",
|
||||
"peer_id" => %peer_id,
|
||||
"custody_group_count" => custody_group_count,
|
||||
"error" => ?err,
|
||||
debug!(
|
||||
info = "Sending goodbye to peer",
|
||||
peer_id = %peer_id,
|
||||
custody_group_count,
|
||||
error = ?err,
|
||||
"Unable to compute peer custody groups from metadata"
|
||||
);
|
||||
invalid_meta_data = true;
|
||||
}
|
||||
@@ -773,8 +768,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received METADATA from an unknown peer";
|
||||
"peer_id" => %peer_id);
|
||||
error!(%peer_id, "Received METADATA from an unknown peer");
|
||||
}
|
||||
|
||||
// Disconnect peers with invalid metadata and find other peers instead.
|
||||
@@ -866,7 +860,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
if peerdb.ban_status(peer_id).is_some() {
|
||||
// don't connect if the peer is banned
|
||||
error!(self.log, "Connection has been allowed to a banned peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Connection has been allowed to a banned peer");
|
||||
}
|
||||
|
||||
match connection {
|
||||
@@ -934,9 +928,8 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
// request the subnet query from discovery
|
||||
if !subnets_to_discover.is_empty() {
|
||||
debug!(
|
||||
self.log,
|
||||
"Making subnet queries for maintaining sync committee peers";
|
||||
"subnets" => ?subnets_to_discover.iter().map(|s| s.subnet).collect::<Vec<_>>()
|
||||
subnets = ?subnets_to_discover.iter().map(|s| s.subnet).collect::<Vec<_>>(),
|
||||
"Making subnet queries for maintaining sync committee peers"
|
||||
);
|
||||
self.events
|
||||
.push(PeerManagerEvent::DiscoverSubnetPeers(subnets_to_discover));
|
||||
@@ -965,7 +958,13 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
|
||||
if wanted_peers != 0 {
|
||||
// We need more peers, re-queue a discovery lookup.
|
||||
debug!(self.log, "Starting a new peer discovery query"; "connected" => peer_count, "target" => self.target_peers, "outbound" => outbound_only_peer_count, "wanted" => wanted_peers);
|
||||
debug!(
|
||||
connected = peer_count,
|
||||
target = self.target_peers,
|
||||
outbound = outbound_only_peer_count,
|
||||
wanted = wanted_peers,
|
||||
"Starting a new peer discovery query"
|
||||
);
|
||||
self.events
|
||||
.push(PeerManagerEvent::DiscoverPeers(wanted_peers));
|
||||
}
|
||||
@@ -1491,21 +1490,8 @@ enum ConnectingType {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::NetworkConfig;
|
||||
use slog::{o, Drain};
|
||||
use types::MainnetEthSpec as E;
|
||||
|
||||
pub fn build_log(level: slog::Level, enabled: bool) -> slog::Logger {
|
||||
let decorator = slog_term::TermDecorator::new().build();
|
||||
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
||||
let drain = slog_async::Async::new(drain).build().fuse();
|
||||
|
||||
if enabled {
|
||||
slog::Logger::root(drain.filter_level(level).fuse(), o!())
|
||||
} else {
|
||||
slog::Logger::root(drain.filter(|_| false).fuse(), o!())
|
||||
}
|
||||
}
|
||||
|
||||
async fn build_peer_manager(target_peer_count: usize) -> PeerManager<E> {
|
||||
build_peer_manager_with_trusted_peers(vec![], target_peer_count).await
|
||||
}
|
||||
@@ -1523,10 +1509,9 @@ mod tests {
|
||||
target_peers: target_peer_count,
|
||||
..Default::default()
|
||||
});
|
||||
let log = build_log(slog::Level::Debug, false);
|
||||
let spec = Arc::new(E::default_spec());
|
||||
let globals = NetworkGlobals::new_test_globals(trusted_peers, &log, network_config, spec);
|
||||
PeerManager::new(config, Arc::new(globals), &log).unwrap()
|
||||
let globals = NetworkGlobals::new_test_globals(trusted_peers, network_config, spec);
|
||||
PeerManager::new(config, Arc::new(globals)).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -13,7 +13,7 @@ use libp2p::swarm::dial_opts::{DialOpts, PeerCondition};
|
||||
use libp2p::swarm::dummy::ConnectionHandler;
|
||||
use libp2p::swarm::{ConnectionDenied, ConnectionId, NetworkBehaviour, ToSwarm};
|
||||
pub use metrics::{set_gauge_vec, NAT_OPEN};
|
||||
use slog::{debug, error, trace};
|
||||
use tracing::{debug, error, trace};
|
||||
use types::EthSpec;
|
||||
|
||||
use crate::discovery::enr_ext::EnrExt;
|
||||
@@ -54,7 +54,10 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
self.events.push(PeerManagerEvent::Ping(peer_id));
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for inbound peers to ping"; "error" => e.to_string())
|
||||
error!(
|
||||
error = e.to_string(),
|
||||
"Failed to check for inbound peers to ping"
|
||||
)
|
||||
}
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
@@ -67,7 +70,10 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
self.events.push(PeerManagerEvent::Ping(peer_id));
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for outbound peers to ping"; "error" => e.to_string())
|
||||
error!(
|
||||
error = e.to_string(),
|
||||
"Failed to check for outbound peers to ping"
|
||||
)
|
||||
}
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
@@ -84,7 +90,7 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
self.events.push(PeerManagerEvent::Status(peer_id))
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
|
||||
error!(error = e.to_string(), "Failed to check for peers to ping")
|
||||
}
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
@@ -109,7 +115,7 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
]
|
||||
.concat();
|
||||
|
||||
debug!(self.log, "Dialing peer"; "peer_id"=> %enr.peer_id(), "multiaddrs" => ?multiaddrs);
|
||||
debug!(peer_id = %enr.peer_id(), ?multiaddrs, "Dialing peer");
|
||||
return Poll::Ready(ToSwarm::Dial {
|
||||
opts: DialOpts::peer_id(enr.peer_id())
|
||||
.condition(PeerCondition::Disconnected)
|
||||
@@ -141,7 +147,7 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
error,
|
||||
connection_id: _,
|
||||
}) => {
|
||||
debug!(self.log, "Failed to dial peer"; "peer_id"=> ?peer_id, "error" => %ClearDialError(error));
|
||||
debug!(?peer_id, error = %ClearDialError(error),"Failed to dial peer");
|
||||
self.on_dial_failure(peer_id);
|
||||
}
|
||||
_ => {
|
||||
@@ -186,7 +192,7 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
_local_addr: &libp2p::Multiaddr,
|
||||
remote_addr: &libp2p::Multiaddr,
|
||||
) -> Result<libp2p::swarm::THandler<Self>, ConnectionDenied> {
|
||||
trace!(self.log, "Inbound connection"; "peer_id" => %peer_id, "multiaddr" => %remote_addr);
|
||||
trace!(%peer_id, multiaddr = %remote_addr, "Inbound connection");
|
||||
// We already checked if the peer was banned on `handle_pending_inbound_connection`.
|
||||
if self.ban_status(&peer_id).is_some() {
|
||||
return Err(ConnectionDenied::new(
|
||||
@@ -227,9 +233,9 @@ impl<E: EthSpec> NetworkBehaviour for PeerManager<E> {
|
||||
_role_override: libp2p::core::Endpoint,
|
||||
_port_use: PortUse,
|
||||
) -> Result<libp2p::swarm::THandler<Self>, libp2p::swarm::ConnectionDenied> {
|
||||
trace!(self.log, "Outbound connection"; "peer_id" => %peer_id, "multiaddr" => %addr);
|
||||
trace!(%peer_id, multiaddr = %addr,"Outbound connection");
|
||||
if let Some(cause) = self.ban_status(&peer_id) {
|
||||
error!(self.log, "Connected a banned peer. Rejecting connection"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Connected a banned peer. Rejecting connection");
|
||||
return Err(ConnectionDenied::new(cause));
|
||||
}
|
||||
|
||||
@@ -258,9 +264,11 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
endpoint: &ConnectedPoint,
|
||||
_other_established: usize,
|
||||
) {
|
||||
debug!(self.log, "Connection established"; "peer_id" => %peer_id,
|
||||
"multiaddr" => %endpoint.get_remote_address(),
|
||||
"connection" => ?endpoint.to_endpoint()
|
||||
debug!(
|
||||
multiaddr = %endpoint.get_remote_address(),
|
||||
connection = ?endpoint.to_endpoint(),
|
||||
%peer_id,
|
||||
"Connection established"
|
||||
);
|
||||
|
||||
// Update the prometheus metrics
|
||||
@@ -309,7 +317,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
// Inform the application.
|
||||
self.events
|
||||
.push(PeerManagerEvent::PeerDisconnected(peer_id));
|
||||
debug!(self.log, "Peer disconnected"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id,"Peer disconnected");
|
||||
}
|
||||
|
||||
// NOTE: It may be the case that a rejected node, due to too many peers is disconnected
|
||||
|
||||
@@ -2,9 +2,9 @@ use crate::discovery::enr::PEERDAS_CUSTODY_GROUP_COUNT_ENR_KEY;
|
||||
use crate::discovery::{peer_id_to_node_id, CombinedKey};
|
||||
use crate::{metrics, multiaddr::Multiaddr, types::Subnet, Enr, EnrExt, Gossipsub, PeerId};
|
||||
use itertools::Itertools;
|
||||
use logging::crit;
|
||||
use peer_info::{ConnectionDirection, PeerConnectionStatus, PeerInfo};
|
||||
use score::{PeerAction, ReportSource, Score, ScoreState};
|
||||
use slog::{crit, debug, error, trace, warn};
|
||||
use std::net::IpAddr;
|
||||
use std::time::Instant;
|
||||
use std::{cmp::Ordering, fmt::Display};
|
||||
@@ -13,6 +13,7 @@ use std::{
|
||||
fmt::Formatter,
|
||||
};
|
||||
use sync_status::SyncStatus;
|
||||
use tracing::{debug, error, trace, warn};
|
||||
use types::data_column_custody_group::compute_subnets_for_node;
|
||||
use types::{ChainSpec, DataColumnSubnetId, EthSpec};
|
||||
|
||||
@@ -44,19 +45,16 @@ pub struct PeerDB<E: EthSpec> {
|
||||
banned_peers_count: BannedPeersCount,
|
||||
/// Specifies if peer scoring is disabled.
|
||||
disable_peer_scoring: bool,
|
||||
/// PeerDB's logger
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
impl<E: EthSpec> PeerDB<E> {
|
||||
pub fn new(trusted_peers: Vec<PeerId>, disable_peer_scoring: bool, log: &slog::Logger) -> Self {
|
||||
pub fn new(trusted_peers: Vec<PeerId>, disable_peer_scoring: bool) -> Self {
|
||||
// Initialize the peers hashmap with trusted peers
|
||||
let peers = trusted_peers
|
||||
.into_iter()
|
||||
.map(|peer_id| (peer_id, PeerInfo::trusted_peer_info()))
|
||||
.collect();
|
||||
Self {
|
||||
log: log.clone(),
|
||||
disconnected_peers: 0,
|
||||
banned_peers_count: BannedPeersCount::default(),
|
||||
disable_peer_scoring,
|
||||
@@ -385,15 +383,15 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
// Update scores
|
||||
info.score_update();
|
||||
|
||||
match Self::handle_score_transition(previous_state, peer_id, info, &self.log) {
|
||||
match Self::handle_score_transition(previous_state, peer_id, info) {
|
||||
// A peer should not be able to be banned from a score update.
|
||||
ScoreTransitionResult::Banned => {
|
||||
error!(self.log, "Peer has been banned in an update"; "peer_id" => %peer_id)
|
||||
error!(%peer_id, "Peer has been banned in an update");
|
||||
}
|
||||
// A peer should not be able to transition to a disconnected state from a healthy
|
||||
// state in a score update.
|
||||
ScoreTransitionResult::Disconnected => {
|
||||
error!(self.log, "Peer has been disconnected in an update"; "peer_id" => %peer_id)
|
||||
error!(%peer_id, "Peer has been disconnected in an update");
|
||||
}
|
||||
ScoreTransitionResult::Unbanned => {
|
||||
peers_to_unban.push(*peer_id);
|
||||
@@ -466,7 +464,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
|
||||
actions.push((
|
||||
*peer_id,
|
||||
Self::handle_score_transition(previous_state, peer_id, info, &self.log),
|
||||
Self::handle_score_transition(previous_state, peer_id, info),
|
||||
));
|
||||
}
|
||||
|
||||
@@ -537,15 +535,13 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
&metrics::PEER_ACTION_EVENTS_PER_CLIENT,
|
||||
&[info.client().kind.as_ref(), action.as_ref(), source.into()],
|
||||
);
|
||||
let result =
|
||||
Self::handle_score_transition(previous_state, peer_id, info, &self.log);
|
||||
let result = Self::handle_score_transition(previous_state, peer_id, info);
|
||||
if previous_state == info.score_state() {
|
||||
debug!(
|
||||
self.log,
|
||||
"Peer score adjusted";
|
||||
"msg" => %msg,
|
||||
"peer_id" => %peer_id,
|
||||
"score" => %info.score()
|
||||
%msg,
|
||||
%peer_id,
|
||||
score = %info.score(),
|
||||
"Peer score adjusted"
|
||||
);
|
||||
}
|
||||
match result {
|
||||
@@ -567,10 +563,9 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
ScoreTransitionResult::NoAction => ScoreUpdateResult::NoAction,
|
||||
ScoreTransitionResult::Unbanned => {
|
||||
error!(
|
||||
self.log,
|
||||
"Report peer action lead to an unbanning";
|
||||
"msg" => %msg,
|
||||
"peer_id" => %peer_id
|
||||
%msg,
|
||||
%peer_id,
|
||||
"Report peer action lead to an unbanning"
|
||||
);
|
||||
ScoreUpdateResult::NoAction
|
||||
}
|
||||
@@ -578,10 +573,9 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
}
|
||||
None => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Reporting a peer that doesn't exist";
|
||||
"msg" => %msg,
|
||||
"peer_id" =>%peer_id
|
||||
%msg,
|
||||
%peer_id,
|
||||
"Reporting a peer that doesn't exist"
|
||||
);
|
||||
ScoreUpdateResult::NoAction
|
||||
}
|
||||
@@ -601,7 +595,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
.checked_duration_since(Instant::now())
|
||||
.map(|duration| duration.as_secs())
|
||||
.unwrap_or_else(|| 0);
|
||||
debug!(self.log, "Updating the time a peer is required for"; "peer_id" => %peer_id, "future_min_ttl_secs" => min_ttl_secs);
|
||||
debug!(%peer_id, future_min_ttl_secs = min_ttl_secs, "Updating the time a peer is required for");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -625,12 +619,14 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
/// min_ttl than what's given.
|
||||
// VISIBILITY: The behaviour is able to adjust subscriptions.
|
||||
pub(crate) fn extend_peers_on_subnet(&mut self, subnet: &Subnet, min_ttl: Instant) {
|
||||
let log = &self.log;
|
||||
self.peers.iter_mut()
|
||||
self.peers
|
||||
.iter_mut()
|
||||
.filter(move |(_, info)| {
|
||||
info.is_connected() && info.on_subnet_metadata(subnet) && info.on_subnet_gossipsub(subnet)
|
||||
info.is_connected()
|
||||
&& info.on_subnet_metadata(subnet)
|
||||
&& info.on_subnet_gossipsub(subnet)
|
||||
})
|
||||
.for_each(|(peer_id,info)| {
|
||||
.for_each(|(peer_id, info)| {
|
||||
if info.min_ttl().is_none() || Some(&min_ttl) > info.min_ttl() {
|
||||
info.set_min_ttl(min_ttl);
|
||||
}
|
||||
@@ -638,7 +634,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
.checked_duration_since(Instant::now())
|
||||
.map(|duration| duration.as_secs())
|
||||
.unwrap_or_else(|| 0);
|
||||
trace!(log, "Updating minimum duration a peer is required for"; "peer_id" => %peer_id, "min_ttl" => min_ttl_secs);
|
||||
trace!(%peer_id, min_ttl_secs, "Updating minimum duration a peer is required for");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -740,7 +736,6 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
peer_id: &PeerId,
|
||||
new_state: NewConnectionState,
|
||||
) -> Option<BanOperation> {
|
||||
let log_ref = &self.log;
|
||||
let info = self.peers.entry(*peer_id).or_insert_with(|| {
|
||||
// If we are not creating a new connection (or dropping a current inbound connection) log a warning indicating we are updating a
|
||||
// connection state for an unknown peer.
|
||||
@@ -752,8 +747,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
| NewConnectionState::Disconnected { .. } // Dialing a peer that responds by a different ID can be immediately
|
||||
// disconnected without having been stored in the db before
|
||||
) {
|
||||
warn!(log_ref, "Updating state of unknown peer";
|
||||
"peer_id" => %peer_id, "new_state" => ?new_state);
|
||||
warn!(%peer_id, ?new_state, "Updating state of unknown peer");
|
||||
}
|
||||
if self.disable_peer_scoring {
|
||||
PeerInfo::trusted_peer_info()
|
||||
@@ -768,7 +762,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
ScoreState::Banned => {}
|
||||
_ => {
|
||||
// If score isn't low enough to ban, this function has been called incorrectly.
|
||||
error!(self.log, "Banning a peer with a good score"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Banning a peer with a good score");
|
||||
info.apply_peer_action_to_score(score::PeerAction::Fatal);
|
||||
}
|
||||
}
|
||||
@@ -799,13 +793,13 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
self.disconnected_peers = self.disconnected_peers.saturating_sub(1);
|
||||
}
|
||||
PeerConnectionStatus::Banned { .. } => {
|
||||
error!(self.log, "Accepted a connection from a banned peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Accepted a connection from a banned peer");
|
||||
// TODO: check if this happens and report the unban back
|
||||
self.banned_peers_count
|
||||
.remove_banned_peer(info.seen_ip_addresses());
|
||||
}
|
||||
PeerConnectionStatus::Disconnecting { .. } => {
|
||||
warn!(self.log, "Connected to a disconnecting peer"; "peer_id" => %peer_id)
|
||||
warn!(%peer_id, "Connected to a disconnecting peer");
|
||||
}
|
||||
PeerConnectionStatus::Unknown
|
||||
| PeerConnectionStatus::Connected { .. }
|
||||
@@ -827,7 +821,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
(old_state, NewConnectionState::Dialing { enr }) => {
|
||||
match old_state {
|
||||
PeerConnectionStatus::Banned { .. } => {
|
||||
warn!(self.log, "Dialing a banned peer"; "peer_id" => %peer_id);
|
||||
warn!(%peer_id, "Dialing a banned peer");
|
||||
self.banned_peers_count
|
||||
.remove_banned_peer(info.seen_ip_addresses());
|
||||
}
|
||||
@@ -835,13 +829,13 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
self.disconnected_peers = self.disconnected_peers.saturating_sub(1);
|
||||
}
|
||||
PeerConnectionStatus::Connected { .. } => {
|
||||
warn!(self.log, "Dialing an already connected peer"; "peer_id" => %peer_id)
|
||||
warn!(%peer_id, "Dialing an already connected peer");
|
||||
}
|
||||
PeerConnectionStatus::Dialing { .. } => {
|
||||
warn!(self.log, "Dialing an already dialing peer"; "peer_id" => %peer_id)
|
||||
warn!(%peer_id, "Dialing an already dialing peer");
|
||||
}
|
||||
PeerConnectionStatus::Disconnecting { .. } => {
|
||||
warn!(self.log, "Dialing a disconnecting peer"; "peer_id" => %peer_id)
|
||||
warn!(%peer_id, "Dialing a disconnecting peer");
|
||||
}
|
||||
PeerConnectionStatus::Unknown => {} // default behaviour
|
||||
}
|
||||
@@ -851,7 +845,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
}
|
||||
|
||||
if let Err(e) = info.set_dialing_peer() {
|
||||
error!(self.log, "{}", e; "peer_id" => %peer_id);
|
||||
error!(%peer_id, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -907,7 +901,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
* Handles the transition to a disconnecting state
|
||||
*/
|
||||
(PeerConnectionStatus::Banned { .. }, NewConnectionState::Disconnecting { to_ban }) => {
|
||||
error!(self.log, "Disconnecting from a banned peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Disconnecting from a banned peer");
|
||||
info.set_connection_status(PeerConnectionStatus::Disconnecting { to_ban });
|
||||
}
|
||||
(
|
||||
@@ -951,13 +945,13 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
(PeerConnectionStatus::Disconnecting { .. }, NewConnectionState::Banned) => {
|
||||
// NOTE: This can occur due to a rapid downscore of a peer. It goes through the
|
||||
// disconnection phase and straight into banning in a short time-frame.
|
||||
debug!(log_ref, "Banning peer that is currently disconnecting"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Banning peer that is currently disconnecting");
|
||||
// Ban the peer once the disconnection process completes.
|
||||
info.set_connection_status(PeerConnectionStatus::Disconnecting { to_ban: true });
|
||||
return Some(BanOperation::PeerDisconnecting);
|
||||
}
|
||||
(PeerConnectionStatus::Banned { .. }, NewConnectionState::Banned) => {
|
||||
error!(log_ref, "Banning already banned peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Banning already banned peer");
|
||||
let known_banned_ips = self.banned_peers_count.banned_ips();
|
||||
let banned_ips = info
|
||||
.seen_ip_addresses()
|
||||
@@ -975,7 +969,7 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
}
|
||||
(PeerConnectionStatus::Unknown, NewConnectionState::Banned) => {
|
||||
// shift the peer straight to banned
|
||||
warn!(log_ref, "Banning a peer of unknown connection state"; "peer_id" => %peer_id);
|
||||
warn!(%peer_id, "Banning a peer of unknown connection state");
|
||||
self.banned_peers_count
|
||||
.add_banned_peer(info.seen_ip_addresses());
|
||||
info.set_connection_status(PeerConnectionStatus::Banned {
|
||||
@@ -996,15 +990,15 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
*/
|
||||
(old_state, NewConnectionState::Unbanned) => {
|
||||
if matches!(info.score_state(), ScoreState::Banned) {
|
||||
error!(self.log, "Unbanning a banned peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Unbanning a banned peer");
|
||||
}
|
||||
match old_state {
|
||||
PeerConnectionStatus::Unknown | PeerConnectionStatus::Connected { .. } => {
|
||||
error!(self.log, "Unbanning a connected peer"; "peer_id" => %peer_id);
|
||||
error!(%peer_id, "Unbanning a connected peer");
|
||||
}
|
||||
PeerConnectionStatus::Disconnected { .. }
|
||||
| PeerConnectionStatus::Disconnecting { .. } => {
|
||||
debug!(self.log, "Unbanning disconnected or disconnecting peer"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Unbanning disconnected or disconnecting peer");
|
||||
} // These are odd but fine.
|
||||
PeerConnectionStatus::Dialing { .. } => {} // Also odd but acceptable
|
||||
PeerConnectionStatus::Banned { since } => {
|
||||
@@ -1073,15 +1067,12 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
Some((*id, unbanned_ips))
|
||||
} else {
|
||||
// If there is no minimum, this is a coding error.
|
||||
crit!(
|
||||
self.log,
|
||||
"banned_peers > MAX_BANNED_PEERS despite no banned peers in db!"
|
||||
);
|
||||
crit!("banned_peers > MAX_BANNED_PEERS despite no banned peers in db!");
|
||||
// reset banned_peers this will also exit the loop
|
||||
self.banned_peers_count = BannedPeersCount::default();
|
||||
None
|
||||
} {
|
||||
debug!(self.log, "Removing old banned peer"; "peer_id" => %to_drop);
|
||||
debug!(peer_id = %to_drop, "Removing old banned peer");
|
||||
self.peers.remove(&to_drop);
|
||||
unbanned_peers.push((to_drop, unbanned_ips))
|
||||
}
|
||||
@@ -1100,7 +1091,11 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
.min_by_key(|(_, age)| *age)
|
||||
.map(|(id, _)| *id)
|
||||
{
|
||||
debug!(self.log, "Removing old disconnected peer"; "peer_id" => %to_drop, "disconnected_size" => self.disconnected_peers.saturating_sub(1));
|
||||
debug!(
|
||||
peer_id = %to_drop,
|
||||
disconnected_size = self.disconnected_peers.saturating_sub(1),
|
||||
"Removing old disconnected peer"
|
||||
);
|
||||
self.peers.remove(&to_drop);
|
||||
}
|
||||
// If there is no minimum, this is a coding error. For safety we decrease
|
||||
@@ -1117,15 +1112,19 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
previous_state: ScoreState,
|
||||
peer_id: &PeerId,
|
||||
info: &PeerInfo<E>,
|
||||
log: &slog::Logger,
|
||||
) -> ScoreTransitionResult {
|
||||
match (info.score_state(), previous_state) {
|
||||
(ScoreState::Banned, ScoreState::Healthy | ScoreState::ForcedDisconnect) => {
|
||||
debug!(log, "Peer has been banned"; "peer_id" => %peer_id, "score" => %info.score());
|
||||
debug!(%peer_id, score = %info.score(), "Peer has been banned");
|
||||
ScoreTransitionResult::Banned
|
||||
}
|
||||
(ScoreState::ForcedDisconnect, ScoreState::Banned | ScoreState::Healthy) => {
|
||||
debug!(log, "Peer transitioned to forced disconnect score state"; "peer_id" => %peer_id, "score" => %info.score(), "past_score_state" => %previous_state);
|
||||
debug!(
|
||||
%peer_id,
|
||||
score = %info.score(),
|
||||
past_score_state = %previous_state,
|
||||
"Peer transitioned to forced disconnect score state"
|
||||
);
|
||||
// disconnect the peer if it's currently connected or dialing
|
||||
if info.is_connected_or_dialing() {
|
||||
ScoreTransitionResult::Disconnected
|
||||
@@ -1138,11 +1137,21 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
}
|
||||
}
|
||||
(ScoreState::Healthy, ScoreState::ForcedDisconnect) => {
|
||||
debug!(log, "Peer transitioned to healthy score state"; "peer_id" => %peer_id, "score" => %info.score(), "past_score_state" => %previous_state);
|
||||
debug!(
|
||||
%peer_id,
|
||||
score = %info.score(),
|
||||
past_score_state = %previous_state,
|
||||
"Peer transitioned to healthy score state"
|
||||
);
|
||||
ScoreTransitionResult::NoAction
|
||||
}
|
||||
(ScoreState::Healthy, ScoreState::Banned) => {
|
||||
debug!(log, "Peer transitioned to healthy score state"; "peer_id" => %peer_id, "score" => %info.score(), "past_score_state" => %previous_state);
|
||||
debug!(
|
||||
%peer_id,
|
||||
score = %info.score(),
|
||||
past_score_state = %previous_state,
|
||||
"Peer transitioned to healthy score state"
|
||||
);
|
||||
// unban the peer if it was previously banned.
|
||||
ScoreTransitionResult::Unbanned
|
||||
}
|
||||
@@ -1309,24 +1318,11 @@ impl BannedPeersCount {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use libp2p::core::multiaddr::Protocol;
|
||||
use slog::{o, Drain};
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use types::MinimalEthSpec;
|
||||
|
||||
type M = MinimalEthSpec;
|
||||
|
||||
pub fn build_log(level: slog::Level, enabled: bool) -> slog::Logger {
|
||||
let decorator = slog_term::TermDecorator::new().build();
|
||||
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
||||
let drain = slog_async::Async::new(drain).build().fuse();
|
||||
|
||||
if enabled {
|
||||
slog::Logger::root(drain.filter_level(level).fuse(), o!())
|
||||
} else {
|
||||
slog::Logger::root(drain.filter(|_| false).fuse(), o!())
|
||||
}
|
||||
}
|
||||
|
||||
fn add_score<E: EthSpec>(db: &mut PeerDB<E>, peer_id: &PeerId, score: f64) {
|
||||
if let Some(info) = db.peer_info_mut(peer_id) {
|
||||
info.add_to_score(score);
|
||||
@@ -1340,8 +1336,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn get_db() -> PeerDB<M> {
|
||||
let log = build_log(slog::Level::Debug, false);
|
||||
PeerDB::new(vec![], false, &log)
|
||||
PeerDB::new(vec![], false)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2039,8 +2034,7 @@ mod tests {
|
||||
#[allow(clippy::float_cmp)]
|
||||
fn test_trusted_peers_score() {
|
||||
let trusted_peer = PeerId::random();
|
||||
let log = build_log(slog::Level::Debug, false);
|
||||
let mut pdb: PeerDB<M> = PeerDB::new(vec![trusted_peer], false, &log);
|
||||
let mut pdb: PeerDB<M> = PeerDB::new(vec![trusted_peer], false);
|
||||
|
||||
pdb.connect_ingoing(&trusted_peer, "/ip4/0.0.0.0".parse().unwrap(), None);
|
||||
|
||||
@@ -2063,8 +2057,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_disable_peer_scoring() {
|
||||
let peer = PeerId::random();
|
||||
let log = build_log(slog::Level::Debug, false);
|
||||
let mut pdb: PeerDB<M> = PeerDB::new(vec![], true, &log);
|
||||
let mut pdb: PeerDB<M> = PeerDB::new(vec![], true);
|
||||
|
||||
pdb.connect_ingoing(&peer, "/ip4/0.0.0.0".parse().unwrap(), None);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user