mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Merge latest master
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
pub use self::peerdb::*;
|
||||
use crate::metrics;
|
||||
use crate::rpc::MetaData;
|
||||
use crate::rpc::{MetaData, Protocol, RPCError, RPCResponseErrorCode};
|
||||
use crate::{NetworkGlobals, PeerId};
|
||||
use futures::prelude::*;
|
||||
use futures::Stream;
|
||||
@@ -10,6 +10,7 @@ use hashmap_delay::HashSetDelay;
|
||||
use libp2p::identify::IdentifyInfo;
|
||||
use slog::{crit, debug, error, warn};
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::TryInto;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
@@ -21,11 +22,11 @@ mod peer_info;
|
||||
mod peer_sync_status;
|
||||
mod peerdb;
|
||||
|
||||
pub use peer_info::PeerInfo;
|
||||
pub use peer_info::{PeerConnectionStatus::*, PeerInfo};
|
||||
pub use peer_sync_status::{PeerSyncStatus, SyncInfo};
|
||||
/// The minimum reputation before a peer is disconnected.
|
||||
// Most likely this needs tweaking
|
||||
const _MINIMUM_REPUTATION_BEFORE_BAN: Rep = 20;
|
||||
// Most likely this needs tweaking.
|
||||
const MIN_REP_BEFORE_BAN: Rep = 10;
|
||||
/// The time in seconds between re-status's peers.
|
||||
const STATUS_INTERVAL: u64 = 300;
|
||||
/// The time in seconds between PING events. We do not send a ping if the other peer as PING'd us within
|
||||
@@ -34,7 +35,7 @@ const PING_INTERVAL: u64 = 30;
|
||||
|
||||
/// The main struct that handles peer's reputation and connection status.
|
||||
pub struct PeerManager<TSpec: EthSpec> {
|
||||
/// Storage of network globals to access the PeerDB.
|
||||
/// Storage of network globals to access the `PeerDB`.
|
||||
network_globals: Arc<NetworkGlobals<TSpec>>,
|
||||
/// A queue of events that the `PeerManager` is waiting to produce.
|
||||
events: SmallVec<[PeerManagerEvent; 5]>,
|
||||
@@ -48,22 +49,45 @@ pub struct PeerManager<TSpec: EthSpec> {
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
/// A collection of actions a peer can perform which will adjust its reputation
|
||||
/// A collection of actions a peer can perform which will adjust its reputation.
|
||||
/// Each variant has an associated reputation change.
|
||||
// To easily assess the behaviour of reputation changes the number of variants should stay low, and
|
||||
// somewhat generic.
|
||||
pub enum PeerAction {
|
||||
/// The peer timed out on an RPC request/response.
|
||||
_TimedOut = -10,
|
||||
/// The peer sent and invalid request/response or encoding.
|
||||
_InvalidMessage = -20,
|
||||
/// The peer sent something objectively malicious.
|
||||
_Malicious = -50,
|
||||
/// We should not communicate more with this peer.
|
||||
/// This action will cause the peer to get banned.
|
||||
Fatal,
|
||||
/// An error occurred with this peer but it is not necessarily malicious.
|
||||
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
|
||||
/// kicked.
|
||||
/// NOTE: ~15 occurrences will get the peer banned
|
||||
HighToleranceError,
|
||||
/// An error occurred with this peer but it is not necessarily malicious.
|
||||
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
|
||||
/// kicked.
|
||||
/// NOTE: ~10 occurrences will get the peer banned
|
||||
MidToleranceError,
|
||||
/// This peer's action is not malicious but will not be tolerated. A few occurrences will cause
|
||||
/// the peer to get kicked.
|
||||
/// NOTE: ~5 occurrences will get the peer banned
|
||||
LowToleranceError,
|
||||
/// Received an expected message.
|
||||
_ValidMessage = 20,
|
||||
/// Peer disconnected.
|
||||
Disconnected = -30,
|
||||
_ValidMessage,
|
||||
}
|
||||
|
||||
/// The events that the PeerManager outputs (requests).
|
||||
impl PeerAction {
|
||||
fn rep_change(&self) -> RepChange {
|
||||
match self {
|
||||
PeerAction::Fatal => RepChange::worst(),
|
||||
PeerAction::LowToleranceError => RepChange::bad(60),
|
||||
PeerAction::MidToleranceError => RepChange::bad(25),
|
||||
PeerAction::HighToleranceError => RepChange::bad(15),
|
||||
PeerAction::_ValidMessage => RepChange::good(20),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The events that the `PeerManager` outputs (requests).
|
||||
pub enum PeerManagerEvent {
|
||||
/// Sends a STATUS to a peer.
|
||||
Status(PeerId),
|
||||
@@ -98,24 +122,27 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
if let Some(peer_info) = self.network_globals.peers.read().peer_info(peer_id) {
|
||||
// received a ping
|
||||
// reset the to-ping timer for this peer
|
||||
debug!(self.log, "Received a ping request"; "peer_id" => format!("{}", peer_id), "seq_no" => seq);
|
||||
debug!(self.log, "Received a ping request"; "peer_id" => peer_id.to_string(), "seq_no" => seq);
|
||||
self.ping_peers.insert(peer_id.clone());
|
||||
|
||||
// if the sequence number is unknown send update the meta data of the peer.
|
||||
if let Some(meta_data) = &peer_info.meta_data {
|
||||
if meta_data.seq_number < seq {
|
||||
debug!(self.log, "Requesting new metadata from peer"; "peer_id" => format!("{}", peer_id), "known_seq_no" => meta_data.seq_number, "ping_seq_no" => seq);
|
||||
debug!(self.log, "Requesting new metadata from peer";
|
||||
"peer_id" => peer_id.to_string(), "known_seq_no" => meta_data.seq_number, "ping_seq_no" => seq);
|
||||
self.events
|
||||
.push(PeerManagerEvent::MetaData(peer_id.clone()));
|
||||
}
|
||||
} else {
|
||||
// if we don't know the meta-data, request it
|
||||
debug!(self.log, "Requesting first metadata from peer"; "peer_id" => format!("{}", peer_id));
|
||||
debug!(self.log, "Requesting first metadata from peer";
|
||||
"peer_id" => peer_id.to_string());
|
||||
self.events
|
||||
.push(PeerManagerEvent::MetaData(peer_id.clone()));
|
||||
}
|
||||
} else {
|
||||
crit!(self.log, "Received a PING from an unknown peer"; "peer_id" => format!("{}", peer_id));
|
||||
crit!(self.log, "Received a PING from an unknown peer";
|
||||
"peer_id" => peer_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,18 +155,20 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
// if the sequence number is unknown send update the meta data of the peer.
|
||||
if let Some(meta_data) = &peer_info.meta_data {
|
||||
if meta_data.seq_number < seq {
|
||||
debug!(self.log, "Requesting new metadata from peer"; "peer_id" => format!("{}", peer_id), "known_seq_no" => meta_data.seq_number, "pong_seq_no" => seq);
|
||||
debug!(self.log, "Requesting new metadata from peer";
|
||||
"peer_id" => peer_id.to_string(), "known_seq_no" => meta_data.seq_number, "pong_seq_no" => seq);
|
||||
self.events
|
||||
.push(PeerManagerEvent::MetaData(peer_id.clone()));
|
||||
}
|
||||
} else {
|
||||
// if we don't know the meta-data, request it
|
||||
debug!(self.log, "Requesting first metadata from peer"; "peer_id" => format!("{}", peer_id));
|
||||
debug!(self.log, "Requesting first metadata from peer";
|
||||
"peer_id" => peer_id.to_string());
|
||||
self.events
|
||||
.push(PeerManagerEvent::MetaData(peer_id.clone()));
|
||||
}
|
||||
} else {
|
||||
crit!(self.log, "Received a PONG from an unknown peer"; "peer_id" => format!("{}", peer_id));
|
||||
crit!(self.log, "Received a PONG from an unknown peer"; "peer_id" => peer_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,18 +178,24 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
if let Some(known_meta_data) = &peer_info.meta_data {
|
||||
if known_meta_data.seq_number < meta_data.seq_number {
|
||||
debug!(self.log, "Updating peer's metadata"; "peer_id" => format!("{}", peer_id), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
|
||||
debug!(self.log, "Updating peer's metadata";
|
||||
"peer_id" => peer_id.to_string(), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
|
||||
peer_info.meta_data = Some(meta_data);
|
||||
} else {
|
||||
warn!(self.log, "Received old metadata"; "peer_id" => format!("{}", peer_id), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
|
||||
// TODO: isn't this malicious/random behaviour? What happens if the seq_number
|
||||
// is the same but the contents differ?
|
||||
warn!(self.log, "Received old metadata";
|
||||
"peer_id" => peer_id.to_string(), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
|
||||
}
|
||||
} else {
|
||||
// we have no meta-data for this peer, update
|
||||
debug!(self.log, "Obtained peer's metadata"; "peer_id" => format!("{}", peer_id), "new_seq_no" => meta_data.seq_number);
|
||||
debug!(self.log, "Obtained peer's metadata";
|
||||
"peer_id" => peer_id.to_string(), "new_seq_no" => meta_data.seq_number);
|
||||
peer_info.meta_data = Some(meta_data);
|
||||
}
|
||||
} else {
|
||||
crit!(self.log, "Received METADATA from an unknown peer"; "peer_id" => format!("{}", peer_id));
|
||||
crit!(self.log, "Received METADATA from an unknown peer";
|
||||
"peer_id" => peer_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,38 +204,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.status_peers.insert(peer_id.clone());
|
||||
}
|
||||
|
||||
/// Checks the reputation of a peer and if it is too low, bans it and
|
||||
/// sends the corresponding event. Informs if it got banned
|
||||
fn _gets_banned(&mut self, peer_id: &PeerId) -> bool {
|
||||
// if the peer was already banned don't inform again
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
|
||||
if let Some(connection_status) = peerdb.connection_status(peer_id) {
|
||||
if peerdb.reputation(peer_id) < _MINIMUM_REPUTATION_BEFORE_BAN
|
||||
&& !connection_status.is_banned()
|
||||
{
|
||||
peerdb.ban(peer_id);
|
||||
self.events
|
||||
.push(PeerManagerEvent::_BanPeer(peer_id.clone()));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Requests that a peer get disconnected.
|
||||
pub fn _disconnect_peer(&mut self, peer_id: &PeerId) {
|
||||
self.events
|
||||
.push(PeerManagerEvent::_DisconnectPeer(peer_id.clone()));
|
||||
}
|
||||
|
||||
/// Updates the state of the peer as disconnected.
|
||||
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
|
||||
self.update_reputations();
|
||||
{
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
peerdb.disconnect(peer_id);
|
||||
peerdb.add_reputation(peer_id, PeerAction::Disconnected as Rep);
|
||||
}
|
||||
|
||||
// remove the ping and status timer for the peer
|
||||
@@ -225,35 +234,15 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.connect_peer(peer_id, true)
|
||||
}
|
||||
|
||||
/// Provides a given peer's reputation if it exists.
|
||||
pub fn _get_peer_rep(&self, peer_id: &PeerId) -> Rep {
|
||||
self.network_globals.peers.read().reputation(peer_id)
|
||||
}
|
||||
|
||||
/// Updates the reputation of known peers according to their connection
|
||||
/// status and the time that has passed.
|
||||
pub fn update_reputations(&mut self) {
|
||||
let now = Instant::now();
|
||||
let elapsed = (now - self.last_updated).as_secs();
|
||||
// 0 seconds means now - last_updated < 0, but (most likely) not = 0.
|
||||
// In this case, do nothing (updating last_updated would propagate
|
||||
// rounding errors)
|
||||
if elapsed > 0 {
|
||||
self.last_updated = now;
|
||||
// TODO decide how reputations change with time. If they get too low
|
||||
// set the peers as banned
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports a peer for some action.
|
||||
///
|
||||
/// If the peer doesn't exist, log a warning and insert defaults.
|
||||
pub fn _report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
|
||||
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
|
||||
self.update_reputations();
|
||||
self.network_globals
|
||||
.peers
|
||||
.write()
|
||||
.add_reputation(peer_id, action as Rep);
|
||||
.add_reputation(peer_id, action.rep_change());
|
||||
self.update_reputations();
|
||||
}
|
||||
|
||||
@@ -263,10 +252,68 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
peer_info.client = client::Client::from_identify_info(info);
|
||||
peer_info.listening_addresses = info.listen_addrs.clone();
|
||||
} else {
|
||||
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => format!("{}", peer_id));
|
||||
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle_rpc_error(&mut self, peer_id: &PeerId, protocol: Protocol, err: &RPCError) {
|
||||
debug!(self.log, "RPCError"; "protocol" => protocol.to_string(), "err" => err.to_string());
|
||||
|
||||
// Map this error to a `PeerAction` (if any)
|
||||
let peer_action = match err {
|
||||
RPCError::IncompleteStream => {
|
||||
// They closed early, this could mean poor connection
|
||||
PeerAction::MidToleranceError
|
||||
}
|
||||
RPCError::InternalError(_reason) => {
|
||||
// Our fault. Do nothing
|
||||
return;
|
||||
}
|
||||
RPCError::InvalidData => {
|
||||
// Peer is not complying with the protocol. This is considered a malicious action
|
||||
PeerAction::Fatal
|
||||
}
|
||||
RPCError::IoError(_e) => {
|
||||
// this could their fault or ours, so we tolerate this
|
||||
PeerAction::HighToleranceError
|
||||
}
|
||||
RPCError::ErrorResponse(code) => match code {
|
||||
RPCResponseErrorCode::Unknown => PeerAction::HighToleranceError,
|
||||
RPCResponseErrorCode::ServerError => PeerAction::MidToleranceError,
|
||||
RPCResponseErrorCode::InvalidRequest => PeerAction::LowToleranceError,
|
||||
},
|
||||
RPCError::SSZDecodeError(_) => PeerAction::Fatal,
|
||||
RPCError::UnsupportedProtocol => {
|
||||
// Not supporting a protocol shouldn't be considered a malicious action, but
|
||||
// it is an action that in some cases will make the peer unfit to continue
|
||||
// communicating.
|
||||
// TODO: To avoid punishing a peer repeatedly for not supporting a protocol, this
|
||||
// information could be stored and used to prevent sending requests for the given
|
||||
// protocol to this peer. Similarly, to avoid blacklisting a peer for a protocol
|
||||
// forever, if stored this information should expire.
|
||||
match protocol {
|
||||
Protocol::Ping => PeerAction::Fatal,
|
||||
Protocol::BlocksByRange => return,
|
||||
Protocol::BlocksByRoot => return,
|
||||
Protocol::Goodbye => return,
|
||||
Protocol::MetaData => PeerAction::LowToleranceError,
|
||||
Protocol::Status => PeerAction::LowToleranceError,
|
||||
}
|
||||
}
|
||||
RPCError::StreamTimeout => match protocol {
|
||||
Protocol::Ping => PeerAction::LowToleranceError,
|
||||
Protocol::BlocksByRange => PeerAction::MidToleranceError,
|
||||
Protocol::BlocksByRoot => PeerAction::MidToleranceError,
|
||||
Protocol::Goodbye => return,
|
||||
Protocol::MetaData => return,
|
||||
Protocol::Status => return,
|
||||
},
|
||||
RPCError::NegotiationTimeout => PeerAction::HighToleranceError,
|
||||
};
|
||||
|
||||
self.report_peer(peer_id, peer_action);
|
||||
}
|
||||
|
||||
/* Internal functions */
|
||||
|
||||
/// Registers a peer as connected. The `ingoing` parameter determines if the peer is being
|
||||
@@ -277,7 +324,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
/// This informs if the peer was accepted in to the db or not.
|
||||
// TODO: Drop peers if over max_peer limit
|
||||
fn connect_peer(&mut self, peer_id: &PeerId, outgoing: bool) -> bool {
|
||||
// TODO: Call this on a timer
|
||||
// TODO: remove after timed updates
|
||||
self.update_reputations();
|
||||
|
||||
{
|
||||
@@ -290,7 +337,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
if outgoing {
|
||||
peerdb.connect_outgoing(peer_id);
|
||||
} else {
|
||||
peerdb.connect_outgoing(peer_id);
|
||||
peerdb.connect_ingoing(peer_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -312,6 +359,86 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
pub fn _dialing_peer(&mut self, peer_id: &PeerId) {
|
||||
self.network_globals.peers.write().dialing_peer(peer_id);
|
||||
}
|
||||
|
||||
/// Updates the reputation of known peers according to their connection
|
||||
/// status and the time that has passed.
|
||||
///
|
||||
/// **Disconnected peers** get a 1rep hit every hour they stay disconnected.
|
||||
/// **Banned peers** get a 1rep gain for every hour to slowly allow them back again.
|
||||
///
|
||||
/// A banned(disconnected) peer that gets its rep above(below) MIN_REP_BEFORE_BAN is
|
||||
/// now considered a disconnected(banned) peer.
|
||||
fn update_reputations(&mut self) {
|
||||
// avoid locking the peerdb too often
|
||||
// TODO: call this on a timer
|
||||
if self.last_updated.elapsed().as_secs() < 30 {
|
||||
return;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
|
||||
// Check for peers that get banned, unbanned and that should be disconnected
|
||||
let mut ban_queue = Vec::new();
|
||||
let mut unban_queue = Vec::new();
|
||||
|
||||
/* Check how long have peers been in this state and update their reputations if needed */
|
||||
let mut pdb = self.network_globals.peers.write();
|
||||
|
||||
for (id, info) in pdb.peers_mut() {
|
||||
// Update reputations
|
||||
match info.connection_status {
|
||||
Connected { .. } => {
|
||||
// Connected peers gain reputation by sending useful messages
|
||||
}
|
||||
Disconnected { since } | Banned { since } => {
|
||||
// For disconnected peers, lower their reputation by 1 for every hour they
|
||||
// stay disconnected. This helps us slowly forget disconnected peers.
|
||||
// In the same way, slowly allow banned peers back again.
|
||||
let dc_hours = (now - since).as_secs() / 3600;
|
||||
let last_dc_hours = (self.last_updated - since).as_secs() / 3600;
|
||||
if dc_hours > last_dc_hours {
|
||||
// this should be 1 most of the time
|
||||
let rep_dif = (dc_hours - last_dc_hours)
|
||||
.try_into()
|
||||
.unwrap_or(Rep::max_value());
|
||||
|
||||
info.reputation = if info.connection_status.is_banned() {
|
||||
info.reputation.saturating_add(rep_dif)
|
||||
} else {
|
||||
info.reputation.saturating_sub(rep_dif)
|
||||
};
|
||||
}
|
||||
}
|
||||
Dialing { since } => {
|
||||
// A peer shouldn't be dialing for more than 2 minutes
|
||||
if since.elapsed().as_secs() > 120 {
|
||||
warn!(self.log,"Peer has been dialing for too long"; "peer_id" => id.to_string());
|
||||
// TODO: decide how to handle this
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check if the peer gets banned or unbanned and if it should be disconnected
|
||||
if info.reputation < MIN_REP_BEFORE_BAN && !info.connection_status.is_banned() {
|
||||
// This peer gets banned. Check if we should request disconnection
|
||||
ban_queue.push(id.clone());
|
||||
} else if info.reputation >= MIN_REP_BEFORE_BAN && info.connection_status.is_banned() {
|
||||
// This peer gets unbanned
|
||||
unban_queue.push(id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for id in ban_queue {
|
||||
pdb.ban(&id);
|
||||
|
||||
self.events.push(PeerManagerEvent::_BanPeer(id.clone()));
|
||||
}
|
||||
|
||||
for id in unban_queue {
|
||||
pdb.disconnect(&id);
|
||||
}
|
||||
|
||||
self.last_updated = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
@@ -319,10 +446,12 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
// poll the timeouts for pings and status'
|
||||
// TODO: was getting a bit messy with while lets. Check if logic is preserved.
|
||||
loop {
|
||||
match self.ping_peers.poll_next_unpin(cx) {
|
||||
Poll::Ready(Some(Ok(peer_id))) => self.events.push(PeerManagerEvent::Ping(peer_id)),
|
||||
Poll::Ready(Some(Ok(peer_id))) => {
|
||||
self.ping_peers.insert(peer_id.clone());
|
||||
self.events.push(PeerManagerEvent::Ping(peer_id));
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
|
||||
}
|
||||
@@ -333,6 +462,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
loop {
|
||||
match self.status_peers.poll_next_unpin(cx) {
|
||||
Poll::Ready(Some(Ok(peer_id))) => {
|
||||
self.status_peers.insert(peer_id.clone());
|
||||
self.events.push(PeerManagerEvent::Status(peer_id))
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
|
||||
Reference in New Issue
Block a user