Merge latest master

This commit is contained in:
Age Manning
2020-05-04 15:43:49 +10:00
27 changed files with 913 additions and 545 deletions

View File

@@ -2,7 +2,7 @@
pub use self::peerdb::*;
use crate::metrics;
use crate::rpc::MetaData;
use crate::rpc::{MetaData, Protocol, RPCError, RPCResponseErrorCode};
use crate::{NetworkGlobals, PeerId};
use futures::prelude::*;
use futures::Stream;
@@ -10,6 +10,7 @@ use hashmap_delay::HashSetDelay;
use libp2p::identify::IdentifyInfo;
use slog::{crit, debug, error, warn};
use smallvec::SmallVec;
use std::convert::TryInto;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
@@ -21,11 +22,11 @@ mod peer_info;
mod peer_sync_status;
mod peerdb;
pub use peer_info::PeerInfo;
pub use peer_info::{PeerConnectionStatus::*, PeerInfo};
pub use peer_sync_status::{PeerSyncStatus, SyncInfo};
/// The minimum reputation before a peer is disconnected.
// Most likely this needs tweaking
const _MINIMUM_REPUTATION_BEFORE_BAN: Rep = 20;
// Most likely this needs tweaking.
const MIN_REP_BEFORE_BAN: Rep = 10;
/// The time in seconds between re-status's peers.
const STATUS_INTERVAL: u64 = 300;
/// The time in seconds between PING events. We do not send a ping if the other peer as PING'd us within
@@ -34,7 +35,7 @@ const PING_INTERVAL: u64 = 30;
/// The main struct that handles peer's reputation and connection status.
pub struct PeerManager<TSpec: EthSpec> {
/// Storage of network globals to access the PeerDB.
/// Storage of network globals to access the `PeerDB`.
network_globals: Arc<NetworkGlobals<TSpec>>,
/// A queue of events that the `PeerManager` is waiting to produce.
events: SmallVec<[PeerManagerEvent; 5]>,
@@ -48,22 +49,45 @@ pub struct PeerManager<TSpec: EthSpec> {
log: slog::Logger,
}
/// A collection of actions a peer can perform which will adjust its reputation
/// A collection of actions a peer can perform which will adjust its reputation.
/// Each variant has an associated reputation change.
// To easily assess the behaviour of reputation changes the number of variants should stay low, and
// somewhat generic.
pub enum PeerAction {
/// The peer timed out on an RPC request/response.
_TimedOut = -10,
/// The peer sent and invalid request/response or encoding.
_InvalidMessage = -20,
/// The peer sent something objectively malicious.
_Malicious = -50,
/// We should not communicate more with this peer.
/// This action will cause the peer to get banned.
Fatal,
/// An error occurred with this peer but it is not necessarily malicious.
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
/// kicked.
/// NOTE: ~15 occurrences will get the peer banned
HighToleranceError,
/// An error occurred with this peer but it is not necessarily malicious.
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
/// kicked.
/// NOTE: ~10 occurrences will get the peer banned
MidToleranceError,
/// This peer's action is not malicious but will not be tolerated. A few occurrences will cause
/// the peer to get kicked.
/// NOTE: ~5 occurrences will get the peer banned
LowToleranceError,
/// Received an expected message.
_ValidMessage = 20,
/// Peer disconnected.
Disconnected = -30,
_ValidMessage,
}
/// The events that the PeerManager outputs (requests).
impl PeerAction {
fn rep_change(&self) -> RepChange {
match self {
PeerAction::Fatal => RepChange::worst(),
PeerAction::LowToleranceError => RepChange::bad(60),
PeerAction::MidToleranceError => RepChange::bad(25),
PeerAction::HighToleranceError => RepChange::bad(15),
PeerAction::_ValidMessage => RepChange::good(20),
}
}
}
/// The events that the `PeerManager` outputs (requests).
pub enum PeerManagerEvent {
/// Sends a STATUS to a peer.
Status(PeerId),
@@ -98,24 +122,27 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
if let Some(peer_info) = self.network_globals.peers.read().peer_info(peer_id) {
// received a ping
// reset the to-ping timer for this peer
debug!(self.log, "Received a ping request"; "peer_id" => format!("{}", peer_id), "seq_no" => seq);
debug!(self.log, "Received a ping request"; "peer_id" => peer_id.to_string(), "seq_no" => seq);
self.ping_peers.insert(peer_id.clone());
// if the sequence number is unknown send update the meta data of the peer.
if let Some(meta_data) = &peer_info.meta_data {
if meta_data.seq_number < seq {
debug!(self.log, "Requesting new metadata from peer"; "peer_id" => format!("{}", peer_id), "known_seq_no" => meta_data.seq_number, "ping_seq_no" => seq);
debug!(self.log, "Requesting new metadata from peer";
"peer_id" => peer_id.to_string(), "known_seq_no" => meta_data.seq_number, "ping_seq_no" => seq);
self.events
.push(PeerManagerEvent::MetaData(peer_id.clone()));
}
} else {
// if we don't know the meta-data, request it
debug!(self.log, "Requesting first metadata from peer"; "peer_id" => format!("{}", peer_id));
debug!(self.log, "Requesting first metadata from peer";
"peer_id" => peer_id.to_string());
self.events
.push(PeerManagerEvent::MetaData(peer_id.clone()));
}
} else {
crit!(self.log, "Received a PING from an unknown peer"; "peer_id" => format!("{}", peer_id));
crit!(self.log, "Received a PING from an unknown peer";
"peer_id" => peer_id.to_string());
}
}
@@ -128,18 +155,20 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
// if the sequence number is unknown send update the meta data of the peer.
if let Some(meta_data) = &peer_info.meta_data {
if meta_data.seq_number < seq {
debug!(self.log, "Requesting new metadata from peer"; "peer_id" => format!("{}", peer_id), "known_seq_no" => meta_data.seq_number, "pong_seq_no" => seq);
debug!(self.log, "Requesting new metadata from peer";
"peer_id" => peer_id.to_string(), "known_seq_no" => meta_data.seq_number, "pong_seq_no" => seq);
self.events
.push(PeerManagerEvent::MetaData(peer_id.clone()));
}
} else {
// if we don't know the meta-data, request it
debug!(self.log, "Requesting first metadata from peer"; "peer_id" => format!("{}", peer_id));
debug!(self.log, "Requesting first metadata from peer";
"peer_id" => peer_id.to_string());
self.events
.push(PeerManagerEvent::MetaData(peer_id.clone()));
}
} else {
crit!(self.log, "Received a PONG from an unknown peer"; "peer_id" => format!("{}", peer_id));
crit!(self.log, "Received a PONG from an unknown peer"; "peer_id" => peer_id.to_string());
}
}
@@ -149,18 +178,24 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
if let Some(known_meta_data) = &peer_info.meta_data {
if known_meta_data.seq_number < meta_data.seq_number {
debug!(self.log, "Updating peer's metadata"; "peer_id" => format!("{}", peer_id), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
debug!(self.log, "Updating peer's metadata";
"peer_id" => peer_id.to_string(), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
peer_info.meta_data = Some(meta_data);
} else {
warn!(self.log, "Received old metadata"; "peer_id" => format!("{}", peer_id), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
// TODO: isn't this malicious/random behaviour? What happens if the seq_number
// is the same but the contents differ?
warn!(self.log, "Received old metadata";
"peer_id" => peer_id.to_string(), "known_seq_no" => known_meta_data.seq_number, "new_seq_no" => meta_data.seq_number);
}
} else {
// we have no meta-data for this peer, update
debug!(self.log, "Obtained peer's metadata"; "peer_id" => format!("{}", peer_id), "new_seq_no" => meta_data.seq_number);
debug!(self.log, "Obtained peer's metadata";
"peer_id" => peer_id.to_string(), "new_seq_no" => meta_data.seq_number);
peer_info.meta_data = Some(meta_data);
}
} else {
crit!(self.log, "Received METADATA from an unknown peer"; "peer_id" => format!("{}", peer_id));
crit!(self.log, "Received METADATA from an unknown peer";
"peer_id" => peer_id.to_string());
}
}
@@ -169,38 +204,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.status_peers.insert(peer_id.clone());
}
/// Checks the reputation of a peer and if it is too low, bans it and
/// sends the corresponding event. Informs if it got banned
fn _gets_banned(&mut self, peer_id: &PeerId) -> bool {
// if the peer was already banned don't inform again
let mut peerdb = self.network_globals.peers.write();
if let Some(connection_status) = peerdb.connection_status(peer_id) {
if peerdb.reputation(peer_id) < _MINIMUM_REPUTATION_BEFORE_BAN
&& !connection_status.is_banned()
{
peerdb.ban(peer_id);
self.events
.push(PeerManagerEvent::_BanPeer(peer_id.clone()));
return true;
}
}
false
}
/// Requests that a peer get disconnected.
pub fn _disconnect_peer(&mut self, peer_id: &PeerId) {
self.events
.push(PeerManagerEvent::_DisconnectPeer(peer_id.clone()));
}
/// Updates the state of the peer as disconnected.
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
self.update_reputations();
{
let mut peerdb = self.network_globals.peers.write();
peerdb.disconnect(peer_id);
peerdb.add_reputation(peer_id, PeerAction::Disconnected as Rep);
}
// remove the ping and status timer for the peer
@@ -225,35 +234,15 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.connect_peer(peer_id, true)
}
/// Provides a given peer's reputation if it exists.
pub fn _get_peer_rep(&self, peer_id: &PeerId) -> Rep {
self.network_globals.peers.read().reputation(peer_id)
}
/// Updates the reputation of known peers according to their connection
/// status and the time that has passed.
pub fn update_reputations(&mut self) {
let now = Instant::now();
let elapsed = (now - self.last_updated).as_secs();
// 0 seconds means now - last_updated < 0, but (most likely) not = 0.
// In this case, do nothing (updating last_updated would propagate
// rounding errors)
if elapsed > 0 {
self.last_updated = now;
// TODO decide how reputations change with time. If they get too low
// set the peers as banned
}
}
/// Reports a peer for some action.
///
/// If the peer doesn't exist, log a warning and insert defaults.
pub fn _report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
self.update_reputations();
self.network_globals
.peers
.write()
.add_reputation(peer_id, action as Rep);
.add_reputation(peer_id, action.rep_change());
self.update_reputations();
}
@@ -263,10 +252,68 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
peer_info.client = client::Client::from_identify_info(info);
peer_info.listening_addresses = info.listen_addrs.clone();
} else {
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => format!("{}", peer_id));
crit!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
}
}
pub fn handle_rpc_error(&mut self, peer_id: &PeerId, protocol: Protocol, err: &RPCError) {
debug!(self.log, "RPCError"; "protocol" => protocol.to_string(), "err" => err.to_string());
// Map this error to a `PeerAction` (if any)
let peer_action = match err {
RPCError::IncompleteStream => {
// They closed early, this could mean poor connection
PeerAction::MidToleranceError
}
RPCError::InternalError(_reason) => {
// Our fault. Do nothing
return;
}
RPCError::InvalidData => {
// Peer is not complying with the protocol. This is considered a malicious action
PeerAction::Fatal
}
RPCError::IoError(_e) => {
// this could their fault or ours, so we tolerate this
PeerAction::HighToleranceError
}
RPCError::ErrorResponse(code) => match code {
RPCResponseErrorCode::Unknown => PeerAction::HighToleranceError,
RPCResponseErrorCode::ServerError => PeerAction::MidToleranceError,
RPCResponseErrorCode::InvalidRequest => PeerAction::LowToleranceError,
},
RPCError::SSZDecodeError(_) => PeerAction::Fatal,
RPCError::UnsupportedProtocol => {
// Not supporting a protocol shouldn't be considered a malicious action, but
// it is an action that in some cases will make the peer unfit to continue
// communicating.
// TODO: To avoid punishing a peer repeatedly for not supporting a protocol, this
// information could be stored and used to prevent sending requests for the given
// protocol to this peer. Similarly, to avoid blacklisting a peer for a protocol
// forever, if stored this information should expire.
match protocol {
Protocol::Ping => PeerAction::Fatal,
Protocol::BlocksByRange => return,
Protocol::BlocksByRoot => return,
Protocol::Goodbye => return,
Protocol::MetaData => PeerAction::LowToleranceError,
Protocol::Status => PeerAction::LowToleranceError,
}
}
RPCError::StreamTimeout => match protocol {
Protocol::Ping => PeerAction::LowToleranceError,
Protocol::BlocksByRange => PeerAction::MidToleranceError,
Protocol::BlocksByRoot => PeerAction::MidToleranceError,
Protocol::Goodbye => return,
Protocol::MetaData => return,
Protocol::Status => return,
},
RPCError::NegotiationTimeout => PeerAction::HighToleranceError,
};
self.report_peer(peer_id, peer_action);
}
/* Internal functions */
/// Registers a peer as connected. The `ingoing` parameter determines if the peer is being
@@ -277,7 +324,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
/// This informs if the peer was accepted in to the db or not.
// TODO: Drop peers if over max_peer limit
fn connect_peer(&mut self, peer_id: &PeerId, outgoing: bool) -> bool {
// TODO: Call this on a timer
// TODO: remove after timed updates
self.update_reputations();
{
@@ -290,7 +337,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
if outgoing {
peerdb.connect_outgoing(peer_id);
} else {
peerdb.connect_outgoing(peer_id);
peerdb.connect_ingoing(peer_id);
}
}
@@ -312,6 +359,86 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
pub fn _dialing_peer(&mut self, peer_id: &PeerId) {
self.network_globals.peers.write().dialing_peer(peer_id);
}
/// Updates the reputation of known peers according to their connection
/// status and the time that has passed.
///
/// **Disconnected peers** get a 1rep hit every hour they stay disconnected.
/// **Banned peers** get a 1rep gain for every hour to slowly allow them back again.
///
/// A banned(disconnected) peer that gets its rep above(below) MIN_REP_BEFORE_BAN is
/// now considered a disconnected(banned) peer.
fn update_reputations(&mut self) {
// avoid locking the peerdb too often
// TODO: call this on a timer
if self.last_updated.elapsed().as_secs() < 30 {
return;
}
let now = Instant::now();
// Check for peers that get banned, unbanned and that should be disconnected
let mut ban_queue = Vec::new();
let mut unban_queue = Vec::new();
/* Check how long have peers been in this state and update their reputations if needed */
let mut pdb = self.network_globals.peers.write();
for (id, info) in pdb.peers_mut() {
// Update reputations
match info.connection_status {
Connected { .. } => {
// Connected peers gain reputation by sending useful messages
}
Disconnected { since } | Banned { since } => {
// For disconnected peers, lower their reputation by 1 for every hour they
// stay disconnected. This helps us slowly forget disconnected peers.
// In the same way, slowly allow banned peers back again.
let dc_hours = (now - since).as_secs() / 3600;
let last_dc_hours = (self.last_updated - since).as_secs() / 3600;
if dc_hours > last_dc_hours {
// this should be 1 most of the time
let rep_dif = (dc_hours - last_dc_hours)
.try_into()
.unwrap_or(Rep::max_value());
info.reputation = if info.connection_status.is_banned() {
info.reputation.saturating_add(rep_dif)
} else {
info.reputation.saturating_sub(rep_dif)
};
}
}
Dialing { since } => {
// A peer shouldn't be dialing for more than 2 minutes
if since.elapsed().as_secs() > 120 {
warn!(self.log,"Peer has been dialing for too long"; "peer_id" => id.to_string());
// TODO: decide how to handle this
}
}
}
// Check if the peer gets banned or unbanned and if it should be disconnected
if info.reputation < MIN_REP_BEFORE_BAN && !info.connection_status.is_banned() {
// This peer gets banned. Check if we should request disconnection
ban_queue.push(id.clone());
} else if info.reputation >= MIN_REP_BEFORE_BAN && info.connection_status.is_banned() {
// This peer gets unbanned
unban_queue.push(id.clone());
}
}
for id in ban_queue {
pdb.ban(&id);
self.events.push(PeerManagerEvent::_BanPeer(id.clone()));
}
for id in unban_queue {
pdb.disconnect(&id);
}
self.last_updated = Instant::now();
}
}
impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
@@ -319,10 +446,12 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
// poll the timeouts for pings and status'
// TODO: was getting a bit messy with while lets. Check if logic is preserved.
loop {
match self.ping_peers.poll_next_unpin(cx) {
Poll::Ready(Some(Ok(peer_id))) => self.events.push(PeerManagerEvent::Ping(peer_id)),
Poll::Ready(Some(Ok(peer_id))) => {
self.ping_peers.insert(peer_id.clone());
self.events.push(PeerManagerEvent::Ping(peer_id));
}
Poll::Ready(Some(Err(e))) => {
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
}
@@ -333,6 +462,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
loop {
match self.status_peers.poll_next_unpin(cx) {
Poll::Ready(Some(Ok(peer_id))) => {
self.status_peers.insert(peer_id.clone());
self.events.push(PeerManagerEvent::Status(peer_id))
}
Poll::Ready(Some(Err(e))) => {