Activate peer scoring (#1284)

* Initial score structure

* Peer manager update

* Updates to dialing

* Correct tests

* Correct typos and remove unused function

* Integrate scoring into the network crate

* Clean warnings

* Formatting

* Shift core functionality into the behaviour

* Temp commit

* Shift disconnections into the behaviour

* Temp commit

* Update libp2p and gossipsub

* Remove gossipsub lru cache

* Correct merge conflicts

* Modify handler and correct tests

* Update enr network globals on socket update

* Apply clippy lints

* Add new prysm fingerprint

* More clippy fixes
This commit is contained in:
Age Manning
2020-07-07 10:13:16 +10:00
committed by GitHub
parent 5977c00edb
commit 5bc8fea2e0
26 changed files with 1339 additions and 934 deletions

View File

@@ -2,7 +2,7 @@
pub use self::peerdb::*;
use crate::discovery::{Discovery, DiscoveryEvent};
use crate::rpc::{MetaData, Protocol, RPCError, RPCResponseErrorCode};
use crate::rpc::{GoodbyeReason, MetaData, Protocol, RPCError, RPCResponseErrorCode};
use crate::{error, metrics};
use crate::{Enr, EnrExt, NetworkConfig, NetworkGlobals, PeerId};
use futures::prelude::*;
@@ -10,7 +10,7 @@ use futures::Stream;
use hashset_delay::HashSetDelay;
use libp2p::core::multiaddr::Protocol as MProtocol;
use libp2p::identify::IdentifyInfo;
use slog::{crit, debug, error};
use slog::{crit, debug, error, warn};
use smallvec::SmallVec;
use std::{
net::SocketAddr,
@@ -27,12 +27,11 @@ pub mod client;
mod peer_info;
mod peer_sync_status;
mod peerdb;
pub(crate) mod score;
pub use peer_info::{PeerConnectionStatus::*, PeerInfo};
pub use peer_sync_status::{PeerSyncStatus, SyncInfo};
/// The minimum reputation before a peer is disconnected.
// Most likely this needs tweaking.
const _MIN_REP_BEFORE_BAN: Rep = 10;
use score::{PeerAction, ScoreState};
/// The time in seconds between re-status's peers.
const STATUS_INTERVAL: u64 = 300;
/// The time in seconds between PING events. We do not send a ping if the other peer as PING'd us within
@@ -63,44 +62,6 @@ pub struct PeerManager<TSpec: EthSpec> {
log: slog::Logger,
}
/// A collection of actions a peer can perform which will adjust its reputation.
/// Each variant has an associated reputation change.
// To easily assess the behaviour of reputation changes the number of variants should stay low, and
// somewhat generic.
pub enum PeerAction {
/// We should not communicate more with this peer.
/// This action will cause the peer to get banned.
Fatal,
/// An error occurred with this peer but it is not necessarily malicious.
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
/// kicked.
/// NOTE: ~15 occurrences will get the peer banned
HighToleranceError,
/// An error occurred with this peer but it is not necessarily malicious.
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
/// kicked.
/// NOTE: ~10 occurrences will get the peer banned
MidToleranceError,
/// This peer's action is not malicious but will not be tolerated. A few occurrences will cause
/// the peer to get kicked.
/// NOTE: ~5 occurrences will get the peer banned
LowToleranceError,
/// Received an expected message.
_ValidMessage,
}
impl PeerAction {
fn rep_change(&self) -> RepChange {
match self {
PeerAction::Fatal => RepChange::worst(),
PeerAction::LowToleranceError => RepChange::bad(60),
PeerAction::MidToleranceError => RepChange::bad(25),
PeerAction::HighToleranceError => RepChange::bad(15),
PeerAction::_ValidMessage => RepChange::good(20),
}
}
}
/// The events that the `PeerManager` outputs (requests).
pub enum PeerManagerEvent {
/// Dial a PeerId.
@@ -114,7 +75,7 @@ pub enum PeerManagerEvent {
/// Request METADATA from a peer.
MetaData(PeerId),
/// The peer should be disconnected.
DisconnectPeer(PeerId),
DisconnectPeer(PeerId, GoodbyeReason),
}
impl<TSpec: EthSpec> PeerManager<TSpec> {
@@ -147,6 +108,89 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
/* Public accessible functions */
/// Attempts to connect to a peer.
///
/// Returns true if the peer was accepted into the database.
pub fn dial_peer(&mut self, peer_id: &PeerId) -> bool {
self.events.push(PeerManagerEvent::Dial(peer_id.clone()));
self.connect_peer(peer_id, ConnectingType::Dialing)
}
/// The application layer wants to disconnect from a peer for a particular reason.
///
/// All instant disconnections are fatal and we ban the associated peer.
///
/// This will send a goodbye and disconnect the peer if it is connected or dialing.
pub fn goodbye_peer(&mut self, peer_id: &PeerId, reason: GoodbyeReason) {
// get the peer info
if let Some(info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
debug!(self.log, "Sending goodbye to peer"; "peer_id" => peer_id.to_string(), "reason" => reason.to_string(), "score" => info.score.to_string());
// Goodbye's are fatal
info.score.apply_peer_action(PeerAction::Fatal);
if info.connection_status.is_connected_or_dialing() {
self.events
.push(PeerManagerEvent::DisconnectPeer(peer_id.clone(), reason));
}
}
}
/// Reports a peer for some action.
///
/// If the peer doesn't exist, log a warning and insert defaults.
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
// TODO: Remove duplicate code - This is duplicated in the update_peer_scores()
// function.
// Variables to update the PeerDb if required.
let mut ban_peer = None;
let mut unban_peer = None;
if let Some(info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
let previous_state = info.score.state();
info.score.apply_peer_action(action);
if previous_state != info.score.state() {
match info.score.state() {
ScoreState::Ban => {
debug!(self.log, "Peer has been banned"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string());
ban_peer = Some(peer_id.clone());
if info.connection_status.is_connected_or_dialing() {
self.events.push(PeerManagerEvent::DisconnectPeer(
peer_id.clone(),
GoodbyeReason::BadScore,
));
}
}
ScoreState::Disconnect => {
debug!(self.log, "Peer transitioned to disconnect state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
// disconnect the peer if it's currently connected or dialing
unban_peer = Some(peer_id.clone());
if info.connection_status.is_connected_or_dialing() {
self.events.push(PeerManagerEvent::DisconnectPeer(
peer_id.clone(),
GoodbyeReason::BadScore,
));
}
// TODO: Update the peer manager to inform that the peer is disconnecting.
}
ScoreState::Healthy => {
debug!(self.log, "Peer transitioned to healthy state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
// unban the peer if it was previously banned.
unban_peer = Some(peer_id.clone());
}
}
}
}
// Update the PeerDB state.
if let Some(peer_id) = ban_peer.take() {
self.network_globals.peers.write().ban(&peer_id);
} else {
if let Some(peer_id) = unban_peer.take() {
self.network_globals.peers.write().unban(&peer_id);
}
}
}
/* Discovery Requests */
/// Provides a reference to the underlying discovery service.
@@ -178,9 +222,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.status_peers.insert(peer_id.clone());
}
/* Notifications from the Swarm */
/// Updates the state of the peer as disconnected.
///
/// This is also called when dialing a peer fails.
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
//self.update_reputations();
self.network_globals.peers.write().disconnect(peer_id);
// remove the ping and status timer for the peer
@@ -193,6 +240,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
);
}
/// A dial attempt has failed.
///
/// NOTE: It can be the case that we are dialing a peer and during the dialing process the peer
/// connects and the dial attempt later fails. To handle this, we only update the peer_db if
/// the peer is not already connected.
pub fn notify_dial_failure(&mut self, peer_id: &PeerId) {
if !self.network_globals.peers.read().is_connected(peer_id) {
self.notify_disconnect(peer_id);
}
}
/// Sets a peer as connected as long as their reputation allows it
/// Informs if the peer was accepted
pub fn connect_ingoing(&mut self, peer_id: &PeerId) -> bool {
@@ -205,28 +263,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.connect_peer(peer_id, ConnectingType::OutgoingConnected)
}
/// Updates the database informing that a peer is being dialed.
pub fn dialing_peer(&mut self, peer_id: &PeerId) -> bool {
self.connect_peer(peer_id, ConnectingType::Dialing)
}
/// Updates the database informing that a peer is being disconnected.
pub fn _disconnecting_peer(&mut self, _peer_id: &PeerId) -> bool {
// TODO: implement
true
}
/// Reports a peer for some action.
/// Reports if a peer is banned or not.
///
/// If the peer doesn't exist, log a warning and insert defaults.
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
//TODO: Check these. There are double disconnects for example
// self.update_reputations();
self.network_globals
.peers
.write()
.add_reputation(peer_id, action.rep_change());
// self.update_reputations();
/// This is used to determine if we should accept incoming connections.
pub fn is_banned(&self, peer_id: &PeerId) -> bool {
self.network_globals.peers.read().is_banned(peer_id)
}
/// Updates `PeerInfo` with `identify` information.
@@ -239,9 +286,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
}
}
/// An error has occured in the RPC.
///
/// This adjusts a peer's score based on the error.
pub fn handle_rpc_error(&mut self, peer_id: &PeerId, protocol: Protocol, err: &RPCError) {
let client = self.network_globals.client(peer_id);
debug!(self.log, "RPCError"; "protocol" => protocol.to_string(), "err" => err.to_string(), "client" => client.to_string());
warn!(self.log, "RPC Error"; "protocol" => protocol.to_string(), "err" => err.to_string(), "client" => client.to_string());
// Map this error to a `PeerAction` (if any)
let peer_action = match err {
@@ -423,6 +473,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
/// multiaddr here, however this could relate to duplicate PeerId's etc. If the lookup
/// proves resource constraining, we should switch to multiaddr dialling here.
fn peers_discovered(&mut self, peers: &[Enr], min_ttl: Option<Instant>) {
let mut to_dial_peers = Vec::new();
for enr in peers {
let peer_id = enr.peer_id();
@@ -433,9 +485,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
.peers
.read()
.is_connected_or_dialing(&peer_id)
&& !self.network_globals.peers.read().peer_banned(&peer_id)
&& !self.network_globals.peers.read().is_banned(&peer_id)
{
debug!(self.log, "Dialing discovered peer"; "peer_id"=> peer_id.to_string());
// TODO: Update output
// This should be updated with the peer dialing. In fact created once the peer is
// dialed
@@ -445,9 +496,13 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
.write()
.update_min_ttl(&peer_id, min_ttl);
}
self.events.push(PeerManagerEvent::Dial(peer_id));
to_dial_peers.push(peer_id);
}
}
for peer_id in to_dial_peers {
debug!(self.log, "Dialing discovered peer"; "peer_id"=> peer_id.to_string());
self.dial_peer(&peer_id);
}
}
/// Registers a peer as connected. The `ingoing` parameter determines if the peer is being
@@ -465,9 +520,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
let mut peerdb = self.network_globals.peers.write();
if peerdb.connection_status(peer_id).map(|c| c.is_banned()) == Some(true) {
// don't connect if the peer is banned
// TODO: Handle this case. If peer is banned this shouldn't be reached. It will put
// our connection/disconnection out of sync with libp2p
// return false;
slog::crit!(self.log, "Connection has been allowed to a banned peer"; "peer_id" => peer_id.to_string());
}
match connection {
@@ -491,36 +544,22 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
true
}
/// Notifies the peer manager that this peer is being dialed.
pub fn _dialing_peer(&mut self, peer_id: &PeerId) {
self.network_globals.peers.write().dialing_peer(peer_id);
}
/// Updates the reputation of known peers according to their connection
/// Updates the scores of known peers according to their connection
/// status and the time that has passed.
///
/// **Disconnected peers** get a 1rep hit every hour they stay disconnected.
/// **Banned peers** get a 1rep gain for every hour to slowly allow them back again.
///
/// A banned(disconnected) peer that gets its rep above(below) MIN_REP_BEFORE_BAN is
/// now considered a disconnected(banned) peer.
// TODO: Implement when reputation is added.
fn _update_reputations(&mut self) {
/*
// avoid locking the peerdb too often
// TODO: call this on a timer
let now = Instant::now();
// Check for peers that get banned, unbanned and that should be disconnected
let mut ban_queue = Vec::new();
let mut unban_queue = Vec::new();
/// NOTE: This is experimental and will likely be adjusted
fn update_peer_scores(&mut self) {
/* Check how long have peers been in this state and update their reputations if needed */
let mut pdb = self.network_globals.peers.write();
for (id, info) in pdb._peers_mut() {
// Update reputations
let mut to_ban_peers = Vec::new();
let mut to_unban_peers = Vec::new();
for (peer_id, info) in pdb.peers_mut() {
let previous_state = info.score.state();
// Update scores
info.score.update();
/* TODO: Implement logic about connection lifetimes
match info.connection_status {
Connected { .. } => {
// Connected peers gain reputation by sending useful messages
@@ -570,21 +609,49 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
// This peer gets unbanned
unban_queue.push(id.clone());
}
*/
// handle score transitions
if previous_state != info.score.state() {
match info.score.state() {
ScoreState::Ban => {
debug!(self.log, "Peer has been banned"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string());
to_ban_peers.push(peer_id.clone());
if info.connection_status.is_connected_or_dialing() {
self.events.push(PeerManagerEvent::DisconnectPeer(
peer_id.clone(),
GoodbyeReason::BadScore,
));
}
}
ScoreState::Disconnect => {
debug!(self.log, "Peer transitioned to disconnect state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
// disconnect the peer if it's currently connected or dialing
to_unban_peers.push(peer_id.clone());
if info.connection_status.is_connected_or_dialing() {
self.events.push(PeerManagerEvent::DisconnectPeer(
peer_id.clone(),
GoodbyeReason::BadScore,
));
}
// TODO: Update peer manager to report that it's disconnecting.
}
ScoreState::Healthy => {
debug!(self.log, "Peer transitioned to healthy state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
// unban the peer if it was previously banned.
to_unban_peers.push(peer_id.clone());
}
}
}
}
for id in ban_queue {
pdb.ban(&id);
self.events
.push(PeerManagerEvent::DisconnectPeer(id.clone()));
// process banning peers
for peer_id in to_ban_peers {
pdb.ban(&peer_id);
}
for id in unban_queue {
pdb.disconnect(&id);
// process unbanning peers
for peer_id in to_unban_peers {
pdb.unban(&peer_id);
}
self._last_updated = Instant::now();
*/
}
/// The Peer manager's heartbeat maintains the peer count and maintains peer reputations.
@@ -605,7 +672,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
// TODO: If we have too many peers, remove peers that are not required for subnet
// validation.
// TODO: Perform peer reputation maintenance here
// Updates peer's scores.
self.update_peer_scores();
}
}
@@ -636,7 +704,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
self.events.push(PeerManagerEvent::Ping(peer_id));
}
Poll::Ready(Some(Err(e))) => {
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
}
Poll::Ready(None) | Poll::Pending => break,
}
@@ -649,7 +717,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
self.events.push(PeerManagerEvent::Status(peer_id))
}
Poll::Ready(Some(Err(e))) => {
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
}
Poll::Ready(None) | Poll::Pending => break,
}