mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-09 11:41:51 +00:00
Activate peer scoring (#1284)
* Initial score structure * Peer manager update * Updates to dialing * Correct tests * Correct typos and remove unused function * Integrate scoring into the network crate * Clean warnings * Formatting * Shift core functionality into the behaviour * Temp commit * Shift disconnections into the behaviour * Temp commit * Update libp2p and gossipsub * Remove gossipsub lru cache * Correct merge conflicts * Modify handler and correct tests * Update enr network globals on socket update * Apply clippy lints * Add new prysm fingerprint * More clippy fixes
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
pub use self::peerdb::*;
|
||||
use crate::discovery::{Discovery, DiscoveryEvent};
|
||||
use crate::rpc::{MetaData, Protocol, RPCError, RPCResponseErrorCode};
|
||||
use crate::rpc::{GoodbyeReason, MetaData, Protocol, RPCError, RPCResponseErrorCode};
|
||||
use crate::{error, metrics};
|
||||
use crate::{Enr, EnrExt, NetworkConfig, NetworkGlobals, PeerId};
|
||||
use futures::prelude::*;
|
||||
@@ -10,7 +10,7 @@ use futures::Stream;
|
||||
use hashset_delay::HashSetDelay;
|
||||
use libp2p::core::multiaddr::Protocol as MProtocol;
|
||||
use libp2p::identify::IdentifyInfo;
|
||||
use slog::{crit, debug, error};
|
||||
use slog::{crit, debug, error, warn};
|
||||
use smallvec::SmallVec;
|
||||
use std::{
|
||||
net::SocketAddr,
|
||||
@@ -27,12 +27,11 @@ pub mod client;
|
||||
mod peer_info;
|
||||
mod peer_sync_status;
|
||||
mod peerdb;
|
||||
pub(crate) mod score;
|
||||
|
||||
pub use peer_info::{PeerConnectionStatus::*, PeerInfo};
|
||||
pub use peer_sync_status::{PeerSyncStatus, SyncInfo};
|
||||
/// The minimum reputation before a peer is disconnected.
|
||||
// Most likely this needs tweaking.
|
||||
const _MIN_REP_BEFORE_BAN: Rep = 10;
|
||||
use score::{PeerAction, ScoreState};
|
||||
/// The time in seconds between re-status's peers.
|
||||
const STATUS_INTERVAL: u64 = 300;
|
||||
/// The time in seconds between PING events. We do not send a ping if the other peer as PING'd us within
|
||||
@@ -63,44 +62,6 @@ pub struct PeerManager<TSpec: EthSpec> {
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
/// A collection of actions a peer can perform which will adjust its reputation.
|
||||
/// Each variant has an associated reputation change.
|
||||
// To easily assess the behaviour of reputation changes the number of variants should stay low, and
|
||||
// somewhat generic.
|
||||
pub enum PeerAction {
|
||||
/// We should not communicate more with this peer.
|
||||
/// This action will cause the peer to get banned.
|
||||
Fatal,
|
||||
/// An error occurred with this peer but it is not necessarily malicious.
|
||||
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
|
||||
/// kicked.
|
||||
/// NOTE: ~15 occurrences will get the peer banned
|
||||
HighToleranceError,
|
||||
/// An error occurred with this peer but it is not necessarily malicious.
|
||||
/// We have high tolerance for this actions: several occurrences are needed for a peer to get
|
||||
/// kicked.
|
||||
/// NOTE: ~10 occurrences will get the peer banned
|
||||
MidToleranceError,
|
||||
/// This peer's action is not malicious but will not be tolerated. A few occurrences will cause
|
||||
/// the peer to get kicked.
|
||||
/// NOTE: ~5 occurrences will get the peer banned
|
||||
LowToleranceError,
|
||||
/// Received an expected message.
|
||||
_ValidMessage,
|
||||
}
|
||||
|
||||
impl PeerAction {
|
||||
fn rep_change(&self) -> RepChange {
|
||||
match self {
|
||||
PeerAction::Fatal => RepChange::worst(),
|
||||
PeerAction::LowToleranceError => RepChange::bad(60),
|
||||
PeerAction::MidToleranceError => RepChange::bad(25),
|
||||
PeerAction::HighToleranceError => RepChange::bad(15),
|
||||
PeerAction::_ValidMessage => RepChange::good(20),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The events that the `PeerManager` outputs (requests).
|
||||
pub enum PeerManagerEvent {
|
||||
/// Dial a PeerId.
|
||||
@@ -114,7 +75,7 @@ pub enum PeerManagerEvent {
|
||||
/// Request METADATA from a peer.
|
||||
MetaData(PeerId),
|
||||
/// The peer should be disconnected.
|
||||
DisconnectPeer(PeerId),
|
||||
DisconnectPeer(PeerId, GoodbyeReason),
|
||||
}
|
||||
|
||||
impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
@@ -147,6 +108,89 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
|
||||
/* Public accessible functions */
|
||||
|
||||
/// Attempts to connect to a peer.
|
||||
///
|
||||
/// Returns true if the peer was accepted into the database.
|
||||
pub fn dial_peer(&mut self, peer_id: &PeerId) -> bool {
|
||||
self.events.push(PeerManagerEvent::Dial(peer_id.clone()));
|
||||
self.connect_peer(peer_id, ConnectingType::Dialing)
|
||||
}
|
||||
|
||||
/// The application layer wants to disconnect from a peer for a particular reason.
|
||||
///
|
||||
/// All instant disconnections are fatal and we ban the associated peer.
|
||||
///
|
||||
/// This will send a goodbye and disconnect the peer if it is connected or dialing.
|
||||
pub fn goodbye_peer(&mut self, peer_id: &PeerId, reason: GoodbyeReason) {
|
||||
// get the peer info
|
||||
if let Some(info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
debug!(self.log, "Sending goodbye to peer"; "peer_id" => peer_id.to_string(), "reason" => reason.to_string(), "score" => info.score.to_string());
|
||||
// Goodbye's are fatal
|
||||
info.score.apply_peer_action(PeerAction::Fatal);
|
||||
if info.connection_status.is_connected_or_dialing() {
|
||||
self.events
|
||||
.push(PeerManagerEvent::DisconnectPeer(peer_id.clone(), reason));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports a peer for some action.
|
||||
///
|
||||
/// If the peer doesn't exist, log a warning and insert defaults.
|
||||
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
|
||||
// TODO: Remove duplicate code - This is duplicated in the update_peer_scores()
|
||||
// function.
|
||||
|
||||
// Variables to update the PeerDb if required.
|
||||
let mut ban_peer = None;
|
||||
let mut unban_peer = None;
|
||||
|
||||
if let Some(info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
let previous_state = info.score.state();
|
||||
info.score.apply_peer_action(action);
|
||||
if previous_state != info.score.state() {
|
||||
match info.score.state() {
|
||||
ScoreState::Ban => {
|
||||
debug!(self.log, "Peer has been banned"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string());
|
||||
ban_peer = Some(peer_id.clone());
|
||||
if info.connection_status.is_connected_or_dialing() {
|
||||
self.events.push(PeerManagerEvent::DisconnectPeer(
|
||||
peer_id.clone(),
|
||||
GoodbyeReason::BadScore,
|
||||
));
|
||||
}
|
||||
}
|
||||
ScoreState::Disconnect => {
|
||||
debug!(self.log, "Peer transitioned to disconnect state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
|
||||
// disconnect the peer if it's currently connected or dialing
|
||||
unban_peer = Some(peer_id.clone());
|
||||
if info.connection_status.is_connected_or_dialing() {
|
||||
self.events.push(PeerManagerEvent::DisconnectPeer(
|
||||
peer_id.clone(),
|
||||
GoodbyeReason::BadScore,
|
||||
));
|
||||
}
|
||||
// TODO: Update the peer manager to inform that the peer is disconnecting.
|
||||
}
|
||||
ScoreState::Healthy => {
|
||||
debug!(self.log, "Peer transitioned to healthy state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
|
||||
// unban the peer if it was previously banned.
|
||||
unban_peer = Some(peer_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the PeerDB state.
|
||||
if let Some(peer_id) = ban_peer.take() {
|
||||
self.network_globals.peers.write().ban(&peer_id);
|
||||
} else {
|
||||
if let Some(peer_id) = unban_peer.take() {
|
||||
self.network_globals.peers.write().unban(&peer_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Discovery Requests */
|
||||
|
||||
/// Provides a reference to the underlying discovery service.
|
||||
@@ -178,9 +222,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.status_peers.insert(peer_id.clone());
|
||||
}
|
||||
|
||||
/* Notifications from the Swarm */
|
||||
|
||||
/// Updates the state of the peer as disconnected.
|
||||
///
|
||||
/// This is also called when dialing a peer fails.
|
||||
pub fn notify_disconnect(&mut self, peer_id: &PeerId) {
|
||||
//self.update_reputations();
|
||||
self.network_globals.peers.write().disconnect(peer_id);
|
||||
|
||||
// remove the ping and status timer for the peer
|
||||
@@ -193,6 +240,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
);
|
||||
}
|
||||
|
||||
/// A dial attempt has failed.
|
||||
///
|
||||
/// NOTE: It can be the case that we are dialing a peer and during the dialing process the peer
|
||||
/// connects and the dial attempt later fails. To handle this, we only update the peer_db if
|
||||
/// the peer is not already connected.
|
||||
pub fn notify_dial_failure(&mut self, peer_id: &PeerId) {
|
||||
if !self.network_globals.peers.read().is_connected(peer_id) {
|
||||
self.notify_disconnect(peer_id);
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets a peer as connected as long as their reputation allows it
|
||||
/// Informs if the peer was accepted
|
||||
pub fn connect_ingoing(&mut self, peer_id: &PeerId) -> bool {
|
||||
@@ -205,28 +263,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.connect_peer(peer_id, ConnectingType::OutgoingConnected)
|
||||
}
|
||||
|
||||
/// Updates the database informing that a peer is being dialed.
|
||||
pub fn dialing_peer(&mut self, peer_id: &PeerId) -> bool {
|
||||
self.connect_peer(peer_id, ConnectingType::Dialing)
|
||||
}
|
||||
|
||||
/// Updates the database informing that a peer is being disconnected.
|
||||
pub fn _disconnecting_peer(&mut self, _peer_id: &PeerId) -> bool {
|
||||
// TODO: implement
|
||||
true
|
||||
}
|
||||
|
||||
/// Reports a peer for some action.
|
||||
/// Reports if a peer is banned or not.
|
||||
///
|
||||
/// If the peer doesn't exist, log a warning and insert defaults.
|
||||
pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction) {
|
||||
//TODO: Check these. There are double disconnects for example
|
||||
// self.update_reputations();
|
||||
self.network_globals
|
||||
.peers
|
||||
.write()
|
||||
.add_reputation(peer_id, action.rep_change());
|
||||
// self.update_reputations();
|
||||
/// This is used to determine if we should accept incoming connections.
|
||||
pub fn is_banned(&self, peer_id: &PeerId) -> bool {
|
||||
self.network_globals.peers.read().is_banned(peer_id)
|
||||
}
|
||||
|
||||
/// Updates `PeerInfo` with `identify` information.
|
||||
@@ -239,9 +286,12 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
}
|
||||
}
|
||||
|
||||
/// An error has occured in the RPC.
|
||||
///
|
||||
/// This adjusts a peer's score based on the error.
|
||||
pub fn handle_rpc_error(&mut self, peer_id: &PeerId, protocol: Protocol, err: &RPCError) {
|
||||
let client = self.network_globals.client(peer_id);
|
||||
debug!(self.log, "RPCError"; "protocol" => protocol.to_string(), "err" => err.to_string(), "client" => client.to_string());
|
||||
warn!(self.log, "RPC Error"; "protocol" => protocol.to_string(), "err" => err.to_string(), "client" => client.to_string());
|
||||
|
||||
// Map this error to a `PeerAction` (if any)
|
||||
let peer_action = match err {
|
||||
@@ -423,6 +473,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
/// multiaddr here, however this could relate to duplicate PeerId's etc. If the lookup
|
||||
/// proves resource constraining, we should switch to multiaddr dialling here.
|
||||
fn peers_discovered(&mut self, peers: &[Enr], min_ttl: Option<Instant>) {
|
||||
let mut to_dial_peers = Vec::new();
|
||||
|
||||
for enr in peers {
|
||||
let peer_id = enr.peer_id();
|
||||
|
||||
@@ -433,9 +485,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
.peers
|
||||
.read()
|
||||
.is_connected_or_dialing(&peer_id)
|
||||
&& !self.network_globals.peers.read().peer_banned(&peer_id)
|
||||
&& !self.network_globals.peers.read().is_banned(&peer_id)
|
||||
{
|
||||
debug!(self.log, "Dialing discovered peer"; "peer_id"=> peer_id.to_string());
|
||||
// TODO: Update output
|
||||
// This should be updated with the peer dialing. In fact created once the peer is
|
||||
// dialed
|
||||
@@ -445,9 +496,13 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
.write()
|
||||
.update_min_ttl(&peer_id, min_ttl);
|
||||
}
|
||||
self.events.push(PeerManagerEvent::Dial(peer_id));
|
||||
to_dial_peers.push(peer_id);
|
||||
}
|
||||
}
|
||||
for peer_id in to_dial_peers {
|
||||
debug!(self.log, "Dialing discovered peer"; "peer_id"=> peer_id.to_string());
|
||||
self.dial_peer(&peer_id);
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers a peer as connected. The `ingoing` parameter determines if the peer is being
|
||||
@@ -465,9 +520,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
if peerdb.connection_status(peer_id).map(|c| c.is_banned()) == Some(true) {
|
||||
// don't connect if the peer is banned
|
||||
// TODO: Handle this case. If peer is banned this shouldn't be reached. It will put
|
||||
// our connection/disconnection out of sync with libp2p
|
||||
// return false;
|
||||
slog::crit!(self.log, "Connection has been allowed to a banned peer"; "peer_id" => peer_id.to_string());
|
||||
}
|
||||
|
||||
match connection {
|
||||
@@ -491,36 +544,22 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
true
|
||||
}
|
||||
|
||||
/// Notifies the peer manager that this peer is being dialed.
|
||||
pub fn _dialing_peer(&mut self, peer_id: &PeerId) {
|
||||
self.network_globals.peers.write().dialing_peer(peer_id);
|
||||
}
|
||||
|
||||
/// Updates the reputation of known peers according to their connection
|
||||
/// Updates the scores of known peers according to their connection
|
||||
/// status and the time that has passed.
|
||||
///
|
||||
/// **Disconnected peers** get a 1rep hit every hour they stay disconnected.
|
||||
/// **Banned peers** get a 1rep gain for every hour to slowly allow them back again.
|
||||
///
|
||||
/// A banned(disconnected) peer that gets its rep above(below) MIN_REP_BEFORE_BAN is
|
||||
/// now considered a disconnected(banned) peer.
|
||||
// TODO: Implement when reputation is added.
|
||||
fn _update_reputations(&mut self) {
|
||||
/*
|
||||
// avoid locking the peerdb too often
|
||||
// TODO: call this on a timer
|
||||
|
||||
let now = Instant::now();
|
||||
|
||||
// Check for peers that get banned, unbanned and that should be disconnected
|
||||
let mut ban_queue = Vec::new();
|
||||
let mut unban_queue = Vec::new();
|
||||
|
||||
/// NOTE: This is experimental and will likely be adjusted
|
||||
fn update_peer_scores(&mut self) {
|
||||
/* Check how long have peers been in this state and update their reputations if needed */
|
||||
let mut pdb = self.network_globals.peers.write();
|
||||
|
||||
for (id, info) in pdb._peers_mut() {
|
||||
// Update reputations
|
||||
let mut to_ban_peers = Vec::new();
|
||||
let mut to_unban_peers = Vec::new();
|
||||
|
||||
for (peer_id, info) in pdb.peers_mut() {
|
||||
let previous_state = info.score.state();
|
||||
// Update scores
|
||||
info.score.update();
|
||||
|
||||
/* TODO: Implement logic about connection lifetimes
|
||||
match info.connection_status {
|
||||
Connected { .. } => {
|
||||
// Connected peers gain reputation by sending useful messages
|
||||
@@ -570,21 +609,49 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
// This peer gets unbanned
|
||||
unban_queue.push(id.clone());
|
||||
}
|
||||
*/
|
||||
|
||||
// handle score transitions
|
||||
if previous_state != info.score.state() {
|
||||
match info.score.state() {
|
||||
ScoreState::Ban => {
|
||||
debug!(self.log, "Peer has been banned"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string());
|
||||
to_ban_peers.push(peer_id.clone());
|
||||
if info.connection_status.is_connected_or_dialing() {
|
||||
self.events.push(PeerManagerEvent::DisconnectPeer(
|
||||
peer_id.clone(),
|
||||
GoodbyeReason::BadScore,
|
||||
));
|
||||
}
|
||||
}
|
||||
ScoreState::Disconnect => {
|
||||
debug!(self.log, "Peer transitioned to disconnect state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
|
||||
// disconnect the peer if it's currently connected or dialing
|
||||
to_unban_peers.push(peer_id.clone());
|
||||
if info.connection_status.is_connected_or_dialing() {
|
||||
self.events.push(PeerManagerEvent::DisconnectPeer(
|
||||
peer_id.clone(),
|
||||
GoodbyeReason::BadScore,
|
||||
));
|
||||
}
|
||||
// TODO: Update peer manager to report that it's disconnecting.
|
||||
}
|
||||
ScoreState::Healthy => {
|
||||
debug!(self.log, "Peer transitioned to healthy state"; "peer_id" => peer_id.to_string(), "score" => info.score.to_string(), "past_state" => previous_state.to_string());
|
||||
// unban the peer if it was previously banned.
|
||||
to_unban_peers.push(peer_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for id in ban_queue {
|
||||
pdb.ban(&id);
|
||||
|
||||
self.events
|
||||
.push(PeerManagerEvent::DisconnectPeer(id.clone()));
|
||||
// process banning peers
|
||||
for peer_id in to_ban_peers {
|
||||
pdb.ban(&peer_id);
|
||||
}
|
||||
|
||||
for id in unban_queue {
|
||||
pdb.disconnect(&id);
|
||||
// process unbanning peers
|
||||
for peer_id in to_unban_peers {
|
||||
pdb.unban(&peer_id);
|
||||
}
|
||||
|
||||
self._last_updated = Instant::now();
|
||||
*/
|
||||
}
|
||||
|
||||
/// The Peer manager's heartbeat maintains the peer count and maintains peer reputations.
|
||||
@@ -605,7 +672,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
// TODO: If we have too many peers, remove peers that are not required for subnet
|
||||
// validation.
|
||||
|
||||
// TODO: Perform peer reputation maintenance here
|
||||
// Updates peer's scores.
|
||||
self.update_peer_scores();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -636,7 +704,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
self.events.push(PeerManagerEvent::Ping(peer_id));
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
|
||||
}
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
@@ -649,7 +717,7 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
self.events.push(PeerManagerEvent::Status(peer_id))
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => {
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e))
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
|
||||
}
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user