Activate peer scoring (#1284)

* Initial score structure

* Peer manager update

* Updates to dialing

* Correct tests

* Correct typos and remove unused function

* Integrate scoring into the network crate

* Clean warnings

* Formatting

* Shift core functionality into the behaviour

* Temp commit

* Shift disconnections into the behaviour

* Temp commit

* Update libp2p and gossipsub

* Remove gossipsub lru cache

* Correct merge conflicts

* Modify handler and correct tests

* Update enr network globals on socket update

* Apply clippy lints

* Add new prysm fingerprint

* More clippy fixes
This commit is contained in:
Age Manning
2020-07-07 10:13:16 +10:00
committed by GitHub
parent 5977c00edb
commit 5bc8fea2e0
26 changed files with 1339 additions and 934 deletions

View File

@@ -40,9 +40,9 @@ use super::range_sync::{BatchId, ChainId, RangeSync, EPOCHS_PER_BATCH};
use super::RequestId;
use crate::service::NetworkMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
use eth2_libp2p::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest};
use eth2_libp2p::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest, GoodbyeReason};
use eth2_libp2p::types::NetworkGlobals;
use eth2_libp2p::PeerId;
use eth2_libp2p::{PeerAction, PeerId};
use fnv::FnvHashMap;
use slog::{crit, debug, error, info, trace, warn, Logger};
use smallvec::SmallVec;
@@ -347,10 +347,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// stream termination for a single block lookup, remove the key
if let Some(single_block_request) = self.single_block_lookups.remove(&request_id) {
// the peer didn't respond with a block that it referenced
// The peer didn't respond with a block that it referenced.
// This can be allowed as some clients may implement pruning. We mildly
// tolerate this behaviour.
if !single_block_request.block_returned {
warn!(self.log, "Peer didn't respond with a block it referenced"; "referenced_block_hash" => format!("{}", single_block_request.hash), "peer_id" => format!("{}", peer_id));
self.network.downvote_peer(peer_id);
self.network
.report_peer(peer_id, PeerAction::MidToleranceError);
}
return;
}
@@ -389,9 +392,10 @@ impl<T: BeaconChainTypes> SyncManager<T> {
) {
// verify the hash is correct and try and process the block
if expected_block_hash != block.canonical_root() {
// the peer that sent this, sent us the wrong block
// The peer that sent this, sent us the wrong block.
// We do not tolerate this behaviour. The peer is instantly disconnected and banned.
warn!(self.log, "Peer sent incorrect block for single block lookup"; "peer_id" => format!("{}", peer_id));
self.network.downvote_peer(peer_id);
self.network.goodbye_peer(peer_id, GoodbyeReason::Fault);
return;
}
@@ -426,7 +430,10 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
outcome => {
warn!(self.log, "Single block lookup failed"; "outcome" => format!("{:?}", outcome));
self.network.downvote_peer(peer_id);
// This could be a range of errors. But we couldn't process the block.
// For now we consider this a mid tolerance error.
self.network
.report_peer(peer_id, PeerAction::MidToleranceError);
}
}
}
@@ -624,8 +631,12 @@ impl<T: BeaconChainTypes> SyncManager<T> {
"expected_parent" => format!("{}", expected_hash),
);
// We try again, but downvote the peer.
self.request_parent(parent_request);
self.network.downvote_peer(peer);
// We do not tolerate these kinds of errors. We will accept a few but these are signs
// of a faulty peer.
self.network
.report_peer(peer, PeerAction::LowToleranceError);
} else {
// The last block in the queue is the only one that has not attempted to be processed yet.
//
@@ -662,12 +673,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// all else we consider the chain a failure and downvote the peer that sent
// us the last block
warn!(
self.log, "Invalid parent chain. Downvoting peer";
self.log, "Invalid parent chain";
"score_adjustment" => PeerAction::MidToleranceError.to_string(),
"outcome" => format!("{:?}", outcome),
"last_peer" => format!("{:?}", parent_request.last_submitted_peer),
"last_peer" => parent_request.last_submitted_peer.to_string(),
);
// This currently can be a host of errors. We permit this due to the partial
// ambiguity.
// TODO: Refine the error types and score the peer appropriately.
self.network.report_peer(
parent_request.last_submitted_peer,
PeerAction::MidToleranceError,
);
self.network
.downvote_peer(parent_request.last_submitted_peer);
return;
}
}
@@ -774,7 +791,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
);
}
SyncMessage::ParentLookupFailed(peer_id) => {
self.network.downvote_peer(peer_id);
// A peer sent an object (block or attestation) that referenced a parent.
// On request for this parent the peer indicated it did not have this
// block.
// This is not fatal. Peers could prune old blocks so we moderately
// tolerate this behaviour.
self.network
.report_peer(peer_id, PeerAction::MidToleranceError);
}
}
}

View File

@@ -5,7 +5,7 @@ use crate::router::processor::status_message;
use crate::service::NetworkMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::rpc::{BlocksByRangeRequest, BlocksByRootRequest, GoodbyeReason, RequestId};
use eth2_libp2p::{Client, NetworkGlobals, PeerId, Request};
use eth2_libp2p::{Client, NetworkGlobals, PeerAction, PeerId, Request};
use slog::{debug, trace, warn};
use std::sync::Arc;
use tokio::sync::mpsc;
@@ -101,37 +101,20 @@ impl<T: EthSpec> SyncNetworkContext<T> {
self.send_rpc_request(peer_id, Request::BlocksByRoot(request))
}
pub fn downvote_peer(&mut self, peer_id: PeerId) {
debug!(
self.log,
"Peer downvoted";
"peer" => format!("{:?}", peer_id)
);
// TODO: Implement reputation
// TODO: what if we first close the channel sending a response
// RPCResponseErrorCode::InvalidRequest (or something)
// and then disconnect the peer? either request dc or let the behaviour have that logic
// itself
self.disconnect(peer_id, GoodbyeReason::Fault);
pub fn goodbye_peer(&mut self, peer_id: PeerId, reason: GoodbyeReason) {
self.network_send
.send(NetworkMessage::GoodbyePeer { peer_id, reason })
.unwrap_or_else(|_| {
warn!(self.log, "Could not report peer, channel failed");
});
}
fn disconnect(&mut self, peer_id: PeerId, reason: GoodbyeReason) {
warn!(
&self.log,
"Disconnecting peer (RPC)";
"reason" => format!("{:?}", reason),
"peer_id" => format!("{:?}", peer_id),
);
// ignore the error if the channel send fails
let _ = self.send_rpc_request(peer_id.clone(), Request::Goodbye(reason));
pub fn report_peer(&mut self, peer_id: PeerId, action: PeerAction) {
debug!(self.log, "Sync reporting peer"; "peer_id" => peer_id.to_string(), "action"=> action.to_string());
self.network_send
.send(NetworkMessage::Disconnect { peer_id })
.send(NetworkMessage::ReportPeer { peer_id, action })
.unwrap_or_else(|_| {
warn!(
self.log,
"Could not send a Disconnect to the network service"
)
warn!(self.log, "Could not report peer, channel failed");
});
}

View File

@@ -3,7 +3,7 @@ use crate::sync::block_processor::{spawn_block_processor, BatchProcessResult, Pr
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::{RequestId, SyncMessage};
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::PeerId;
use eth2_libp2p::{PeerAction, PeerId};
use rand::prelude::*;
use slog::{crit, debug, warn};
use std::collections::HashSet;
@@ -14,7 +14,7 @@ use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
/// blocks per batch are requested _at most_. A batch may request fewer blocks to account for
/// already requested slots. There is a timeout for each batch request. If this value is too high,
/// we will downvote peers with poor bandwidth. This can be set arbitrarily high, in which case the
/// we will negatively report peers with poor bandwidth. This can be set arbitrarily high, in which case the
/// responder will fill the response up to the max request size, assuming they have the bandwidth
/// to do so.
pub const EPOCHS_PER_BATCH: u64 = 2;
@@ -27,7 +27,7 @@ const BATCH_BUFFER_SIZE: u8 = 5;
/// Invalid batches are attempted to be re-downloaded from other peers. If they cannot be processed
/// after `INVALID_BATCH_LOOKUP_ATTEMPTS` times, the chain is considered faulty and all peers will
/// be downvoted.
/// be reported negatively.
const INVALID_BATCH_LOOKUP_ATTEMPTS: u8 = 3;
#[derive(PartialEq)]
@@ -192,7 +192,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
warn!(self.log, "BlocksByRange response returned out of range blocks";
"response_initial_slot" => first_slot,
"requested_initial_slot" => batch.start_slot);
network.downvote_peer(batch.current_peer);
// This is a pretty bad error. We don't consider this fatal, but we don't tolerate
// this much either.
network.report_peer(batch.current_peer, PeerAction::LowToleranceError);
self.to_be_processed_id = batch.id; // reset the id back to here, when incrementing, it will check against completed batches
return;
}
@@ -363,14 +365,18 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// check that we have not exceeded the re-process retry counter
if batch.reprocess_retries > INVALID_BATCH_LOOKUP_ATTEMPTS {
// if a batch has exceeded the invalid batch lookup attempts limit, it means
// If a batch has exceeded the invalid batch lookup attempts limit, it means
// that it is likely all peers in this chain are sending invalid batches
// repeatedly and are either malicious or faulty. We drop the chain and
// downvote all peers.
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers";
// report all peers.
// There are some edge cases with forks that could land us in this situation.
// This should be unlikely, so we tolerate these errors, but not often.
let action = PeerAction::LowToleranceError;
warn!(self.log, "Batch failed to download. Dropping chain scoring peers";
"score_adjustment" => action.to_string(),
"chain_id" => self.id, "id"=> *batch.id);
for peer_id in self.peer_pool.drain() {
network.downvote_peer(peer_id);
network.report_peer(peer_id, action);
}
ProcessingResult::RemoveChain
} else {
@@ -389,14 +395,16 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// check that we have not exceeded the re-process retry counter
if batch.reprocess_retries > INVALID_BATCH_LOOKUP_ATTEMPTS {
// if a batch has exceeded the invalid batch lookup attempts limit, it means
// If a batch has exceeded the invalid batch lookup attempts limit, it means
// that it is likely all peers in this chain are sending invalid batches
// repeatedly and are either malicious or faulty. We drop the chain and
// downvote all peers.
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers";
let action = PeerAction::LowToleranceError;
warn!(self.log, "Batch failed to download. Dropping chain scoring peers";
"score_adjustment" => action.to_string(),
"chain_id" => self.id, "id"=> *batch.id);
for peer_id in self.peer_pool.drain() {
network.downvote_peer(peer_id);
network.report_peer(peer_id, action);
}
ProcessingResult::RemoveChain
} else {
@@ -437,18 +445,30 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// The re-downloaded version was different
if processed_batch.current_peer != processed_batch.original_peer {
// A new peer sent the correct batch, the previous peer did not
// downvote the original peer
//
// If the same peer corrected its mistake, we allow it... for
// now.
// We negatively score the original peer.
let action = PeerAction::LowToleranceError;
debug!(
self.log, "Re-processed batch validated. Downvoting original peer";
self.log, "Re-processed batch validated. Scoring original peer";
"chain_id" => self.id,
"batch_id" => *processed_batch.id,
"score_adjustment" => action.to_string(),
"original_peer" => format!("{}",processed_batch.original_peer),
"new_peer" => format!("{}", processed_batch.current_peer)
);
network.downvote_peer(processed_batch.original_peer);
network.report_peer(processed_batch.original_peer, action);
} else {
// The same peer corrected its previous mistake. There was an error, so we
// negative score the original peer.
let action = PeerAction::MidToleranceError;
debug!(
self.log, "Re-processed batch validated by the same peer.";
"chain_id" => self.id,
"batch_id" => *processed_batch.id,
"score_adjustment" => action.to_string(),
"original_peer" => format!("{}",processed_batch.original_peer),
"new_peer" => format!("{}", processed_batch.current_peer)
);
network.report_peer(processed_batch.original_peer, action);
}
}
}