mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-15 02:42:38 +00:00
Remove sync await points (#3036)
## Issue Addressed Removes the await points in sync waiting for a processor response for rpc block processing. Built on top of #3029 This also handles a couple of bugs in the previous code and adds a relatively comprehensive test suite.
This commit is contained in:
@@ -34,29 +34,25 @@
|
||||
//! search for the block and subsequently search for parents if needed.
|
||||
|
||||
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
|
||||
use super::block_lookups::BlockLookups;
|
||||
use super::network_context::SyncNetworkContext;
|
||||
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
|
||||
use super::range_sync::{ChainId, RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
|
||||
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
|
||||
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
|
||||
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent};
|
||||
use crate::service::NetworkMessage;
|
||||
use crate::status::ToStatusMessage;
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
|
||||
use fnv::FnvHashMap;
|
||||
use lighthouse_network::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest, GoodbyeReason};
|
||||
use lighthouse_network::rpc::methods::MAX_REQUEST_BLOCKS;
|
||||
use lighthouse_network::types::{NetworkGlobals, SyncState};
|
||||
use lighthouse_network::SyncInfo;
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use lru_cache::LRUCache;
|
||||
use slog::{crit, debug, error, info, trace, warn, Logger};
|
||||
use smallvec::SmallVec;
|
||||
use ssz_types::VariableList;
|
||||
use slog::{crit, debug, error, info, trace, Logger};
|
||||
use std::boxed::Box;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::ops::Sub;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
|
||||
use types::{EthSpec, Hash256, SignedBeaconBlock, Slot};
|
||||
|
||||
/// The number of slots ahead of us that is allowed before requesting a long-range (batch) Sync
|
||||
/// from a peer. If a peer is within this tolerance (forwards or backwards), it is treated as a
|
||||
@@ -66,12 +62,6 @@ use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
|
||||
/// gossip if no peers are further than this range ahead of us that we have not already downloaded
|
||||
/// blocks for.
|
||||
pub const SLOT_IMPORT_TOLERANCE: usize = 32;
|
||||
/// How many attempts we try to find a parent of a block before we give up trying .
|
||||
const PARENT_FAIL_TOLERANCE: usize = 5;
|
||||
/// The maximum depth we will search for a parent block. In principle we should have sync'd any
|
||||
/// canonical chain to its head once the peer connects. A chain should not appear where it's depth
|
||||
/// is further back than the most recent head slot.
|
||||
const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2;
|
||||
|
||||
pub type Id = u32;
|
||||
|
||||
@@ -120,26 +110,22 @@ pub enum SyncMessage<T: EthSpec> {
|
||||
|
||||
/// A batch has been processed by the block processor thread.
|
||||
BatchProcessed {
|
||||
sync_type: SyncRequestType,
|
||||
sync_type: ChainSegmentProcessId,
|
||||
result: BatchProcessResult,
|
||||
},
|
||||
|
||||
/// A parent lookup has failed.
|
||||
ParentLookupFailed {
|
||||
/// The head of the chain of blocks that failed to process.
|
||||
chain_head: Hash256,
|
||||
/// The peer that instigated the chain lookup.
|
||||
peer_id: PeerId,
|
||||
/// Block processed
|
||||
BlockProcessed {
|
||||
process_type: BlockProcessType,
|
||||
result: Result<(), BlockError<T>>,
|
||||
},
|
||||
}
|
||||
|
||||
/// The type of sync request made
|
||||
/// The type of processing specified for a received block.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SyncRequestType {
|
||||
/// Request was from the backfill sync algorithm.
|
||||
BackFillSync(Epoch),
|
||||
/// The request was from a chain in the range sync algorithm.
|
||||
RangeSync(Epoch, ChainId),
|
||||
pub enum BlockProcessType {
|
||||
SingleBlock { id: Id },
|
||||
ParentLookup { chain_hash: Hash256 },
|
||||
}
|
||||
|
||||
/// The result of processing multiple blocks (a chain segment).
|
||||
@@ -154,23 +140,6 @@ pub enum BatchProcessResult {
|
||||
},
|
||||
}
|
||||
|
||||
/// Maintains a sequential list of parents to lookup and the lookup's current state.
|
||||
struct ParentRequests<T: EthSpec> {
|
||||
/// The blocks that have currently been downloaded.
|
||||
downloaded_blocks: Vec<SignedBeaconBlock<T>>,
|
||||
|
||||
/// The number of failed attempts to retrieve a parent block. If too many attempts occur, this
|
||||
/// lookup is failed and rejected.
|
||||
failed_attempts: usize,
|
||||
|
||||
/// The peer who last submitted a block. If the chain ends or fails, this is the peer that is
|
||||
/// penalized.
|
||||
last_submitted_peer: PeerId,
|
||||
|
||||
/// The request ID of this lookup is in progress.
|
||||
pending: Option<Id>,
|
||||
}
|
||||
|
||||
/// The primary object for handling and driving all the current syncing logic. It maintains the
|
||||
/// current state of the syncing process, the number of useful peers, downloaded blocks and
|
||||
/// controls the logic behind both the long-range (batch) sync and the on-going potential parent
|
||||
@@ -194,45 +163,12 @@ pub struct SyncManager<T: BeaconChainTypes> {
|
||||
/// Backfill syncing.
|
||||
backfill_sync: BackFillSync<T>,
|
||||
|
||||
/// A collection of parent block lookups.
|
||||
parent_queue: SmallVec<[ParentRequests<T::EthSpec>; 3]>,
|
||||
|
||||
/// A cache of failed chain lookups to prevent duplicate searches.
|
||||
failed_chains: LRUCache<Hash256>,
|
||||
|
||||
/// A collection of block hashes being searched for and a flag indicating if a result has been
|
||||
/// received or not.
|
||||
///
|
||||
/// The flag allows us to determine if the peer returned data or sent us nothing.
|
||||
single_block_lookups: FnvHashMap<Id, SingleBlockRequest>,
|
||||
|
||||
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
|
||||
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T>>,
|
||||
|
||||
/// Used for spawning tasks.
|
||||
executor: task_executor::TaskExecutor,
|
||||
block_lookups: BlockLookups<T>,
|
||||
|
||||
/// The logger for the import manager.
|
||||
log: Logger,
|
||||
}
|
||||
|
||||
/// Object representing a single block lookup request.
|
||||
struct SingleBlockRequest {
|
||||
/// The hash of the requested block.
|
||||
pub hash: Hash256,
|
||||
/// Whether a block was received from this request, or the peer returned an empty response.
|
||||
pub block_returned: bool,
|
||||
}
|
||||
|
||||
impl SingleBlockRequest {
|
||||
pub fn new(hash: Hash256) -> Self {
|
||||
Self {
|
||||
hash,
|
||||
block_returned: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawns a new `SyncManager` thread which has a weak reference to underlying beacon
|
||||
/// chain. This allows the chain to be
|
||||
/// dropped during the syncing process which will gracefully end the `SyncManager`.
|
||||
@@ -253,26 +189,22 @@ pub fn spawn<T: BeaconChainTypes>(
|
||||
|
||||
// create an instance of the SyncManager
|
||||
let mut sync_manager = SyncManager {
|
||||
chain: beacon_chain.clone(),
|
||||
network_globals: network_globals.clone(),
|
||||
input_channel: sync_recv,
|
||||
network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
|
||||
range_sync: RangeSync::new(
|
||||
beacon_chain.clone(),
|
||||
beacon_processor_send.clone(),
|
||||
log.clone(),
|
||||
),
|
||||
backfill_sync: BackFillSync::new(
|
||||
beacon_chain.clone(),
|
||||
network_globals.clone(),
|
||||
beacon_chain,
|
||||
network_globals,
|
||||
beacon_processor_send.clone(),
|
||||
log.clone(),
|
||||
),
|
||||
network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
|
||||
chain: beacon_chain,
|
||||
network_globals,
|
||||
input_channel: sync_recv,
|
||||
parent_queue: SmallVec::new(),
|
||||
failed_chains: LRUCache::new(500),
|
||||
single_block_lookups: FnvHashMap::default(),
|
||||
beacon_processor_send,
|
||||
executor: executor.clone(),
|
||||
block_lookups: BlockLookups::new(beacon_processor_send, log.clone()),
|
||||
log: log.clone(),
|
||||
};
|
||||
|
||||
@@ -322,321 +254,17 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
self.update_sync_state();
|
||||
}
|
||||
|
||||
/// The response to a `BlocksByRoot` request.
|
||||
/// The current implementation takes one block at a time. As blocks are streamed, any
|
||||
/// subsequent blocks will simply be ignored.
|
||||
/// There are two reasons we could have received a BlocksByRoot response
|
||||
/// - We requested a single hash and have received a response for the single_block_lookup
|
||||
/// - We are looking up parent blocks in parent lookup search
|
||||
async fn parent_lookup_response(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
request_id: Id,
|
||||
block: Option<SignedBeaconBlock<T::EthSpec>>,
|
||||
_seen_timestamp: Duration,
|
||||
) {
|
||||
let mut parent_request = if let Some(pos) = self
|
||||
.parent_queue
|
||||
.iter()
|
||||
.position(|request| request.pending == Some(request_id))
|
||||
{
|
||||
// we remove from the queue and process it. It will get re-added if required
|
||||
self.parent_queue.remove(pos)
|
||||
} else {
|
||||
if block.is_some() {
|
||||
debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
match block {
|
||||
Some(block) => {
|
||||
// data was returned, not just a stream termination
|
||||
|
||||
// check if the parent of this block isn't in our failed cache. If it is, this
|
||||
// chain should be dropped and the peer downscored.
|
||||
if self.failed_chains.contains(&block.message().parent_root()) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Parent chain ignored due to past failure";
|
||||
"block" => ?block.message().parent_root(),
|
||||
"slot" => block.slot()
|
||||
);
|
||||
if !parent_request.downloaded_blocks.is_empty() {
|
||||
// Add the root block to failed chains
|
||||
self.failed_chains
|
||||
.insert(parent_request.downloaded_blocks[0].canonical_root());
|
||||
} else {
|
||||
crit!(self.log, "Parent chain has no blocks");
|
||||
}
|
||||
self.network.report_peer(
|
||||
peer_id,
|
||||
PeerAction::MidToleranceError,
|
||||
"bbroot_failed_chains",
|
||||
);
|
||||
return;
|
||||
}
|
||||
// add the block to response
|
||||
parent_request.downloaded_blocks.push(block);
|
||||
// queue for processing
|
||||
self.process_parent_request(parent_request).await;
|
||||
}
|
||||
None => {
|
||||
// An empty response has been returned to a parent request
|
||||
// if an empty response is given, the peer didn't have the requested block, try again
|
||||
parent_request.failed_attempts += 1;
|
||||
parent_request.last_submitted_peer = peer_id;
|
||||
self.request_parent(parent_request);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_block_async(
|
||||
&mut self,
|
||||
block: SignedBeaconBlock<T::EthSpec>,
|
||||
) -> Option<Result<Hash256, BlockError<T::EthSpec>>> {
|
||||
let (event, rx) = BeaconWorkEvent::rpc_beacon_block(Box::new(block));
|
||||
match self.beacon_processor_send.try_send(event) {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!(
|
||||
self.log,
|
||||
"Failed to send sync block to processor";
|
||||
"error" => ?e
|
||||
);
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
match rx.await {
|
||||
Ok(block_result) => Some(block_result),
|
||||
Err(_) => {
|
||||
warn!(
|
||||
self.log,
|
||||
"Sync block not processed";
|
||||
"msg" => "likely due to system resource exhaustion"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Processes the response obtained from a single block lookup search. If the block is
|
||||
/// processed or errors, the search ends. If the blocks parent is unknown, a block parent
|
||||
/// lookup search is started.
|
||||
async fn single_block_lookup_response(
|
||||
&mut self,
|
||||
request_id: Id,
|
||||
peer_id: PeerId,
|
||||
block: Option<SignedBeaconBlock<T::EthSpec>>,
|
||||
seen_timestamp: Duration,
|
||||
) {
|
||||
if let Entry::Occupied(mut entry) = self.single_block_lookups.entry(request_id) {
|
||||
match block {
|
||||
None => {
|
||||
// Stream termination. Remove the lookup
|
||||
let (_, single_block_request) = entry.remove_entry();
|
||||
// The peer didn't respond with a block that it referenced.
|
||||
// This can be allowed as some clients may implement pruning. We mildly
|
||||
// tolerate this behaviour.
|
||||
if !single_block_request.block_returned {
|
||||
warn!(self.log, "Peer didn't respond with a block it referenced";
|
||||
"referenced_block_hash" => %single_block_request.hash, "peer_id" => %peer_id);
|
||||
self.network.report_peer(
|
||||
peer_id,
|
||||
PeerAction::MidToleranceError,
|
||||
"bbroot_no_block",
|
||||
);
|
||||
}
|
||||
}
|
||||
Some(block) => {
|
||||
// update the state of the lookup indicating a block was received from the peer
|
||||
entry.get_mut().block_returned = true;
|
||||
// verify the hash is correct and try and process the block
|
||||
if entry.get().hash != block.canonical_root() {
|
||||
// The peer that sent this, sent us the wrong block.
|
||||
// We do not tolerate this behaviour. The peer is instantly disconnected and banned.
|
||||
warn!(self.log, "Peer sent incorrect block for single block lookup"; "peer_id" => %peer_id);
|
||||
self.network.goodbye_peer(peer_id, GoodbyeReason::Fault);
|
||||
return;
|
||||
}
|
||||
|
||||
let block_result = match self.process_block_async(block.clone()).await {
|
||||
Some(block_result) => block_result,
|
||||
None => return,
|
||||
};
|
||||
|
||||
// we have the correct block, try and process it
|
||||
match block_result {
|
||||
Ok(block_root) => {
|
||||
// Block has been processed, so write the block time to the cache.
|
||||
self.chain.block_times_cache.write().set_time_observed(
|
||||
block_root,
|
||||
block.slot(),
|
||||
seen_timestamp,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
info!(self.log, "Processed block"; "block" => %block_root);
|
||||
|
||||
// Spawn `BeaconChain::fork_choice` in a blocking task. It's
|
||||
// potentially long-running and it might panic if run from an async
|
||||
// context.
|
||||
let chain = self.chain.clone();
|
||||
let log = self.log.clone();
|
||||
self.executor.spawn_blocking(
|
||||
move || match chain.fork_choice() {
|
||||
Ok(()) => trace!(
|
||||
log,
|
||||
"Fork choice success";
|
||||
"location" => "single block"
|
||||
),
|
||||
Err(e) => error!(
|
||||
log,
|
||||
"Fork choice failed";
|
||||
"error" => ?e,
|
||||
"location" => "single block"
|
||||
),
|
||||
},
|
||||
"sync_manager_fork_choice",
|
||||
);
|
||||
}
|
||||
Err(BlockError::ParentUnknown { .. }) => {
|
||||
// We don't know of the blocks parent, begin a parent lookup search
|
||||
self.add_unknown_block(peer_id, block);
|
||||
}
|
||||
Err(BlockError::BlockIsAlreadyKnown) => {
|
||||
trace!(self.log, "Single block lookup already known");
|
||||
}
|
||||
Err(BlockError::BeaconChainError(e)) => {
|
||||
warn!(self.log, "Unexpected block processing error"; "error" => ?e);
|
||||
}
|
||||
outcome => {
|
||||
warn!(self.log, "Single block lookup failed"; "outcome" => ?outcome);
|
||||
// This could be a range of errors. But we couldn't process the block.
|
||||
// For now we consider this a mid tolerance error.
|
||||
self.network.report_peer(
|
||||
peer_id,
|
||||
PeerAction::MidToleranceError,
|
||||
"single_block_lookup_failed",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A block has been sent to us that has an unknown parent. This begins a parent lookup search
|
||||
/// to find the parent or chain of parents that match our current chain.
|
||||
fn add_unknown_block(&mut self, peer_id: PeerId, block: SignedBeaconBlock<T::EthSpec>) {
|
||||
// If we are not synced or within SLOT_IMPORT_TOLERANCE of the block, ignore
|
||||
if !self.network_globals.sync_state.read().is_synced() {
|
||||
let head_slot = self
|
||||
.chain
|
||||
.head_info()
|
||||
.map(|info| info.slot)
|
||||
.unwrap_or_else(|_| Slot::from(0u64));
|
||||
let unknown_block_slot = block.slot();
|
||||
|
||||
// if the block is far in the future, ignore it. If its within the slot tolerance of
|
||||
// our current head, regardless of the syncing state, fetch it.
|
||||
if (head_slot >= unknown_block_slot
|
||||
&& head_slot.sub(unknown_block_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
|
||||
|| (head_slot < unknown_block_slot
|
||||
&& unknown_block_slot.sub(head_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let block_root = block.canonical_root();
|
||||
// If this block or it's parent is part of a known failed chain, ignore it.
|
||||
if self.failed_chains.contains(&block.message().parent_root())
|
||||
|| self.failed_chains.contains(&block_root)
|
||||
{
|
||||
debug!(self.log, "Block is from a past failed chain. Dropping"; "block_root" => ?block_root, "block_slot" => block.slot());
|
||||
return;
|
||||
}
|
||||
|
||||
// Make sure this block is not already being searched for
|
||||
// NOTE: Potentially store a hashset of blocks for O(1) lookups
|
||||
for parent_req in self.parent_queue.iter() {
|
||||
if parent_req
|
||||
.downloaded_blocks
|
||||
.iter()
|
||||
.any(|d_block| d_block == &block)
|
||||
{
|
||||
// we are already searching for this block, ignore it
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
debug!(self.log, "Unknown block received. Starting a parent lookup"; "block_slot" => block.slot(), "block_hash" => %block.canonical_root());
|
||||
|
||||
let parent_request = ParentRequests {
|
||||
downloaded_blocks: vec![block],
|
||||
failed_attempts: 0,
|
||||
last_submitted_peer: peer_id,
|
||||
pending: None,
|
||||
};
|
||||
|
||||
self.request_parent(parent_request)
|
||||
}
|
||||
|
||||
/// A request to search for a block hash has been received. This function begins a BlocksByRoot
|
||||
/// request to find the requested block.
|
||||
fn search_for_block(&mut self, peer_id: PeerId, block_hash: Hash256) {
|
||||
// If we are not synced, ignore this block
|
||||
if !self.network_globals.sync_state.read().is_synced() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Do not re-request a block that is already being requested
|
||||
if self
|
||||
.single_block_lookups
|
||||
.values()
|
||||
.any(|single_block_request| single_block_request.hash == block_hash)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
debug!(
|
||||
self.log,
|
||||
"Searching for block";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %block_hash
|
||||
);
|
||||
|
||||
let request = BlocksByRootRequest {
|
||||
block_roots: VariableList::from(vec![block_hash]),
|
||||
};
|
||||
|
||||
if let Ok(request_id) = self.network.single_block_lookup_request(peer_id, request) {
|
||||
self.single_block_lookups
|
||||
.insert(request_id, SingleBlockRequest::new(block_hash));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles RPC errors related to requests that were emitted from the sync manager.
|
||||
fn inject_error(&mut self, peer_id: PeerId, request_id: RequestId) {
|
||||
trace!(self.log, "Sync manager received a failed RPC");
|
||||
match request_id {
|
||||
RequestId::SingleBlock { id } => {
|
||||
self.single_block_lookups.remove(&id);
|
||||
self.block_lookups
|
||||
.single_block_lookup_failed(id, &mut self.network);
|
||||
}
|
||||
RequestId::ParentLookup { id } => {
|
||||
if let Some(pos) = self
|
||||
.parent_queue
|
||||
.iter()
|
||||
.position(|request| request.pending == Some(id))
|
||||
{
|
||||
// increment the failure of a parent lookup if the request matches a parent search
|
||||
let mut parent_request = self.parent_queue.remove(pos);
|
||||
parent_request.failed_attempts += 1;
|
||||
parent_request.last_submitted_peer = peer_id;
|
||||
self.request_parent(parent_request);
|
||||
}
|
||||
self.block_lookups
|
||||
.parent_lookup_failed(id, peer_id, &mut self.network);
|
||||
}
|
||||
RequestId::BackFillSync { id } => {
|
||||
if let Some(batch_id) = self.network.backfill_sync_response(id, true) {
|
||||
@@ -666,6 +294,8 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
|
||||
fn peer_disconnect(&mut self, peer_id: &PeerId) {
|
||||
self.range_sync.peer_disconnect(&mut self.network, peer_id);
|
||||
self.block_lookups
|
||||
.peer_disconnected(peer_id, &mut self.network);
|
||||
// Regardless of the outcome, we update the sync status.
|
||||
let _ = self
|
||||
.backfill_sync
|
||||
@@ -824,187 +454,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/* Processing State Functions */
|
||||
// These functions are called in the main poll function to transition the state of the sync
|
||||
// manager
|
||||
|
||||
/// A new block has been received for a parent lookup query, process it.
|
||||
async fn process_parent_request(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
|
||||
// verify the last added block is the parent of the last requested block
|
||||
|
||||
if parent_request.downloaded_blocks.len() < 2 {
|
||||
crit!(
|
||||
self.log,
|
||||
"There must be at least two blocks in a parent request lookup at all times"
|
||||
);
|
||||
panic!("There must be at least two blocks in parent request lookup at all times");
|
||||
// fail loudly
|
||||
}
|
||||
let previous_index = parent_request.downloaded_blocks.len() - 2;
|
||||
let expected_hash = parent_request.downloaded_blocks[previous_index].parent_root();
|
||||
|
||||
// Note: the length must be greater than 2 so this cannot panic.
|
||||
let block_hash = parent_request
|
||||
.downloaded_blocks
|
||||
.last()
|
||||
.expect("Complete batch cannot be empty")
|
||||
.canonical_root();
|
||||
if block_hash != expected_hash {
|
||||
// The sent block is not the correct block, remove the head block and downvote
|
||||
// the peer
|
||||
let _ = parent_request.downloaded_blocks.pop();
|
||||
let peer = parent_request.last_submitted_peer;
|
||||
|
||||
warn!(self.log, "Peer sent invalid parent.";
|
||||
"peer_id" => %peer,
|
||||
"received_block" => %block_hash,
|
||||
"expected_parent" => %expected_hash,
|
||||
);
|
||||
|
||||
// We try again, but downvote the peer.
|
||||
self.request_parent(parent_request);
|
||||
// We do not tolerate these kinds of errors. We will accept a few but these are signs
|
||||
// of a faulty peer.
|
||||
self.network.report_peer(
|
||||
peer,
|
||||
PeerAction::LowToleranceError,
|
||||
"parent_request_bad_hash",
|
||||
);
|
||||
} else {
|
||||
// The last block in the queue is the only one that has not attempted to be processed yet.
|
||||
//
|
||||
// The logic here attempts to process the last block. If it can be processed, the rest
|
||||
// of the blocks must have known parents. If any of them cannot be processed, we
|
||||
// consider the entire chain corrupt and drop it, notifying the user.
|
||||
//
|
||||
// If the last block in the queue cannot be processed, we also drop the entire queue.
|
||||
// If the last block in the queue has an unknown parent, we continue the parent
|
||||
// lookup-search.
|
||||
|
||||
let chain_block_hash = parent_request.downloaded_blocks[0].canonical_root();
|
||||
|
||||
let newest_block = parent_request
|
||||
.downloaded_blocks
|
||||
.pop()
|
||||
.expect("There is always at least one block in the queue");
|
||||
|
||||
let block_result = match self.process_block_async(newest_block.clone()).await {
|
||||
Some(block_result) => block_result,
|
||||
None => return,
|
||||
};
|
||||
|
||||
match block_result {
|
||||
Err(BlockError::ParentUnknown { .. }) => {
|
||||
// need to keep looking for parents
|
||||
// add the block back to the queue and continue the search
|
||||
parent_request.downloaded_blocks.push(newest_block);
|
||||
self.request_parent(parent_request);
|
||||
}
|
||||
Ok(_) | Err(BlockError::BlockIsAlreadyKnown { .. }) => {
|
||||
let process_id = ProcessId::ParentLookup(
|
||||
parent_request.last_submitted_peer,
|
||||
chain_block_hash,
|
||||
);
|
||||
let blocks = parent_request.downloaded_blocks;
|
||||
|
||||
match self
|
||||
.beacon_processor_send
|
||||
.try_send(BeaconWorkEvent::chain_segment(process_id, blocks))
|
||||
{
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!(
|
||||
self.log,
|
||||
"Failed to send chain segment to processor";
|
||||
"error" => ?e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(outcome) => {
|
||||
// all else we consider the chain a failure and downvote the peer that sent
|
||||
// us the last block
|
||||
warn!(
|
||||
self.log, "Invalid parent chain";
|
||||
"score_adjustment" => %PeerAction::MidToleranceError,
|
||||
"outcome" => ?outcome,
|
||||
"last_peer" => %parent_request.last_submitted_peer,
|
||||
);
|
||||
|
||||
// Add this chain to cache of failed chains
|
||||
self.failed_chains.insert(chain_block_hash);
|
||||
|
||||
// This currently can be a host of errors. We permit this due to the partial
|
||||
// ambiguity.
|
||||
self.network.report_peer(
|
||||
parent_request.last_submitted_peer,
|
||||
PeerAction::MidToleranceError,
|
||||
"parent_request_err",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Progresses a parent request query.
|
||||
///
|
||||
/// This checks to ensure there a peers to progress the query, checks for failures and
|
||||
/// initiates requests.
|
||||
fn request_parent(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
|
||||
// check to make sure this request hasn't failed
|
||||
if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE
|
||||
|| parent_request.downloaded_blocks.len() >= PARENT_DEPTH_TOLERANCE
|
||||
{
|
||||
let error = if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE {
|
||||
// This is a peer-specific error and the chain could be continued with another
|
||||
// peer. We don't consider this chain a failure and prevent retries with another
|
||||
// peer.
|
||||
"too many failed attempts"
|
||||
} else if !parent_request.downloaded_blocks.is_empty() {
|
||||
self.failed_chains
|
||||
.insert(parent_request.downloaded_blocks[0].canonical_root());
|
||||
"reached maximum lookup-depth"
|
||||
} else {
|
||||
crit!(self.log, "Parent lookup has no blocks");
|
||||
"no blocks"
|
||||
};
|
||||
|
||||
debug!(self.log, "Parent import failed";
|
||||
"block" => ?parent_request.downloaded_blocks[0].canonical_root(),
|
||||
"ancestors_found" => parent_request.downloaded_blocks.len(),
|
||||
"reason" => error
|
||||
);
|
||||
// Downscore the peer.
|
||||
self.network.report_peer(
|
||||
parent_request.last_submitted_peer,
|
||||
PeerAction::LowToleranceError,
|
||||
"request_parent_import_failed",
|
||||
);
|
||||
return; // drop the request
|
||||
}
|
||||
|
||||
let parent_hash = if let Some(block) = parent_request.downloaded_blocks.last() {
|
||||
block.parent_root()
|
||||
} else {
|
||||
crit!(self.log, "Parent queue is empty. This should never happen");
|
||||
return;
|
||||
};
|
||||
|
||||
let request = BlocksByRootRequest {
|
||||
block_roots: VariableList::from(vec![parent_hash]),
|
||||
};
|
||||
|
||||
// We continue to search for the chain of blocks from the same peer. Other peers are not
|
||||
// guaranteed to have this chain of blocks.
|
||||
let peer_id = parent_request.last_submitted_peer;
|
||||
|
||||
if let Ok(request_id) = self.network.parent_lookup_request(peer_id, request) {
|
||||
// if the request was successful add the queue back into self
|
||||
parent_request.pending = Some(request_id);
|
||||
self.parent_queue.push(parent_request);
|
||||
}
|
||||
}
|
||||
|
||||
/// The main driving future for the sync manager.
|
||||
async fn main(&mut self) {
|
||||
// process any inbound messages
|
||||
@@ -1020,19 +469,43 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
beacon_block,
|
||||
seen_timestamp,
|
||||
} => {
|
||||
self.rpc_block_received(
|
||||
request_id,
|
||||
peer_id,
|
||||
beacon_block.map(|b| *b),
|
||||
seen_timestamp,
|
||||
)
|
||||
.await;
|
||||
self.rpc_block_received(request_id, peer_id, beacon_block, seen_timestamp);
|
||||
}
|
||||
SyncMessage::UnknownBlock(peer_id, block) => {
|
||||
self.add_unknown_block(peer_id, *block);
|
||||
// If we are not synced or within SLOT_IMPORT_TOLERANCE of the block, ignore
|
||||
if !self.network_globals.sync_state.read().is_synced() {
|
||||
let head_slot = self
|
||||
.chain
|
||||
.head_info()
|
||||
.map(|info| info.slot)
|
||||
.unwrap_or_else(|_| Slot::from(0u64));
|
||||
let unknown_block_slot = block.slot();
|
||||
|
||||
// if the block is far in the future, ignore it. If its within the slot tolerance of
|
||||
// our current head, regardless of the syncing state, fetch it.
|
||||
if (head_slot >= unknown_block_slot
|
||||
&& head_slot.sub(unknown_block_slot).as_usize()
|
||||
> SLOT_IMPORT_TOLERANCE)
|
||||
|| (head_slot < unknown_block_slot
|
||||
&& unknown_block_slot.sub(head_slot).as_usize()
|
||||
> SLOT_IMPORT_TOLERANCE)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if self.network_globals.peers.read().is_connected(&peer_id) {
|
||||
self.block_lookups
|
||||
.search_parent(block, peer_id, &mut self.network);
|
||||
}
|
||||
}
|
||||
SyncMessage::UnknownBlockHash(peer_id, block_hash) => {
|
||||
self.search_for_block(peer_id, block_hash);
|
||||
// If we are not synced, ignore this block.
|
||||
if self.network_globals.sync_state.read().is_synced()
|
||||
&& self.network_globals.peers.read().is_connected(&peer_id)
|
||||
{
|
||||
self.block_lookups
|
||||
.search_block(block_hash, peer_id, &mut self.network);
|
||||
}
|
||||
}
|
||||
SyncMessage::Disconnect(peer_id) => {
|
||||
self.peer_disconnect(&peer_id);
|
||||
@@ -1041,8 +514,19 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
peer_id,
|
||||
request_id,
|
||||
} => self.inject_error(peer_id, request_id),
|
||||
SyncMessage::BlockProcessed {
|
||||
process_type,
|
||||
result,
|
||||
} => match process_type {
|
||||
BlockProcessType::SingleBlock { id } => self
|
||||
.block_lookups
|
||||
.single_block_processed(id, result, &mut self.network),
|
||||
BlockProcessType::ParentLookup { chain_hash } => self
|
||||
.block_lookups
|
||||
.parent_block_processed(chain_hash, result, &mut self.network),
|
||||
},
|
||||
SyncMessage::BatchProcessed { sync_type, result } => match sync_type {
|
||||
SyncRequestType::RangeSync(epoch, chain_id) => {
|
||||
ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => {
|
||||
self.range_sync.handle_block_process_result(
|
||||
&mut self.network,
|
||||
chain_id,
|
||||
@@ -1051,7 +535,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
);
|
||||
self.update_sync_state();
|
||||
}
|
||||
SyncRequestType::BackFillSync(epoch) => {
|
||||
ChainSegmentProcessId::BackSyncBatchId(epoch) => {
|
||||
match self.backfill_sync.on_batch_process_result(
|
||||
&mut self.network,
|
||||
epoch,
|
||||
@@ -1066,41 +550,37 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
ChainSegmentProcessId::ParentLookup(chain_hash) => self
|
||||
.block_lookups
|
||||
.parent_chain_processed(chain_hash, result, &mut self.network),
|
||||
},
|
||||
SyncMessage::ParentLookupFailed {
|
||||
chain_head,
|
||||
peer_id,
|
||||
} => {
|
||||
// A peer sent an object (block or attestation) that referenced a parent.
|
||||
// The processing of this chain failed.
|
||||
self.failed_chains.insert(chain_head);
|
||||
self.network.report_peer(
|
||||
peer_id,
|
||||
PeerAction::MidToleranceError,
|
||||
"parent_lookup_failed",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn rpc_block_received(
|
||||
fn rpc_block_received(
|
||||
&mut self,
|
||||
request_id: RequestId,
|
||||
peer_id: PeerId,
|
||||
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
|
||||
beacon_block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
|
||||
seen_timestamp: Duration,
|
||||
) {
|
||||
match request_id {
|
||||
RequestId::SingleBlock { id } => {
|
||||
self.single_block_lookup_response(id, peer_id, beacon_block, seen_timestamp)
|
||||
.await;
|
||||
}
|
||||
RequestId::ParentLookup { id } => {
|
||||
self.parent_lookup_response(peer_id, id, beacon_block, seen_timestamp)
|
||||
.await
|
||||
}
|
||||
RequestId::SingleBlock { id } => self.block_lookups.single_block_lookup_response(
|
||||
id,
|
||||
peer_id,
|
||||
beacon_block,
|
||||
seen_timestamp,
|
||||
&mut self.network,
|
||||
),
|
||||
RequestId::ParentLookup { id } => self.block_lookups.parent_lookup_response(
|
||||
id,
|
||||
peer_id,
|
||||
beacon_block,
|
||||
seen_timestamp,
|
||||
&mut self.network,
|
||||
),
|
||||
RequestId::BackFillSync { id } => {
|
||||
if let Some(batch_id) = self
|
||||
.network
|
||||
@@ -1111,7 +591,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
batch_id,
|
||||
&peer_id,
|
||||
id,
|
||||
beacon_block,
|
||||
beacon_block.map(|b| *b),
|
||||
) {
|
||||
Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
|
||||
Ok(ProcessResult::Successful) => {}
|
||||
@@ -1133,7 +613,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
chain_id,
|
||||
batch_id,
|
||||
id,
|
||||
beacon_block,
|
||||
beacon_block.map(|b| *b),
|
||||
);
|
||||
self.update_sync_state();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user