Process gossip blocks on the GossipProcessor (#1523)

## Issue Addressed

NA

## Proposed Changes

Moves beacon block processing over to the newly-added `GossipProcessor`. This moves the task off the core executor onto the blocking one.

## Additional Info

- With this PR, gossip blocks are being ignored during sync.
This commit is contained in:
Paul Hauner
2020-08-17 09:20:27 +00:00
parent 61d5b592cb
commit f85485884f
12 changed files with 845 additions and 454 deletions

View File

@@ -1,255 +0,0 @@
use crate::router::processor::FUTURE_SLOT_TOLERANCE;
use crate::sync::manager::SyncMessage;
use crate::sync::range_sync::{BatchId, ChainId};
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError, ChainSegmentResult};
use eth2_libp2p::PeerId;
use slog::{debug, error, trace, warn};
use std::sync::{Arc, Weak};
use tokio::sync::mpsc;
use types::{EthSpec, SignedBeaconBlock};
/// Id associated to a block processing request, either a batch or a single block.
#[derive(Clone, Debug, PartialEq)]
pub enum ProcessId {
/// Processing Id of a range syncing batch.
RangeBatchId(ChainId, BatchId),
/// Processing Id of the parent lookup of a block
ParentLookup(PeerId),
}
/// The result of a block processing request.
// TODO: When correct batch error handling occurs, we will include an error type.
#[derive(Debug)]
pub enum BatchProcessResult {
/// The batch was completed successfully.
Success,
/// The batch processing failed.
Failed,
/// The batch processing failed but managed to import at least one block.
Partial,
}
/// Spawns a thread handling the block processing of a request: range syncing or parent lookup.
pub fn spawn_block_processor<T: BeaconChainTypes>(
chain: Weak<BeaconChain<T>>,
process_id: ProcessId,
downloaded_blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
log: slog::Logger,
) {
std::thread::spawn(move || {
match process_id {
// this a request from the range sync
ProcessId::RangeBatchId(chain_id, batch_id) => {
let len = downloaded_blocks.len();
let start_slot = if len > 0 {
downloaded_blocks[0].message.slot.as_u64()
} else {
0
};
let end_slot = if len > 0 {
downloaded_blocks[len - 1].message.slot.as_u64()
} else {
0
};
debug!(log, "Processing batch"; "id" => *batch_id, "blocks" => downloaded_blocks.len(), "start_slot" => start_slot, "end_slot" => end_slot);
let result = match process_blocks(chain, downloaded_blocks.iter(), &log) {
(_, Ok(_)) => {
debug!(log, "Batch processed"; "id" => *batch_id , "start_slot" => start_slot, "end_slot" => end_slot);
BatchProcessResult::Success
}
(imported_blocks, Err(e)) if imported_blocks > 0 => {
debug!(log, "Batch processing failed but imported some blocks";
"id" => *batch_id, "error" => e, "imported_blocks"=> imported_blocks);
BatchProcessResult::Partial
}
(_, Err(e)) => {
debug!(log, "Batch processing failed"; "id" => *batch_id, "error" => e);
BatchProcessResult::Failed
}
};
let msg = SyncMessage::BatchProcessed {
chain_id,
batch_id,
downloaded_blocks,
result,
};
sync_send.send(msg).unwrap_or_else(|_| {
debug!(
log,
"Block processor could not inform range sync result. Likely shutting down."
);
});
}
// this a parent lookup request from the sync manager
ProcessId::ParentLookup(peer_id) => {
debug!(
log, "Processing parent lookup";
"last_peer_id" => format!("{}", peer_id),
"blocks" => downloaded_blocks.len()
);
// parent blocks are ordered from highest slot to lowest, so we need to process in
// reverse
match process_blocks(chain, downloaded_blocks.iter().rev(), &log) {
(_, Err(e)) => {
warn!(log, "Parent lookup failed"; "last_peer_id" => format!("{}", peer_id), "error" => e);
sync_send
.send(SyncMessage::ParentLookupFailed(peer_id))
.unwrap_or_else(|_| {
// on failure, inform to downvote the peer
debug!(
log,
"Block processor could not inform parent lookup result. Likely shutting down."
);
});
}
(_, Ok(_)) => {
debug!(log, "Parent lookup processed successfully");
}
}
}
}
});
}
/// Helper function to process blocks batches which only consumes the chain and blocks to process.
fn process_blocks<
'a,
T: BeaconChainTypes,
I: Iterator<Item = &'a SignedBeaconBlock<T::EthSpec>>,
>(
chain: Weak<BeaconChain<T>>,
downloaded_blocks: I,
log: &slog::Logger,
) -> (usize, Result<(), String>) {
if let Some(chain) = chain.upgrade() {
let blocks = downloaded_blocks.cloned().collect::<Vec<_>>();
let (imported_blocks, r) = match chain.process_chain_segment(blocks) {
ChainSegmentResult::Successful { imported_blocks } => {
if imported_blocks == 0 {
debug!(log, "All blocks already known");
} else {
debug!(
log, "Imported blocks from network";
"count" => imported_blocks,
);
// Batch completed successfully with at least one block, run fork choice.
run_fork_choice(chain, log);
}
(imported_blocks, Ok(()))
}
ChainSegmentResult::Failed {
imported_blocks,
error,
} => {
let r = handle_failed_chain_segment(error, log);
if imported_blocks > 0 {
run_fork_choice(chain, log);
}
(imported_blocks, r)
}
};
return (imported_blocks, r);
}
(0, Ok(()))
}
/// Runs fork-choice on a given chain. This is used during block processing after one successful
/// block import.
fn run_fork_choice<T: BeaconChainTypes>(chain: Arc<BeaconChain<T>>, log: &slog::Logger) {
match chain.fork_choice() {
Ok(()) => trace!(
log,
"Fork choice success";
"location" => "batch processing"
),
Err(e) => error!(
log,
"Fork choice failed";
"error" => format!("{:?}", e),
"location" => "batch import error"
),
}
}
/// Helper function to handle a `BlockError` from `process_chain_segment`
fn handle_failed_chain_segment<T: EthSpec>(
error: BlockError<T>,
log: &slog::Logger,
) -> Result<(), String> {
match error {
BlockError::ParentUnknown(block) => {
// blocks should be sequential and all parents should exist
Err(format!(
"Block has an unknown parent: {}",
block.parent_root()
))
}
BlockError::BlockIsAlreadyKnown => {
// This can happen for many reasons. Head sync's can download multiples and parent
// lookups can download blocks before range sync
Ok(())
}
BlockError::FutureSlot {
present_slot,
block_slot,
} => {
if present_slot + FUTURE_SLOT_TOLERANCE >= block_slot {
// The block is too far in the future, drop it.
warn!(
log, "Block is ahead of our slot clock";
"msg" => "block for future slot rejected, check your time",
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
} else {
// The block is in the future, but not too far.
debug!(
log, "Block is slightly ahead of our slot clock, ignoring.";
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
}
Err(format!(
"Block with slot {} is higher than the current slot {}",
block_slot, present_slot
))
}
BlockError::WouldRevertFinalizedSlot { .. } => {
debug!( log, "Finalized or earlier block processed";);
Ok(())
}
BlockError::GenesisBlock => {
debug!(log, "Genesis block was processed");
Ok(())
}
BlockError::BeaconChainError(e) => {
warn!(
log, "BlockProcessingFailure";
"msg" => "unexpected condition in processing block.",
"outcome" => format!("{:?}", e)
);
Err(format!("Internal error whilst processing block: {:?}", e))
}
other => {
debug!(
log, "Invalid block received";
"msg" => "peer sent invalid block",
"outcome" => format!("{:?}", other),
);
Err(format!("Peer sent invalid block. Reason: {:?}", other))
}
}
}

View File

@@ -33,11 +33,11 @@
//! if an attestation references an unknown block) this manager can search for the block and
//! subsequently search for parents if needed.
use super::block_processor::{spawn_block_processor, BatchProcessResult, ProcessId};
use super::network_context::SyncNetworkContext;
use super::peer_sync_info::{PeerSyncInfo, PeerSyncType};
use super::range_sync::{BatchId, ChainId, RangeSync, EPOCHS_PER_BATCH};
use super::RequestId;
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
use crate::service::NetworkMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
use eth2_libp2p::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest, GoodbyeReason};
@@ -109,6 +109,18 @@ pub enum SyncMessage<T: EthSpec> {
ParentLookupFailed(PeerId),
}
/// The result of processing a multiple blocks (a chain segment).
// TODO: When correct batch error handling occurs, we will include an error type.
#[derive(Debug)]
pub enum BatchProcessResult {
/// The batch was completed successfully.
Success,
/// The batch processing failed.
Failed,
/// The batch processing failed but managed to import at least one block.
Partial,
}
/// Maintains a sequential list of parents to lookup and the lookup's current state.
struct ParentRequests<T: EthSpec> {
/// The blocks that have currently been downloaded.
@@ -158,8 +170,8 @@ pub struct SyncManager<T: BeaconChainTypes> {
/// The logger for the import manager.
log: Logger,
/// The sending part of input_channel
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
}
/// Object representing a single block lookup request.
@@ -187,6 +199,7 @@ pub fn spawn<T: BeaconChainTypes>(
beacon_chain: Arc<BeaconChain<T>>,
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
network_send: mpsc::UnboundedSender<NetworkMessage<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
log: slog::Logger,
) -> mpsc::UnboundedSender<SyncMessage<T::EthSpec>> {
assert!(
@@ -201,7 +214,7 @@ pub fn spawn<T: BeaconChainTypes>(
range_sync: RangeSync::new(
beacon_chain.clone(),
network_globals.clone(),
sync_send.clone(),
beacon_processor_send.clone(),
log.clone(),
),
network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
@@ -211,7 +224,7 @@ pub fn spawn<T: BeaconChainTypes>(
parent_queue: SmallVec::new(),
single_block_lookups: FnvHashMap::default(),
log: log.clone(),
sync_send: sync_send.clone(),
beacon_processor_send,
};
// spawn the sync manager thread
@@ -300,7 +313,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// There are two reasons we could have received a BlocksByRoot response
/// - We requested a single hash and have received a response for the single_block_lookup
/// - We are looking up parent blocks in parent lookup search
fn blocks_by_root_response(
async fn blocks_by_root_response(
&mut self,
peer_id: PeerId,
request_id: RequestId,
@@ -318,7 +331,8 @@ impl<T: BeaconChainTypes> SyncManager<T> {
single_block_hash = Some(block_request.hash);
}
if let Some(block_hash) = single_block_hash {
self.single_block_lookup_response(peer_id, block, block_hash);
self.single_block_lookup_response(peer_id, block, block_hash)
.await;
return;
}
@@ -340,7 +354,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// add the block to response
parent_request.downloaded_blocks.push(block);
// queue for processing
self.process_parent_request(parent_request);
self.process_parent_request(parent_request).await;
}
None => {
// this is a stream termination
@@ -381,10 +395,40 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
}
async fn process_block_async(
&mut self,
block: SignedBeaconBlock<T::EthSpec>,
) -> Option<Result<Hash256, BlockError<T::EthSpec>>> {
let (event, rx) = BeaconWorkEvent::rpc_beacon_block(Box::new(block));
match self.beacon_processor_send.try_send(event) {
Ok(_) => {}
Err(e) => {
error!(
self.log,
"Failed to send sync block to processor";
"error" => format!("{:?}", e)
);
return None;
}
}
match rx.await {
Ok(block_result) => Some(block_result),
Err(_) => {
warn!(
self.log,
"Sync block not processed";
"msg" => "likely due to system resource exhaustion"
);
None
}
}
}
/// Processes the response obtained from a single block lookup search. If the block is
/// processed or errors, the search ends. If the blocks parent is unknown, a block parent
/// lookup search is started.
fn single_block_lookup_response(
async fn single_block_lookup_response(
&mut self,
peer_id: PeerId,
block: SignedBeaconBlock<T::EthSpec>,
@@ -399,8 +443,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
return;
}
let block_result = match self.process_block_async(block.clone()).await {
Some(block_result) => block_result,
None => return,
};
// we have the correct block, try and process it
match self.chain.process_block(block.clone()) {
match block_result {
Ok(block_root) => {
info!(self.log, "Processed block"; "block" => format!("{}", block_root));
@@ -599,7 +648,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// manager
/// A new block has been received for a parent lookup query, process it.
fn process_parent_request(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
async fn process_parent_request(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
// verify the last added block is the parent of the last requested block
if parent_request.downloaded_blocks.len() < 2 {
@@ -652,7 +701,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
.downloaded_blocks
.pop()
.expect("There is always at least one block in the queue");
match self.chain.process_block(newest_block.clone()) {
let block_result = match self.process_block_async(newest_block.clone()).await {
Some(block_result) => block_result,
None => return,
};
match block_result {
Err(BlockError::ParentUnknown { .. }) => {
// need to keep looking for parents
// add the block back to the queue and continue the search
@@ -660,13 +715,23 @@ impl<T: BeaconChainTypes> SyncManager<T> {
self.request_parent(parent_request);
}
Ok(_) | Err(BlockError::BlockIsAlreadyKnown { .. }) => {
spawn_block_processor(
Arc::downgrade(&self.chain),
ProcessId::ParentLookup(parent_request.last_submitted_peer.clone()),
parent_request.downloaded_blocks,
self.sync_send.clone(),
self.log.clone(),
);
let process_id =
ProcessId::ParentLookup(parent_request.last_submitted_peer.clone());
let blocks = parent_request.downloaded_blocks;
match self
.beacon_processor_send
.try_send(BeaconWorkEvent::chain_segment(process_id, blocks))
{
Ok(_) => {}
Err(e) => {
error!(
self.log,
"Failed to send chain segment to processor";
"error" => format!("{:?}", e)
);
}
}
}
Err(outcome) => {
// all else we consider the chain a failure and downvote the peer that sent
@@ -760,7 +825,8 @@ impl<T: BeaconChainTypes> SyncManager<T> {
request_id,
beacon_block,
} => {
self.blocks_by_root_response(peer_id, request_id, beacon_block.map(|b| *b));
self.blocks_by_root_response(peer_id, request_id, beacon_block.map(|b| *b))
.await;
}
SyncMessage::UnknownBlock(peer_id, block) => {
self.add_unknown_block(peer_id, *block);

View File

@@ -1,14 +1,14 @@
//! Syncing for lighthouse.
//!
//! Stores the various syncing methods for the beacon chain.
mod block_processor;
pub mod manager;
mod network_context;
mod peer_sync_info;
mod range_sync;
pub use manager::SyncMessage;
pub use manager::{BatchProcessResult, SyncMessage};
pub use peer_sync_info::PeerSyncInfo;
pub use range_sync::{BatchId, ChainId};
/// Type of id of rpc requests sent by sync
pub type RequestId = usize;

View File

@@ -1,11 +1,12 @@
use super::batch::{Batch, BatchId, PendingBatches};
use crate::sync::block_processor::{spawn_block_processor, BatchProcessResult, ProcessId};
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::{RequestId, SyncMessage};
use crate::beacon_processor::ProcessId;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::RequestId;
use crate::sync::{network_context::SyncNetworkContext, BatchProcessResult};
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::{PeerAction, PeerId};
use rand::prelude::*;
use slog::{crit, debug, warn};
use slog::{crit, debug, error, warn};
use std::collections::HashSet;
use std::sync::Arc;
use tokio::sync::mpsc;
@@ -84,9 +85,8 @@ pub struct SyncingChain<T: BeaconChainTypes> {
/// The current processing batch, if any.
current_processing_batch: Option<Batch<T::EthSpec>>,
/// A send channel to the sync manager. This is given to the batch processor thread to report
/// back once batch processing has completed.
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
/// A reference to the underlying beacon chain.
chain: Arc<BeaconChain<T>>,
@@ -111,7 +111,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
target_head_slot: Slot,
target_head_root: Hash256,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
chain: Arc<BeaconChain<T>>,
log: slog::Logger,
) -> Self {
@@ -131,7 +131,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
to_be_processed_id: BatchId(1),
state: ChainSyncingState::Stopped,
current_processing_batch: None,
sync_send,
beacon_processor_send,
chain,
log,
}
@@ -255,18 +255,23 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
}
/// Sends a batch to the batch processor.
/// Sends a batch to the beacon processor for async processing in a queue.
fn process_batch(&mut self, mut batch: Batch<T::EthSpec>) {
let downloaded_blocks = std::mem::replace(&mut batch.downloaded_blocks, Vec::new());
let blocks = std::mem::replace(&mut batch.downloaded_blocks, Vec::new());
let process_id = ProcessId::RangeBatchId(self.id, batch.id);
self.current_processing_batch = Some(batch);
spawn_block_processor(
Arc::downgrade(&self.chain.clone()),
process_id,
downloaded_blocks,
self.sync_send.clone(),
self.log.clone(),
);
if let Err(e) = self
.beacon_processor_send
.try_send(BeaconWorkEvent::chain_segment(process_id, blocks))
{
error!(
self.log,
"Failed to send chain segment to processor";
"msg" => "process_batch",
"error" => format!("{:?}", e)
);
}
}
/// The block processor has completed processing a batch. This function handles the result

View File

@@ -4,7 +4,7 @@
//! with this struct to to simplify the logic of the other layers of sync.
use super::chain::{ChainSyncingState, SyncingChain};
use crate::sync::manager::SyncMessage;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::PeerSyncInfo;
use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -302,7 +302,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_head: Hash256,
target_slot: Slot,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
) {
let chain_id = rand::random();
self.finalized_chains.push(SyncingChain::new(
@@ -311,7 +311,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_slot,
target_head,
peer_id,
sync_send,
beacon_processor_send,
self.beacon_chain.clone(),
self.log.clone(),
));
@@ -326,7 +326,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_head: Hash256,
target_slot: Slot,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
) {
// remove the peer from any other head chains
@@ -342,7 +342,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_slot,
target_head,
peer_id,
sync_send,
beacon_processor_send,
self.beacon_chain.clone(),
self.log.clone(),
);

View File

@@ -43,9 +43,9 @@ use super::chain::{ChainId, ProcessingResult};
use super::chain_collection::{ChainCollection, RangeSyncState};
use super::sync_type::RangeSyncType;
use super::BatchId;
use crate::sync::block_processor::BatchProcessResult;
use crate::sync::manager::SyncMessage;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::BatchProcessResult;
use crate::sync::PeerSyncInfo;
use crate::sync::RequestId;
use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -69,9 +69,8 @@ pub struct RangeSync<T: BeaconChainTypes> {
/// finalized chain(s) complete, these peer's get STATUS'ed to update their head slot before
/// the head chains are formed and downloaded.
awaiting_head_peers: HashSet<PeerId>,
/// The sync manager channel, allowing the batch processor thread to callback the sync task
/// once complete.
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
/// The syncing logger.
log: slog::Logger,
}
@@ -80,14 +79,14 @@ impl<T: BeaconChainTypes> RangeSync<T> {
pub fn new(
beacon_chain: Arc<BeaconChain<T>>,
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
log: slog::Logger,
) -> Self {
RangeSync {
beacon_chain: beacon_chain.clone(),
chains: ChainCollection::new(beacon_chain, network_globals, log.clone()),
awaiting_head_peers: HashSet::new(),
sync_send,
beacon_processor_send,
log,
}
}
@@ -181,7 +180,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
remote_info.finalized_root,
remote_finalized_slot,
peer_id,
self.sync_send.clone(),
self.beacon_processor_send.clone(),
);
self.chains.update_finalized(network);
// update the global sync state
@@ -228,7 +227,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
remote_info.head_root,
remote_info.head_slot,
peer_id,
self.sync_send.clone(),
self.beacon_processor_send.clone(),
);
}
self.chains.update_finalized(network);