move the parent lookup process to a dedicated thread (#906)

* Upgrade the parent lookup logic

* Apply reviewer suggestions

* move the parent lookup process to a dedicated thread

* move the logic of parent lookup and range syncing to a block processor

* review suggestions

* more review suggestions

* Add small logging changes

* Process parent lookups in reverse

Co-authored-by: Age Manning <Age@AgeManning.com>
This commit is contained in:
divma
2020-03-22 20:07:41 -05:00
committed by GitHub
parent a5fbaef469
commit 47aef629d1
6 changed files with 157 additions and 133 deletions

View File

@@ -1,193 +0,0 @@
use super::batch::Batch;
use crate::message_processor::FUTURE_SLOT_TOLERANCE;
use crate::sync::manager::SyncMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessingOutcome};
use slog::{debug, error, trace, warn};
use std::sync::{Arc, Weak};
use tokio::sync::mpsc;
/// The result of attempting to process a batch of blocks.
///
/// This is the value reported back to sync in `SyncMessage::BatchProcessed` once the batch
/// processor thread has finished with a batch.
// TODO: When correct batch error handling occurs, we will include an error type.
#[derive(Debug)]
pub enum BatchProcessResult {
/// The batch was completed successfully (`process_batch` returned `Ok`).
Success,
/// The batch processing failed (`process_batch` returned `Err`); no error detail is carried.
Failed,
}
/// Processes `batch` on a newly spawned OS thread and reports the outcome back to sync.
///
/// When processing finishes, a `SyncMessage::BatchProcessed` carrying `process_id`, the boxed
/// batch and the resulting `BatchProcessResult` is sent over `sync_send`. If that send fails
/// the failure is only logged. The thread's join handle is discarded, so the thread is not
/// joined by the caller.
// TODO: Refactor to async fn, with stable futures
pub fn spawn_batch_processor<T: BeaconChainTypes>(
    chain: Weak<BeaconChain<T>>,
    process_id: u64,
    batch: Batch<T::EthSpec>,
    mut sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
    log: slog::Logger,
) {
    std::thread::spawn(move || {
        debug!(log, "Processing batch"; "id" => *batch.id);

        // Collapse the detailed error into the two-state result understood by sync.
        let result = if process_batch(chain, &batch, &log).is_ok() {
            BatchProcessResult::Success
        } else {
            BatchProcessResult::Failed
        };
        debug!(log, "Batch processed"; "id" => *batch.id, "result" => format!("{:?}", result));

        // Hand the batch back to sync together with the outcome.
        let report = SyncMessage::BatchProcessed {
            process_id,
            batch: Box::new(batch),
            result,
        };
        if sync_send.try_send(report).is_err() {
            debug!(
                log,
                "Batch result could not inform sync. Likely shutting down."
            );
        }
    });
}
/// Helper function to process block batches which only consumes the chain and blocks to process.
///
/// Blocks are handed to `BeaconChain::process_block` in the order they appear in
/// `batch.downloaded_blocks`. The chain is held as a `Weak` reference and upgraded once per
/// block; if it has been dropped, processing stops early and `Ok(())` is returned.
///
/// # Errors
///
/// Returns a description of the failure when a block
/// - has an unknown parent,
/// - is further ahead of the slot clock than `FUTURE_SLOT_TOLERANCE` allows,
/// - yields any other outcome not handled explicitly below (treated as an invalid block), or
/// - makes `process_block` itself return an error.
///
/// Before an error is returned, fork choice is run if at least one block of this batch was
/// imported. After all blocks have been handled without error, fork choice is always run.
fn process_batch<T: BeaconChainTypes>(
    chain: Weak<BeaconChain<T>>,
    batch: &Batch<T::EthSpec>,
    log: &slog::Logger,
) -> Result<(), String> {
    let mut successful_block_import = false;

    for block in &batch.downloaded_blocks {
        let beacon_chain = match chain.upgrade() {
            Some(beacon_chain) => beacon_chain,
            // terminate early due to dropped beacon chain
            None => return Ok(()),
        };

        let processing_result = beacon_chain.process_block(block.clone());

        // `Some(reason)` means this block aborts the batch; `None` means move to the next block.
        let failure: Option<String> = if let Ok(outcome) = processing_result {
            match outcome {
                BlockProcessingOutcome::Processed { block_root } => {
                    // The block was valid and we processed it successfully.
                    trace!(
                        log, "Imported block from network";
                        "slot" => block.slot(),
                        "block_root" => format!("{}", block_root),
                    );
                    successful_block_import = true;
                    None
                }
                BlockProcessingOutcome::ParentUnknown { parent, .. } => {
                    // blocks should be sequential and all parents should exist
                    warn!(
                        log, "Parent block is unknown";
                        "parent_root" => format!("{}", parent),
                        "baby_block_slot" => block.slot(),
                    );
                    Some(format!(
                        "Block at slot {} has an unknown parent.",
                        block.slot()
                    ))
                }
                BlockProcessingOutcome::BlockIsAlreadyKnown => {
                    // this block is already known to us, move to the next
                    debug!(
                        log, "Imported a block that is already known";
                        "block_slot" => block.slot(),
                    );
                    None
                }
                BlockProcessingOutcome::FutureSlot {
                    present_slot,
                    block_slot,
                } => {
                    // The block is only "too far" ahead when it exceeds the tolerated window.
                    // (The previous `>=` comparison had the two branches swapped: blocks
                    // within tolerance were rejected and far-future blocks were ignored.)
                    if present_slot + FUTURE_SLOT_TOLERANCE < block_slot {
                        // The block is too far in the future, drop it.
                        warn!(
                            log, "Block is ahead of our slot clock";
                            "msg" => "block for future slot rejected, check your time",
                            "present_slot" => present_slot,
                            "block_slot" => block_slot,
                            "FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
                        );
                        Some(format!(
                            "Block at slot {} is too far in the future",
                            block.slot()
                        ))
                    } else {
                        // The block is in the future, but not too far.
                        debug!(
                            log, "Block is slightly ahead of our slot clock, ignoring.";
                            "present_slot" => present_slot,
                            "block_slot" => block_slot,
                            "FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
                        );
                        None
                    }
                }
                BlockProcessingOutcome::WouldRevertFinalizedSlot { .. } => {
                    debug!(
                        log, "Finalized or earlier block processed";
                        "outcome" => format!("{:?}", outcome),
                    );
                    // block reached our finalized slot or was earlier, move to the next block
                    None
                }
                BlockProcessingOutcome::GenesisBlock => {
                    debug!(
                        log, "Genesis block was processed";
                        "outcome" => format!("{:?}", outcome),
                    );
                    None
                }
                _ => {
                    warn!(
                        log, "Invalid block received";
                        "msg" => "peer sent invalid block",
                        "outcome" => format!("{:?}", outcome),
                    );
                    Some(format!("Invalid block at slot {}", block.slot()))
                }
            }
        } else {
            warn!(
                log, "BlockProcessingFailure";
                "msg" => "unexpected condition in processing block.",
                "outcome" => format!("{:?}", processing_result)
            );
            Some(format!(
                "Unexpected block processing error: {:?}",
                processing_result
            ))
        };

        // Single exit path for every fatal outcome: make any blocks imported so far count
        // towards the head before giving up on the rest of the batch.
        if let Some(reason) = failure {
            if successful_block_import {
                run_fork_choice(beacon_chain, log);
            }
            return Err(reason);
        }
    }

    // Batch completed successfully, run fork choice.
    if let Some(beacon_chain) = chain.upgrade() {
        run_fork_choice(beacon_chain, log);
    }

    Ok(())
}
/// Runs fork-choice on a given chain. This is used during block processing after one successful
/// block import.
fn run_fork_choice<T: BeaconChainTypes>(chain: Arc<BeaconChain<T>>, log: &slog::Logger) {
match chain.fork_choice() {
Ok(()) => trace!(
log,
"Fork choice success";
"location" => "batch processing"
),
Err(e) => error!(
log,
"Fork choice failed";
"error" => format!("{:?}", e),
"location" => "batch import error"
),
}
}

View File

@@ -1,5 +1,5 @@
use super::batch::{Batch, BatchId, PendingBatches};
use super::batch_processing::{spawn_batch_processor, BatchProcessResult};
use crate::sync::block_processor::{spawn_block_processor, BatchProcessResult, ProcessId};
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::SyncMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -76,7 +76,7 @@ pub struct SyncingChain<T: BeaconChainTypes> {
/// A random id given to a batch process request. This is None if there is no ongoing batch
/// process.
current_processing_id: Option<u64>,
current_processing_batch: Option<Batch<T::EthSpec>>,
/// A send channel to the sync manager. This is given to the batch processor thread to report
/// back once batch processing has completed.
@@ -120,7 +120,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
to_be_downloaded_id: BatchId(1),
to_be_processed_id: BatchId(1),
state: ChainSyncingState::Stopped,
current_processing_id: None,
current_processing_batch: None,
sync_send,
chain,
log,
@@ -167,15 +167,16 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// An entire batch of blocks has been received. This function checks whether it can be processed,
// removes any batches waiting to be verified and, if this chain is syncing, requests new
// blocks for the peer.
debug!(self.log, "Completed batch received"; "id"=> *batch.id, "blocks"=>batch.downloaded_blocks.len(), "awaiting_batches" => self.completed_batches.len());
debug!(self.log, "Completed batch received"; "id"=> *batch.id, "blocks" => &batch.downloaded_blocks.len(), "awaiting_batches" => self.completed_batches.len());
// verify the range of received blocks
// Note that the order of blocks is verified in block processing
if let Some(last_slot) = batch.downloaded_blocks.last().map(|b| b.slot()) {
// the batch is non-empty
if batch.start_slot > batch.downloaded_blocks[0].slot() || batch.end_slot < last_slot {
let first_slot = batch.downloaded_blocks[0].slot();
if batch.start_slot > first_slot || batch.end_slot < last_slot {
warn!(self.log, "BlocksByRange response returned out of range blocks";
"response_initial_slot" => batch.downloaded_blocks[0].slot(),
"response_initial_slot" => first_slot,
"requested_initial_slot" => batch.start_slot);
network.downvote_peer(batch.current_peer);
self.to_be_processed_id = batch.id; // reset the id back to here, when incrementing, it will check against completed batches
@@ -218,7 +219,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
// Only process one batch at a time
if self.current_processing_id.is_some() {
if self.current_processing_batch.is_some() {
return;
}
@@ -238,14 +239,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
/// Sends a batch to the batch processor.
fn process_batch(&mut self, batch: Batch<T::EthSpec>) {
// only spawn one instance at a time
let processing_id: u64 = rand::random();
self.current_processing_id = Some(processing_id);
spawn_batch_processor(
fn process_batch(&mut self, mut batch: Batch<T::EthSpec>) {
let downloaded_blocks = std::mem::replace(&mut batch.downloaded_blocks, Vec::new());
let batch_id = ProcessId::RangeBatchId(batch.id.clone());
self.current_processing_batch = Some(batch);
spawn_block_processor(
self.chain.clone(),
processing_id,
batch,
batch_id,
downloaded_blocks,
self.sync_send.clone(),
self.log.clone(),
);
@@ -256,30 +257,41 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
pub fn on_batch_process_result(
&mut self,
network: &mut SyncNetworkContext,
processing_id: u64,
batch: &mut Option<Batch<T::EthSpec>>,
batch_id: BatchId,
downloaded_blocks: &mut Option<Vec<SignedBeaconBlock<T::EthSpec>>>,
result: &BatchProcessResult,
) -> Option<ProcessingResult> {
if Some(processing_id) != self.current_processing_id {
// batch process doesn't belong to this chain
if let Some(current_batch) = &self.current_processing_batch {
if current_batch.id != batch_id {
// batch process does not belong to this chain
return None;
}
// Continue. This is our processing request
} else {
// not waiting on a processing result
return None;
}
// Consume the batch option
let batch = batch.take().or_else(|| {
// claim the result by consuming the option
let downloaded_blocks = downloaded_blocks.take().or_else(|| {
// if taken by another chain, we are no longer waiting on a result.
self.current_processing_batch = None;
crit!(self.log, "Processed batch taken by another chain");
None
})?;
// No longer waiting on a processing result
let mut batch = self.current_processing_batch.take().unwrap();
// These are the blocks of this batch
batch.downloaded_blocks = downloaded_blocks;
// double check batches are processed in order TODO: Remove for prod
if batch.id != self.to_be_processed_id {
crit!(self.log, "Batch processed out of order";
"processed_batch_id" => *batch.id,
"expected_id" => *self.to_be_processed_id);
"processed_batch_id" => *batch.id,
"expected_id" => *self.to_be_processed_id);
}
self.current_processing_id = None;
let res = match result {
BatchProcessResult::Success => {
*self.to_be_processed_id += 1;

View File

@@ -2,11 +2,10 @@
//! peers.
mod batch;
mod batch_processing;
mod chain;
mod chain_collection;
mod range;
pub use batch::Batch;
pub use batch_processing::BatchProcessResult;
pub use batch::BatchId;
pub use range::RangeSync;

View File

@@ -41,8 +41,9 @@
use super::chain::ProcessingResult;
use super::chain_collection::{ChainCollection, SyncState};
use super::{Batch, BatchProcessResult};
use super::BatchId;
use crate::message_processor::PeerSyncInfo;
use crate::sync::block_processor::BatchProcessResult;
use crate::sync::manager::SyncMessage;
use crate::sync::network_context::SyncNetworkContext;
use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -130,8 +131,8 @@ impl<T: BeaconChainTypes> RangeSync<T> {
},
None => {
return warn!(self.log,
"Beacon chain dropped. Peer not considered for sync";
"peer_id" => format!("{:?}", peer_id));
"Beacon chain dropped. Peer not considered for sync";
"peer_id" => format!("{:?}", peer_id));
}
};
@@ -256,15 +257,15 @@ impl<T: BeaconChainTypes> RangeSync<T> {
pub fn handle_block_process_result(
&mut self,
network: &mut SyncNetworkContext,
processing_id: u64,
batch: Batch<T::EthSpec>,
batch_id: BatchId,
downloaded_blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
result: BatchProcessResult,
) {
// build an option for passing the batch to each chain
let mut batch = Some(batch);
// build an option for passing the downloaded_blocks to each chain
let mut downloaded_blocks = Some(downloaded_blocks);
match self.chains.finalized_request(|chain| {
chain.on_batch_process_result(network, processing_id, &mut batch, &result)
chain.on_batch_process_result(network, batch_id, &mut downloaded_blocks, &result)
}) {
Some((index, ProcessingResult::RemoveChain)) => {
let chain = self.chains.remove_finalized_chain(index);
@@ -293,7 +294,12 @@ impl<T: BeaconChainTypes> RangeSync<T> {
Some((_, ProcessingResult::KeepChain)) => {}
None => {
match self.chains.head_request(|chain| {
chain.on_batch_process_result(network, processing_id, &mut batch, &result)
chain.on_batch_process_result(
network,
batch_id,
&mut downloaded_blocks,
&result,
)
}) {
Some((index, ProcessingResult::RemoveChain)) => {
let chain = self.chains.remove_head_chain(index);
@@ -308,7 +314,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
None => {
// This can happen if a chain gets purged due to being out of date whilst a
// batch process is in progress.
debug!(self.log, "No chains match the block processing id"; "id" => processing_id);
debug!(self.log, "No chains match the block processing id"; "id" => *batch_id);
}
}
}