move the parent lookup process to a dedicated thread (#906)

* Upgrade the parent lookup logic
* Apply reviewer suggestions
* move the parent lookup process to a dedicated thread
* move the logic of parent lookup and range syncing to a block processor
* review suggestions
* more review suggestions
* Add small logging changes
* Process parent lookups in reverse

Co-authored-by: Age Manning <Age@AgeManning.com>
2026-04-29 18:53:32 +00:00 · 2020-03-22 20:07:41 -05:00
parent a5fbaef469
commit 47aef629d1
6 changed files with 157 additions and 133 deletions
--- a/beacon_node/network/src/sync/range_sync/batch_processing.rs
+++ b/beacon_node/network/src/sync/range_sync/batch_processing.rs
@@ -1,193 +0,0 @@
-use super::batch::Batch;
-use crate::message_processor::FUTURE_SLOT_TOLERANCE;
-use crate::sync::manager::SyncMessage;
-use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessingOutcome};
-use slog::{debug, error, trace, warn};
-use std::sync::{Arc, Weak};
-use tokio::sync::mpsc;
-
-/// The result of attempting to process a batch of blocks.
-// TODO: When correct batch error handling occurs, we will include an error type.
-#[derive(Debug)]
-pub enum BatchProcessResult {
-    /// The batch was completed successfully.
-    Success,
-    /// The batch processing failed.
-    Failed,
-}
-
-// TODO: Refactor to async fn, with stable futures
-pub fn spawn_batch_processor<T: BeaconChainTypes>(
-    chain: Weak<BeaconChain<T>>,
-    process_id: u64,
-    batch: Batch<T::EthSpec>,
-    mut sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
-    log: slog::Logger,
-) {
-    std::thread::spawn(move || {
-        debug!(log, "Processing batch"; "id" => *batch.id);
-        let result = match process_batch(chain, &batch, &log) {
-            Ok(_) => BatchProcessResult::Success,
-            Err(_) => BatchProcessResult::Failed,
-        };
-
-        debug!(log, "Batch processed"; "id" => *batch.id, "result" => format!("{:?}", result));
-
-        sync_send
-            .try_send(SyncMessage::BatchProcessed {
-                process_id,
-                batch: Box::new(batch),
-                result,
-            })
-            .unwrap_or_else(|_| {
-                debug!(
-                    log,
-                    "Batch result could not inform sync. Likely shutting down."
-                );
-            });
-    });
-}
-
-// Helper function to process block batches which only consumes the chain and blocks to process
-fn process_batch<T: BeaconChainTypes>(
-    chain: Weak<BeaconChain<T>>,
-    batch: &Batch<T::EthSpec>,
-    log: &slog::Logger,
-) -> Result<(), String> {
-    let mut successful_block_import = false;
-    for block in &batch.downloaded_blocks {
-        if let Some(chain) = chain.upgrade() {
-            let processing_result = chain.process_block(block.clone());
-
-            if let Ok(outcome) = processing_result {
-                match outcome {
-                    BlockProcessingOutcome::Processed { block_root } => {
-                        // The block was valid and we processed it successfully.
-                        trace!(
-                            log, "Imported block from network";
-                            "slot" => block.slot(),
-                            "block_root" => format!("{}", block_root),
-                        );
-                        successful_block_import = true;
-                    }
-                    BlockProcessingOutcome::ParentUnknown { parent, .. } => {
-                        // blocks should be sequential and all parents should exist
-                        warn!(
-                            log, "Parent block is unknown";
-                            "parent_root" => format!("{}", parent),
-                            "baby_block_slot" => block.slot(),
-                        );
-                        if successful_block_import {
-                            run_fork_choice(chain, log);
-                        }
-                        return Err(format!(
-                            "Block at slot {} has an unknown parent.",
-                            block.slot()
-                        ));
-                    }
-                    BlockProcessingOutcome::BlockIsAlreadyKnown => {
-                        // this block is already known to us, move to the next
-                        debug!(
-                            log, "Imported a block that is already known";
-                            "block_slot" => block.slot(),
-                        );
-                    }
-                    BlockProcessingOutcome::FutureSlot {
-                        present_slot,
-                        block_slot,
-                    } => {
-                        if present_slot + FUTURE_SLOT_TOLERANCE >= block_slot {
-                            // The block is too far in the future, drop it.
-                            warn!(
-                                log, "Block is ahead of our slot clock";
-                                "msg" => "block for future slot rejected, check your time",
-                                "present_slot" => present_slot,
-                                "block_slot" => block_slot,
-                                "FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
-                            );
-                            if successful_block_import {
-                                run_fork_choice(chain, log);
-                            }
-                            return Err(format!(
-                                "Block at slot {} is too far in the future",
-                                block.slot()
-                            ));
-                        } else {
-                            // The block is in the future, but not too far.
-                            debug!(
-                                log, "Block is slightly ahead of our slot clock, ignoring.";
-                                "present_slot" => present_slot,
-                                "block_slot" => block_slot,
-                                "FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
-                            );
-                        }
-                    }
-                    BlockProcessingOutcome::WouldRevertFinalizedSlot { .. } => {
-                        debug!(
-                            log, "Finalized or earlier block processed";
-                            "outcome" => format!("{:?}", outcome),
-                        );
-                        // block reached our finalized slot or was earlier, move to the next block
-                    }
-                    BlockProcessingOutcome::GenesisBlock => {
-                        debug!(
-                            log, "Genesis block was processed";
-                            "outcome" => format!("{:?}", outcome),
-                        );
-                    }
-                    _ => {
-                        warn!(
-                            log, "Invalid block received";
-                            "msg" => "peer sent invalid block",
-                            "outcome" => format!("{:?}", outcome),
-                        );
-                        if successful_block_import {
-                            run_fork_choice(chain, log);
-                        }
-                        return Err(format!("Invalid block at slot {}", block.slot()));
-                    }
-                }
-            } else {
-                warn!(
-                    log, "BlockProcessingFailure";
-                    "msg" => "unexpected condition in processing block.",
-                    "outcome" => format!("{:?}", processing_result)
-                );
-                if successful_block_import {
-                    run_fork_choice(chain, log);
-                }
-                return Err(format!(
-                    "Unexpected block processing error: {:?}",
-                    processing_result
-                ));
-            }
-        } else {
-            return Ok(()); // terminate early due to dropped beacon chain
-        }
-    }
-
-    // Batch completed successfully, run fork choice.
-    if let Some(chain) = chain.upgrade() {
-        run_fork_choice(chain, log);
-    }
-
-    Ok(())
-}
-
-/// Runs fork-choice on a given chain. This is used during block processing after one successful
-/// block import.
-fn run_fork_choice<T: BeaconChainTypes>(chain: Arc<BeaconChain<T>>, log: &slog::Logger) {
-    match chain.fork_choice() {
-        Ok(()) => trace!(
-            log,
-            "Fork choice success";
-            "location" => "batch processing"
-        ),
-        Err(e) => error!(
-            log,
-            "Fork choice failed";
-            "error" => format!("{:?}", e),
-            "location" => "batch import error"
-        ),
-    }
-}
--- a/beacon_node/network/src/sync/range_sync/chain.rs
+++ b/beacon_node/network/src/sync/range_sync/chain.rs
@@ -1,5 +1,5 @@
 use super::batch::{Batch, BatchId, PendingBatches};
-use super::batch_processing::{spawn_batch_processor, BatchProcessResult};
+use crate::sync::block_processor::{spawn_block_processor, BatchProcessResult, ProcessId};
 use crate::sync::network_context::SyncNetworkContext;
 use crate::sync::SyncMessage;
 use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -76,7 +76,7 @@ pub struct SyncingChain<T: BeaconChainTypes> {

    /// A random id given to a batch process request. This is None if there is no ongoing batch
    /// process.
-    current_processing_id: Option<u64>,
+    current_processing_batch: Option<Batch<T::EthSpec>>,

    /// A send channel to the sync manager. This is given to the batch processor thread to report
    /// back once batch processing has completed.
@@ -120,7 +120,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
            to_be_downloaded_id: BatchId(1),
            to_be_processed_id: BatchId(1),
            state: ChainSyncingState::Stopped,
-            current_processing_id: None,
+            current_processing_batch: None,
            sync_send,
            chain,
            log,
@@ -167,15 +167,16 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        // An entire batch of blocks has been received. This functions checks to see if it can be processed,
        // remove any batches waiting to be verified and if this chain is syncing, request new
        // blocks for the peer.
-        debug!(self.log, "Completed batch received"; "id"=> *batch.id, "blocks"=>batch.downloaded_blocks.len(), "awaiting_batches" => self.completed_batches.len());
+        debug!(self.log, "Completed batch received"; "id"=> *batch.id, "blocks" => &batch.downloaded_blocks.len(), "awaiting_batches" => self.completed_batches.len());

        // verify the range of received blocks
        // Note that the order of blocks is verified in block processing
        if let Some(last_slot) = batch.downloaded_blocks.last().map(|b| b.slot()) {
            // the batch is non-empty
-            if batch.start_slot > batch.downloaded_blocks[0].slot() || batch.end_slot < last_slot {
+            let first_slot = batch.downloaded_blocks[0].slot();
+            if batch.start_slot > first_slot || batch.end_slot < last_slot {
                warn!(self.log, "BlocksByRange response returned out of range blocks";
-                          "response_initial_slot" => batch.downloaded_blocks[0].slot(),
+                          "response_initial_slot" => first_slot,
                          "requested_initial_slot" => batch.start_slot);
                network.downvote_peer(batch.current_peer);
                self.to_be_processed_id = batch.id; // reset the id back to here, when incrementing, it will check against completed batches
@@ -218,7 +219,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        }

        // Only process one batch at a time
-        if self.current_processing_id.is_some() {
+        if self.current_processing_batch.is_some() {
            return;
        }

@@ -238,14 +239,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
    }

    /// Sends a batch to the batch processor.
-    fn process_batch(&mut self, batch: Batch<T::EthSpec>) {
-        // only spawn one instance at a time
-        let processing_id: u64 = rand::random();
-        self.current_processing_id = Some(processing_id);
-        spawn_batch_processor(
+    fn process_batch(&mut self, mut batch: Batch<T::EthSpec>) {
+        let downloaded_blocks = std::mem::replace(&mut batch.downloaded_blocks, Vec::new());
+        let batch_id = ProcessId::RangeBatchId(batch.id.clone());
+        self.current_processing_batch = Some(batch);
+        spawn_block_processor(
            self.chain.clone(),
-            processing_id,
-            batch,
+            batch_id,
+            downloaded_blocks,
            self.sync_send.clone(),
            self.log.clone(),
        );
@@ -256,30 +257,41 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
    pub fn on_batch_process_result(
        &mut self,
        network: &mut SyncNetworkContext,
-        processing_id: u64,
-        batch: &mut Option<Batch<T::EthSpec>>,
+        batch_id: BatchId,
+        downloaded_blocks: &mut Option<Vec<SignedBeaconBlock<T::EthSpec>>>,
        result: &BatchProcessResult,
    ) -> Option<ProcessingResult> {
-        if Some(processing_id) != self.current_processing_id {
-            // batch process doesn't belong to this chain
+        if let Some(current_batch) = &self.current_processing_batch {
+            if current_batch.id != batch_id {
+                // batch process does not belong to this chain
+                return None;
+            }
+        // Continue. This is our processing request
+        } else {
+            // not waiting on a processing result
            return None;
        }

-        // Consume the batch option
-        let batch = batch.take().or_else(|| {
+        // claim the result by consuming the option
+        let downloaded_blocks = downloaded_blocks.take().or_else(|| {
+            // if taken by another chain, we are no longer waiting on a result.
+            self.current_processing_batch = None;
            crit!(self.log, "Processed batch taken by another chain");
            None
        })?;

+        // No longer waiting on a processing result
+        let mut batch = self.current_processing_batch.take().unwrap();
+        // These are the blocks of this batch
+        batch.downloaded_blocks = downloaded_blocks;
+
        // double check batches are processed in order TODO: Remove for prod
        if batch.id != self.to_be_processed_id {
            crit!(self.log, "Batch processed out of order";
-            "processed_batch_id" => *batch.id,
-            "expected_id" => *self.to_be_processed_id);
+                "processed_batch_id" => *batch.id,
+                "expected_id" => *self.to_be_processed_id);
        }

-        self.current_processing_id = None;
-
        let res = match result {
            BatchProcessResult::Success => {
                *self.to_be_processed_id += 1;
--- a/beacon_node/network/src/sync/range_sync/mod.rs
+++ b/beacon_node/network/src/sync/range_sync/mod.rs
@@ -2,11 +2,10 @@
 //! peers.

 mod batch;
-mod batch_processing;
 mod chain;
 mod chain_collection;
 mod range;

 pub use batch::Batch;
-pub use batch_processing::BatchProcessResult;
+pub use batch::BatchId;
 pub use range::RangeSync;
--- a/beacon_node/network/src/sync/range_sync/range.rs
+++ b/beacon_node/network/src/sync/range_sync/range.rs
@@ -41,8 +41,9 @@

 use super::chain::ProcessingResult;
 use super::chain_collection::{ChainCollection, SyncState};
-use super::{Batch, BatchProcessResult};
+use super::BatchId;
 use crate::message_processor::PeerSyncInfo;
+use crate::sync::block_processor::BatchProcessResult;
 use crate::sync::manager::SyncMessage;
 use crate::sync::network_context::SyncNetworkContext;
 use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -130,8 +131,8 @@ impl<T: BeaconChainTypes> RangeSync<T> {
            },
            None => {
                return warn!(self.log,
-                      "Beacon chain dropped. Peer not considered for sync";
-                      "peer_id" => format!("{:?}", peer_id));
+                    "Beacon chain dropped. Peer not considered for sync";
+                    "peer_id" => format!("{:?}", peer_id));
            }
        };

@@ -256,15 +257,15 @@ impl<T: BeaconChainTypes> RangeSync<T> {
    pub fn handle_block_process_result(
        &mut self,
        network: &mut SyncNetworkContext,
-        processing_id: u64,
-        batch: Batch<T::EthSpec>,
+        batch_id: BatchId,
+        downloaded_blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
        result: BatchProcessResult,
    ) {
-        // build an option for passing the batch to each chain
-        let mut batch = Some(batch);
+        // build an option for passing the downloaded_blocks to each chain
+        let mut downloaded_blocks = Some(downloaded_blocks);

        match self.chains.finalized_request(|chain| {
-            chain.on_batch_process_result(network, processing_id, &mut batch, &result)
+            chain.on_batch_process_result(network, batch_id, &mut downloaded_blocks, &result)
        }) {
            Some((index, ProcessingResult::RemoveChain)) => {
                let chain = self.chains.remove_finalized_chain(index);
@@ -293,7 +294,12 @@ impl<T: BeaconChainTypes> RangeSync<T> {
            Some((_, ProcessingResult::KeepChain)) => {}
            None => {
                match self.chains.head_request(|chain| {
-                    chain.on_batch_process_result(network, processing_id, &mut batch, &result)
+                    chain.on_batch_process_result(
+                        network,
+                        batch_id,
+                        &mut downloaded_blocks,
+                        &result,
+                    )
                }) {
                    Some((index, ProcessingResult::RemoveChain)) => {
                        let chain = self.chains.remove_head_chain(index);
@@ -308,7 +314,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
                    None => {
                        // This can happen if a chain gets purged due to being out of date whilst a
                        // batch process is in progress.
-                        debug!(self.log, "No chains match the block processing id"; "id" => processing_id);
+                        debug!(self.log, "No chains match the block processing id"; "id" => *batch_id);
                    }
                }
            }