Implement checkpoint sync (#2244)

## Issue Addressed

Closes #1891
Closes #1784

## Proposed Changes

Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint.

## Additional Info

- [x] Return unavailable status for out-of-range blocks requested by peers (#2561)
- [x] Implement sync daemon for fetching historical blocks (#2561)
- [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module)
- [x] Consistency check for initial block + state
- [x] Fetch the initial state and block from a beacon node HTTP endpoint
- [x] Don't crash fetching beacon states by slot from the API
- [x] Background service for state reconstruction, triggered by CLI flag or API call.

Considered out of scope for this PR:

- Drop the requirement to provide the `--checkpoint-block` (this would require some pretty heavy refactoring of block verification)

Co-authored-by: Diva M <divma@protonmail.com>
2026-04-21 23:08:23 +00:00 · 2021-09-22 00:37:28 +00:00
parent 280e4fe23d
commit 9667dc2f03
71 changed files with 4012 additions and 459 deletions
--- a/beacon_node/network/src/beacon_processor/worker/rpc_methods.rs
+++ b/beacon_node/network/src/beacon_processor/worker/rpc_methods.rs
@@ -2,7 +2,7 @@ use crate::beacon_processor::worker::FUTURE_SLOT_TOLERANCE;
 use crate::service::NetworkMessage;
 use crate::status::ToStatusMessage;
 use crate::sync::SyncMessage;
-use beacon_chain::{BeaconChainError, BeaconChainTypes, WhenSlotSkipped};
+use beacon_chain::{BeaconChainError, BeaconChainTypes, HistoricalBlockError, WhenSlotSkipped};
 use eth2_libp2p::rpc::StatusMessage;
 use eth2_libp2p::rpc::*;
 use eth2_libp2p::{PeerId, PeerRequestId, ReportSource, Response, SyncInfo};
@@ -38,6 +38,21 @@ impl<T: BeaconChainTypes> Worker<T> {
        })
    }

+    pub fn send_error_response(
+        &self,
+        peer_id: PeerId,
+        error: RPCResponseErrorCode,
+        reason: String,
+        id: PeerRequestId,
+    ) {
+        self.send_network_message(NetworkMessage::SendErrorResponse {
+            peer_id,
+            error,
+            reason,
+            id,
+        })
+    }
+
    /* Processing functions */

    /// Process a `Status` message to determine if a peer is relevant to us. If the peer is
@@ -163,6 +178,20 @@ impl<T: BeaconChainTypes> Worker<T> {
            .forwards_iter_block_roots(Slot::from(req.start_slot))
        {
            Ok(iter) => iter,
+            Err(BeaconChainError::HistoricalBlockError(
+                HistoricalBlockError::BlockOutOfRange {
+                    slot,
+                    oldest_block_slot,
+                },
+            )) => {
+                debug!(self.log, "Range request failed during backfill"; "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot);
+                return self.send_error_response(
+                    peer_id,
+                    RPCResponseErrorCode::ResourceUnavailable,
+                    "Backfilling".into(),
+                    request_id,
+                );
+            }
            Err(e) => return error!(self.log, "Unable to obtain root iter"; "error" => ?e),
        };

--- a/beacon_node/network/src/beacon_processor/worker/sync_methods.rs
+++ b/beacon_node/network/src/beacon_processor/worker/sync_methods.rs
@@ -2,9 +2,11 @@ use super::{super::work_reprocessing_queue::ReprocessQueueMessage, Worker};
 use crate::beacon_processor::worker::FUTURE_SLOT_TOLERANCE;
 use crate::beacon_processor::BlockResultSender;
 use crate::metrics;
-use crate::sync::manager::SyncMessage;
+use crate::sync::manager::{SyncMessage, SyncRequestType};
 use crate::sync::{BatchProcessResult, ChainId};
-use beacon_chain::{BeaconChainTypes, BlockError, ChainSegmentResult};
+use beacon_chain::{
+    BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError,
+};
 use eth2_libp2p::PeerId;
 use slog::{crit, debug, error, info, trace, warn};
 use tokio::sync::mpsc;
@@ -15,6 +17,8 @@ use types::{Epoch, Hash256, SignedBeaconBlock};
 pub enum ProcessId {
    /// Processing Id of a range syncing batch.
    RangeBatchId(ChainId, Epoch),
+    /// Processing ID for a backfill syncing batch.
+    BackSyncBatchId(Epoch),
    /// Processing Id of the parent lookup of a block.
    ParentLookup(PeerId, Hash256),
 }
@@ -99,11 +103,40 @@ impl<T: BeaconChainTypes> Worker<T> {
                    }
                };

-                self.send_sync_message(SyncMessage::BatchProcessed {
-                    chain_id,
-                    epoch,
-                    result,
-                });
+                let sync_type = SyncRequestType::RangeSync(epoch, chain_id);
+
+                self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
+            }
+            // this is a request from the Backfill sync
+            ProcessId::BackSyncBatchId(epoch) => {
+                let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64());
+                let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64());
+                let sent_blocks = downloaded_blocks.len();
+
+                let result = match self.process_backfill_blocks(&downloaded_blocks) {
+                    (_, Ok(_)) => {
+                        debug!(self.log, "Backfill batch processed";
+                            "batch_epoch" => epoch,
+                            "first_block_slot" => start_slot,
+                            "last_block_slot" => end_slot,
+                            "processed_blocks" => sent_blocks,
+                            "service"=> "sync");
+                        BatchProcessResult::Success(sent_blocks > 0)
+                    }
+                    (_, Err(e)) => {
+                        debug!(self.log, "Backfill batch processing failed";
+                            "batch_epoch" => epoch,
+                            "first_block_slot" => start_slot,
+                            "last_block_slot" => end_slot,
+                            "error" => e,
+                            "service" => "sync");
+                        BatchProcessResult::Failed(false)
+                    }
+                };
+
+                let sync_type = SyncRequestType::BackFillSync(epoch);
+
+                self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
            }
            // this is a parent lookup request from the sync manager
            ProcessId::ParentLookup(peer_id, chain_head) => {
@@ -160,6 +193,80 @@ impl<T: BeaconChainTypes> Worker<T> {
        }
    }

+    /// Imports a batch of backfill blocks, returning the imported count (0 on failure) with `Ok(())` or an error label.
+    fn process_backfill_blocks(
+        &self,
+        blocks: &[SignedBeaconBlock<T::EthSpec>],
+    ) -> (usize, Result<(), String>) {
+        match self.chain.import_historical_block_batch(blocks) {
+            Ok(imported_blocks) => {
+                metrics::inc_counter(
+                    &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_SUCCESS_TOTAL,
+                );
+
+                (imported_blocks, Ok(()))
+            }
+            Err(error) => {
+                metrics::inc_counter(
+                    &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL,
+                );
+                let err = match error {
+                    // Map each historical block error to a short label, logging it at a matching level
+                    BeaconChainError::HistoricalBlockError(e) => match e {
+                        HistoricalBlockError::MismatchedBlockRoot {
+                            block_root,
+                            expected_block_root,
+                        } => {
+                            debug!(
+                                self.log,
+                                "Backfill batch processing error";
+                                "error" => "mismatched_block_root",
+                                "block_root" => ?block_root,
+                                "expected_root" => ?expected_block_root
+                            );
+                            String::from("mismatched_block_root")
+                        }
+                        HistoricalBlockError::InvalidSignature
+                        | HistoricalBlockError::SignatureSet(_) => {
+                            warn!(
+                                self.log,
+                                "Backfill batch processing error";
+                                "error" => ?e
+                            );
+                            "invalid_signature".into()
+                        }
+                        HistoricalBlockError::ValidatorPubkeyCacheTimeout => {
+                            warn!(
+                                self.log,
+                                "Backfill batch processing error";
+                                "error" => "pubkey_cache_timeout"
+                            );
+                            "pubkey_cache_timeout".into()
+                        }
+                        HistoricalBlockError::NoAnchorInfo => {
+                            warn!(self.log, "Backfill not required");
+                            String::from("no_anchor_info")
+                        }
+                        HistoricalBlockError::IndexOutOfBounds
+                        | HistoricalBlockError::BlockOutOfRange { .. } => {
+                            error!(
+                                self.log,
+                                "Backfill batch processing error";
+                                "error" => ?e,
+                            );
+                            String::from("logic_error")
+                        }
+                    },
+                    other => {
+                        warn!(self.log, "Backfill batch processing error"; "error" => ?other);
+                        format!("{:?}", other)
+                    }
+                };
+                (0, Err(err)) // a failed batch always reports zero imported blocks
+            }
+        }
+    }
+
    /// Runs fork-choice on a given chain. This is used during block processing after one successful
    /// block import.
    fn run_fork_choice(&self) {
--- a/beacon_node/network/src/metrics.rs
+++ b/beacon_node/network/src/metrics.rs
@@ -338,10 +338,18 @@ lazy_static! {
        "beacon_processor_chain_segment_success_total",
        "Total number of chain segments successfully processed."
    );
+    pub static ref BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_SUCCESS_TOTAL: Result<IntCounter> = try_create_int_counter(
+        "beacon_processor_backfill_chain_segment_success_total",
+        "Total number of backfill chain segments successfully processed."
+    );
    pub static ref BEACON_PROCESSOR_CHAIN_SEGMENT_FAILED_TOTAL: Result<IntCounter> = try_create_int_counter(
        "beacon_processor_chain_segment_failed_total",
        "Total number of chain segments that failed processing."
    );
+    pub static ref BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL: Result<IntCounter> = try_create_int_counter(
+        "beacon_processor_backfill_chain_segment_failed_total",
+        "Total number of backfill chain segments that failed processing."
+    );
    // Unaggregated attestations.
    pub static ref BEACON_PROCESSOR_UNAGGREGATED_ATTESTATION_QUEUE_TOTAL: Result<IntGauge> = try_create_int_gauge(
        "beacon_processor_unaggregated_attestation_queue_total",
--- a/beacon_node/network/src/router/processor.rs
+++ b/beacon_node/network/src/router/processor.rs
@@ -418,7 +418,7 @@ impl<T: EthSpec> HandlerNetworkContext<T> {
        error: RPCResponseErrorCode,
        reason: String,
    ) {
-        self.inform_network(NetworkMessage::SendError {
+        self.inform_network(NetworkMessage::SendErrorResponse {
            peer_id,
            error,
            id,
--- a/beacon_node/network/src/service.rs
+++ b/beacon_node/network/src/service.rs
@@ -63,10 +63,8 @@ pub enum NetworkMessage<T: EthSpec> {
        response: Response<T>,
        id: PeerRequestId,
    },
-    /// Respond to a peer's request with an error.
-    SendError {
-        // NOTE: Currently this is never used, we just say goodbye without nicely closing the
-        // stream assigned to the request
+    /// Sends an error response to an RPC request.
+    SendErrorResponse {
        peer_id: PeerId,
        error: RPCResponseErrorCode,
        reason: String,
@@ -386,7 +384,7 @@ fn spawn_service<T: BeaconChainTypes>(
                        NetworkMessage::SendResponse{ peer_id, response, id } => {
                            service.libp2p.send_response(peer_id, id, response);
                        }
-                        NetworkMessage::SendError{ peer_id, error, id, reason } => {
+                        NetworkMessage::SendErrorResponse{ peer_id, error, id, reason } => {
                            service.libp2p.respond_with_error(peer_id, id, error, reason);
                        }
                        NetworkMessage::UPnPMappingEstablished { tcp_socket, udp_socket} => {
--- a/beacon_node/network/src/sync/backfill_sync/mod.rs
+++ b/beacon_node/network/src/sync/backfill_sync/mod.rs
--- a/beacon_node/network/src/sync/manager.rs
+++ b/beacon_node/network/src/sync/manager.rs
@@ -33,6 +33,7 @@
 //! needs to be searched for (i.e if an attestation references an unknown block) this manager can
 //! search for the block and subsequently search for parents if needed.

+use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
 use super::network_context::SyncNetworkContext;
 use super::peer_sync_info::{remote_sync_type, PeerSyncType};
 use super::range_sync::{ChainId, RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
@@ -77,14 +78,14 @@ pub enum SyncMessage<T: EthSpec> {
    /// A useful peer has been discovered.
    AddPeer(PeerId, SyncInfo),

-    /// A `BlocksByRange` response has been received.
+    /// A [`BlocksByRange`] response has been received.
    BlocksByRangeResponse {
        peer_id: PeerId,
        request_id: RequestId,
        beacon_block: Option<Box<SignedBeaconBlock<T>>>,
    },

-    /// A `BlocksByRoot` response has been received.
+    /// A [`BlocksByRoot`] response has been received.
    BlocksByRootResponse {
        peer_id: PeerId,
        request_id: RequestId,
@@ -106,8 +107,7 @@ pub enum SyncMessage<T: EthSpec> {

    /// A batch has been processed by the block processor thread.
    BatchProcessed {
-        chain_id: ChainId,
-        epoch: Epoch,
+        sync_type: SyncRequestType,
        result: BatchProcessResult,
    },

@@ -120,6 +120,15 @@ pub enum SyncMessage<T: EthSpec> {
    },
 }

+/// Identifies which sync algorithm a request or processed batch belongs to.
+#[derive(Debug, Clone)]
+pub enum SyncRequestType {
+    /// The backfill sync algorithm; carries the epoch identifying the batch.
+    BackFillSync(Epoch),
+    /// A chain in the range sync algorithm; carries the batch epoch and the chain's id.
+    RangeSync(Epoch, ChainId),
+}
+
 /// The result of processing a multiple blocks (a chain segment).
 #[derive(Debug)]
 pub enum BatchProcessResult {
@@ -166,6 +175,9 @@ pub struct SyncManager<T: BeaconChainTypes> {
    /// The object handling long-range batch load-balanced syncing.
    range_sync: RangeSync<T>,

+    /// Backfill syncing.
+    backfill_sync: BackFillSync<T>,
+
    /// A collection of parent block lookups.
    parent_queue: SmallVec<[ParentRequests<T::EthSpec>; 3]>,

@@ -227,6 +239,12 @@ pub fn spawn<T: BeaconChainTypes>(
            beacon_processor_send.clone(),
            log.clone(),
        ),
+        backfill_sync: BackFillSync::new(
+            beacon_chain.clone(),
+            network_globals.clone(),
+            beacon_processor_send.clone(),
+            log.clone(),
+        ),
        network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
        chain: beacon_chain,
        network_globals,
@@ -576,6 +594,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
        }
    }

+    /// Handles RPC errors related to requests that were emitted from the sync manager.
    fn inject_error(&mut self, peer_id: PeerId, request_id: RequestId) {
        trace!(self.log, "Sync manager received a failed RPC");
        // remove any single block lookups
@@ -597,14 +616,16 @@ impl<T: BeaconChainTypes> SyncManager<T> {
            return;
        }

-        // otherwise, this is a range sync issue, notify the range sync
-        self.range_sync
-            .inject_error(&mut self.network, peer_id, request_id);
-        self.update_sync_state();
+        // Otherwise this error matches no known request.
+        trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
    }

    fn peer_disconnect(&mut self, peer_id: &PeerId) {
        self.range_sync.peer_disconnect(&mut self.network, peer_id);
+        // Regardless of the outcome, we update the sync status.
+        let _ = self
+            .backfill_sync
+            .peer_disconnected(peer_id, &mut self.network);
        self.update_sync_state();
    }

@@ -624,12 +645,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
        if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
            let new_state = sync_type.as_sync_status(remote_sync_info);
            let rpr = new_state.as_str();
-            let was_updated = peer_info.sync_status.update(new_state);
+            let was_updated = peer_info.sync_status.update(new_state.clone());
            if was_updated {
                debug!(self.log, "Peer transitioned sync state"; "peer_id" => %peer_id, "new_state" => rpr,
                    "our_head_slot" => local_sync_info.head_slot, "out_finalized_epoch" => local_sync_info.finalized_epoch,
                    "their_head_slot" => remote_sync_info.head_slot, "their_finalized_epoch" => remote_sync_info.finalized_epoch,
                    "is_connected" => peer_info.is_connected());
+
+                // A peer has transitioned its sync state. If the new state is "synced" we
+                // inform the backfill sync that a new synced peer has joined us.
+                if new_state.is_synced() {
+                    self.backfill_sync.fully_synced_peer_joined();
+                }
            }
            peer_info.is_connected()
        } else {
@@ -638,7 +665,17 @@ impl<T: BeaconChainTypes> SyncManager<T> {
        }
    }

-    /// Updates the global sync state and logs any changes.
+    /// Updates the global sync state, optionally instigating or pausing a backfill sync as well as
+    /// logging any changes.
+    ///
+    /// The logic for which sync should be running is as follows:
+    /// - If there is a range-sync running (or required) pause any backfill and let range-sync
+    /// complete.
+    /// - If there is no current range sync, check for any requirement to backfill and either
+    /// start/resume a backfill sync if required. The global state will be BackFillSync if a
+    /// backfill sync is running.
+    /// - If there is no range sync and no required backfill and we have synced up to the currently
+    /// known peers, we consider ourselves synced.
    fn update_sync_state(&mut self) {
        let new_state: SyncState = match self.range_sync.state() {
            Err(e) => {
@@ -647,41 +684,75 @@ impl<T: BeaconChainTypes> SyncManager<T> {
            }
            Ok(state) => match state {
                None => {
-                    // no range sync, decide if we are stalled or synced.
+                    // No range sync, so we decide if we are stalled or synced.
                    // For this we check if there is at least one advanced peer. An advanced peer
                    // with Idle range is possible since a peer's status is updated periodically.
                    // If we synced a peer between status messages, most likely the peer has
                    // advanced and will produce a head chain on re-status. Otherwise it will shift
                    // to being synced
-                    let head = self.chain.best_slot().unwrap_or_else(|_| Slot::new(0));
-                    let current_slot = self.chain.slot().unwrap_or_else(|_| Slot::new(0));
+                    let mut sync_state = {
+                        let head = self.chain.best_slot().unwrap_or_else(|_| Slot::new(0));
+                        let current_slot = self.chain.slot().unwrap_or_else(|_| Slot::new(0));

-                    let peers = self.network_globals.peers.read();
-                    if current_slot >= head
-                        && current_slot.sub(head) <= (SLOT_IMPORT_TOLERANCE as u64)
-                        && head > 0
-                    {
-                        SyncState::Synced
-                    } else if peers.advanced_peers().next().is_some() {
-                        SyncState::SyncTransition
-                    } else if peers.synced_peers().next().is_none() {
-                        SyncState::Stalled
-                    } else {
-                        // There are no peers that require syncing and we have at least one synced
-                        // peer
-                        SyncState::Synced
+                        let peers = self.network_globals.peers.read();
+                        if current_slot >= head
+                            && current_slot.sub(head) <= (SLOT_IMPORT_TOLERANCE as u64)
+                            && head > 0
+                        {
+                            SyncState::Synced
+                        } else if peers.advanced_peers().next().is_some() {
+                            SyncState::SyncTransition
+                        } else if peers.synced_peers().next().is_none() {
+                            SyncState::Stalled
+                        } else {
+                            // There are no peers that require syncing and we have at least one synced
+                            // peer
+                            SyncState::Synced
+                        }
+                    };
+
+                    // If we would otherwise be synced, first check if we need to perform or
+                    // complete a backfill sync.
+                    if matches!(sync_state, SyncState::Synced) {
+                        // Determine if we need to start/resume/restart a backfill sync.
+                        match self.backfill_sync.start(&mut self.network) {
+                            Ok(SyncStart::Syncing {
+                                completed,
+                                remaining,
+                            }) => {
+                                sync_state = SyncState::BackFillSyncing {
+                                    completed,
+                                    remaining,
+                                };
+                            }
+                            Ok(SyncStart::NotSyncing) => {} // Ignore updating the state if the backfill sync state didn't start.
+                            Err(e) => {
+                                error!(self.log, "Backfill sync failed to start"; "error" => ?e);
+                            }
+                        }
                    }
+
+                    // Return the sync state if backfilling is not required.
+                    sync_state
                }
                Some((RangeSyncType::Finalized, start_slot, target_slot)) => {
+                    // If there is a backfill sync in progress pause it.
+                    self.backfill_sync.pause();
+
                    SyncState::SyncingFinalized {
                        start_slot,
                        target_slot,
                    }
                }
-                Some((RangeSyncType::Head, start_slot, target_slot)) => SyncState::SyncingHead {
-                    start_slot,
-                    target_slot,
-                },
+                Some((RangeSyncType::Head, start_slot, target_slot)) => {
+                    // If there is a backfill sync in progress pause it.
+                    self.backfill_sync.pause();
+
+                    SyncState::SyncingHead {
+                        start_slot,
+                        target_slot,
+                    }
+                }
            },
        };

@@ -690,7 +761,14 @@ impl<T: BeaconChainTypes> SyncManager<T> {
        if !new_state.eq(&old_state) {
            info!(self.log, "Sync state updated"; "old_state" => %old_state, "new_state" => %new_state);
            // If we have become synced - Subscribe to all the core subnet topics
-            if new_state.is_synced() {
+            // We don't need to subscribe if the old state is a state that would have already
+            // invoked this call.
+            if new_state.is_synced()
+                && !matches!(
+                    old_state,
+                    SyncState::Synced { .. } | SyncState::BackFillSyncing { .. }
+                )
+            {
                self.network.subscribe_core_topics();
            }
        }
@@ -828,14 +906,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                // peer. We don't consider this chain a failure and prevent retries with another
                // peer.
                "too many failed attempts"
-            } else {
-                if !parent_request.downloaded_blocks.is_empty() {
-                    self.failed_chains
-                        .insert(parent_request.downloaded_blocks[0].canonical_root());
-                } else {
-                    crit!(self.log, "Parent lookup has no blocks");
-                }
+            } else if !parent_request.downloaded_blocks.is_empty() {
+                self.failed_chains
+                    .insert(parent_request.downloaded_blocks[0].canonical_root());
                "reached maximum lookup-depth"
+            } else {
+                crit!(self.log, "Parent lookup has no blocks");
+                "no blocks"
            };

            debug!(self.log, "Parent import failed";
@@ -887,13 +964,44 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                        request_id,
                        beacon_block,
                    } => {
-                        self.range_sync.blocks_by_range_response(
-                            &mut self.network,
-                            peer_id,
-                            request_id,
-                            beacon_block.map(|b| *b),
-                        );
-                        self.update_sync_state();
+                        let beacon_block = beacon_block.map(|b| *b);
+                        // Obtain which sync requested these blocks and divert accordingly.
+                        match self
+                            .network
+                            .blocks_by_range_response(request_id, beacon_block.is_none())
+                        {
+                            Some(SyncRequestType::RangeSync(batch_id, chain_id)) => {
+                                self.range_sync.blocks_by_range_response(
+                                    &mut self.network,
+                                    peer_id,
+                                    chain_id,
+                                    batch_id,
+                                    request_id,
+                                    beacon_block,
+                                );
+                                self.update_sync_state();
+                            }
+                            Some(SyncRequestType::BackFillSync(batch_id)) => {
+                                match self.backfill_sync.on_block_response(
+                                    &mut self.network,
+                                    batch_id,
+                                    &peer_id,
+                                    request_id,
+                                    beacon_block,
+                                ) {
+                                    Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
+                                    Ok(ProcessResult::Successful) => {}
+                                    Err(_error) => {
+                                        // The backfill sync has failed, errors are reported
+                                        // within.
+                                        self.update_sync_state();
+                                    }
+                                }
+                            }
+                            None => {
+                                trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
+                            }
+                        }
                    }
                    SyncMessage::BlocksByRootResponse {
                        peer_id,
@@ -913,21 +1021,63 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                        self.peer_disconnect(&peer_id);
                    }
                    SyncMessage::RPCError(peer_id, request_id) => {
-                        self.inject_error(peer_id, request_id);
-                    }
-                    SyncMessage::BatchProcessed {
-                        chain_id,
-                        epoch,
-                        result,
-                    } => {
-                        self.range_sync.handle_block_process_result(
-                            &mut self.network,
-                            chain_id,
-                            epoch,
-                            result,
-                        );
-                        self.update_sync_state();
+                        // Redirect to a sync mechanism if the error is related to one of their
+                        // requests.
+                        match self.network.blocks_by_range_response(request_id, true) {
+                            Some(SyncRequestType::RangeSync(batch_id, chain_id)) => {
+                                self.range_sync.inject_error(
+                                    &mut self.network,
+                                    peer_id,
+                                    batch_id,
+                                    chain_id,
+                                    request_id,
+                                );
+                                self.update_sync_state();
+                            }
+                            Some(SyncRequestType::BackFillSync(batch_id)) => {
+                                match self.backfill_sync.inject_error(
+                                    &mut self.network,
+                                    batch_id,
+                                    &peer_id,
+                                    request_id,
+                                ) {
+                                    Ok(_) => {}
+                                    Err(_) => self.update_sync_state(),
+                                }
+                            }
+                            None => {
+                                // This is a request not belonging to a sync algorithm.
+                                // Process internally.
+                                self.inject_error(peer_id, request_id);
+                            }
+                        }
                    }
+                    SyncMessage::BatchProcessed { sync_type, result } => match sync_type {
+                        SyncRequestType::RangeSync(epoch, chain_id) => {
+                            self.range_sync.handle_block_process_result(
+                                &mut self.network,
+                                chain_id,
+                                epoch,
+                                result,
+                            );
+                            self.update_sync_state();
+                        }
+                        SyncRequestType::BackFillSync(epoch) => {
+                            match self.backfill_sync.on_batch_process_result(
+                                &mut self.network,
+                                epoch,
+                                &result,
+                            ) {
+                                Ok(ProcessResult::Successful) => {}
+                                Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
+                                Err(error) => {
+                                    error!(self.log, "Backfill sync failed"; "error" => ?error);
+                                    // Update the global status
+                                    self.update_sync_state();
+                                }
+                            }
+                        }
+                    },
                    SyncMessage::ParentLookupFailed {
                        chain_head,
                        peer_id,
--- a/beacon_node/network/src/sync/mod.rs
+++ b/beacon_node/network/src/sync/mod.rs
@@ -1,6 +1,7 @@
 //! Syncing for lighthouse.
 //!
 //! Stores the various syncing methods for the beacon chain.
+mod backfill_sync;
 pub mod manager;
 mod network_context;
 mod peer_sync_info;
--- a/beacon_node/network/src/sync/network_context.rs
+++ b/beacon_node/network/src/sync/network_context.rs
@@ -1,6 +1,7 @@
 //! Provides network functionality for the Syncing thread. This fundamentally wraps a network
 //! channel and stores a global RPC ID to perform requests.

+use super::manager::SyncRequestType;
 use super::range_sync::{BatchId, ChainId};
 use super::RequestId as SyncRequestId;
 use crate::service::NetworkMessage;
@@ -26,8 +27,8 @@ pub struct SyncNetworkContext<T: EthSpec> {
    /// A sequential ID for all RPC requests.
    request_id: SyncRequestId,

-    /// BlocksByRange requests made by range syncing chains.
-    range_requests: FnvHashMap<SyncRequestId, (ChainId, BatchId)>,
+    /// BlocksByRange requests made by syncing algorithms.
+    range_requests: FnvHashMap<SyncRequestId, SyncRequestType>,

    /// Logger for the `SyncNetworkContext`.
    log: slog::Logger,
@@ -81,6 +82,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
        }
    }

+    /// A blocks by range request for the range sync algorithm.
    pub fn blocks_by_range_request(
        &mut self,
        peer_id: PeerId,
@@ -96,15 +98,37 @@ impl<T: EthSpec> SyncNetworkContext<T> {
            "peer" => %peer_id,
        );
        let req_id = self.send_rpc_request(peer_id, Request::BlocksByRange(request))?;
-        self.range_requests.insert(req_id, (chain_id, batch_id));
+        self.range_requests
+            .insert(req_id, SyncRequestType::RangeSync(batch_id, chain_id));
        Ok(req_id)
    }

+    /// A blocks by range request sent by the backfill sync algorithm.
+    pub fn backfill_blocks_by_range_request(
+        &mut self,
+        peer_id: PeerId,
+        request: BlocksByRangeRequest,
+        batch_id: BatchId,
+    ) -> Result<SyncRequestId, &'static str> {
+        trace!(
+            self.log,
+            "Sending backfill BlocksByRange Request";
+            "method" => "BlocksByRange",
+            "count" => request.count,
+            "peer" => %peer_id,
+        );
+        let req_id = self.send_rpc_request(peer_id, Request::BlocksByRange(request))?;
+        self.range_requests
+            .insert(req_id, SyncRequestType::BackFillSync(batch_id));
+        Ok(req_id)
+    }
+
+    /// Received a blocks by range response.
    pub fn blocks_by_range_response(
        &mut self,
        request_id: usize,
        remove: bool,
-    ) -> Option<(ChainId, BatchId)> {
+    ) -> Option<SyncRequestType> {
        // NOTE: we can't guarantee that the request must be registered as it could receive more
        // than an error, and be removed after receiving the first one.
        // FIXME: https://github.com/sigp/lighthouse/issues/1634
@@ -115,6 +139,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
        }
    }

+    /// Sends a blocks by root request.
    pub fn blocks_by_root_request(
        &mut self,
        peer_id: PeerId,
@@ -130,6 +155,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
        self.send_rpc_request(peer_id, Request::BlocksByRoot(request))
    }

+    /// Terminates the connection with the peer and bans them.
    pub fn goodbye_peer(&mut self, peer_id: PeerId, reason: GoodbyeReason) {
        self.network_send
            .send(NetworkMessage::GoodbyePeer {
@@ -142,6 +168,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
            });
    }

+    /// Reports to the scoring algorithm the behaviour of a peer.
    pub fn report_peer(&mut self, peer_id: PeerId, action: PeerAction) {
        debug!(self.log, "Sync reporting peer"; "peer_id" => %peer_id, "action" => %action);
        self.network_send
@@ -155,7 +182,8 @@ impl<T: EthSpec> SyncNetworkContext<T> {
            });
    }

-    pub fn send_rpc_request(
+    /// Sends an RPC request.
+    fn send_rpc_request(
        &mut self,
        peer_id: PeerId,
        request: Request,
@@ -170,6 +198,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
        Ok(request_id)
    }

+    /// Subscribes to core topics.
    pub fn subscribe_core_topics(&mut self) {
        self.network_send
            .send(NetworkMessage::SubscribeCoreTopics)
@@ -178,6 +207,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
            });
    }

+    /// Sends an arbitrary network message.
    fn send_network_msg(&mut self, msg: NetworkMessage<T>) -> Result<(), &'static str> {
        self.network_send.send(msg).map_err(|_| {
            debug!(self.log, "Could not send message to the network service");
--- a/beacon_node/network/src/sync/range_sync/batch.rs
+++ b/beacon_node/network/src/sync/range_sync/batch.rs
@@ -14,15 +14,34 @@ const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
 /// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
 const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;

+/// Allows customisation of the above constants used in other sync methods such as BackFillSync.
+pub trait BatchConfig {
+    /// The maximum batch download attempts.
+    fn max_batch_download_attempts() -> u8;
+    /// The max batch processing attempts.
+    fn max_batch_processing_attempts() -> u8;
+}
+
+pub struct RangeSyncBatchConfig {}
+
+impl BatchConfig for RangeSyncBatchConfig {
+    fn max_batch_download_attempts() -> u8 {
+        MAX_BATCH_DOWNLOAD_ATTEMPTS
+    }
+    fn max_batch_processing_attempts() -> u8 {
+        MAX_BATCH_PROCESSING_ATTEMPTS
+    }
+}
+
 /// Error type of a batch in a wrong state.
 // Such errors should never be encountered.
-pub struct WrongState(pub(super) String);
+pub struct WrongState(pub(crate) String);

 /// Auxiliary type alias for readability.
 type IsFailed = bool;

 /// A segment of a chain.
-pub struct BatchInfo<T: EthSpec> {
+pub struct BatchInfo<T: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
    /// Start slot of the batch.
    start_slot: Slot,
    /// End slot of the batch.
@@ -33,6 +52,8 @@ pub struct BatchInfo<T: EthSpec> {
    failed_download_attempts: Vec<PeerId>,
    /// State of the batch.
    state: BatchState<T>,
+    /// Pin the generic
+    marker: std::marker::PhantomData<B>,
 }

 /// Current state of a batch
@@ -73,7 +94,7 @@ impl<T: EthSpec> BatchState<T> {
    }
 }

-impl<T: EthSpec> BatchInfo<T> {
+impl<T: EthSpec, B: BatchConfig> BatchInfo<T, B> {
    /// Batches are downloaded excluding the first block of the epoch assuming it has already been
    /// downloaded.
    ///
@@ -91,6 +112,7 @@ impl<T: EthSpec> BatchInfo<T> {
            failed_processing_attempts: Vec::new(),
            failed_download_attempts: Vec::new(),
            state: BatchState::AwaitingDownload,
+            marker: std::marker::PhantomData,
        }
    }

@@ -120,6 +142,7 @@ impl<T: EthSpec> BatchInfo<T> {
        false
    }

+    /// Returns the peer that is currently responsible for progressing the state of the batch.
    pub fn current_peer(&self) -> Option<&PeerId> {
        match &self.state {
            BatchState::AwaitingDownload | BatchState::Failed => None,
@@ -131,6 +154,7 @@ impl<T: EthSpec> BatchInfo<T> {
        }
    }

+    /// Returns a BlocksByRange request associated with the batch.
    pub fn to_blocks_by_range_request(&self) -> BlocksByRangeRequest {
        BlocksByRangeRequest {
            start_slot: self.start_slot.into(),
@@ -192,7 +216,7 @@ impl<T: EthSpec> BatchInfo<T> {
                        // can be tried again
                        self.failed_download_attempts.push(peer);
                        self.state = if self.failed_download_attempts.len()
-                            >= MAX_BATCH_DOWNLOAD_ATTEMPTS as usize
+                            >= B::max_batch_download_attempts() as usize
                        {
                            BatchState::Failed
                        } else {
@@ -219,14 +243,21 @@ impl<T: EthSpec> BatchInfo<T> {
        }
    }

+    /// Mark the batch as failed and return whether we can attempt a re-download.
+    ///
+    /// This can happen if a peer disconnects or some error occurred that was not the peer's fault.
+    /// The `mark_failed` parameter, when set to false, does not increment the failed attempts of
+    /// this batch and register the peer, rather attempts a re-download.
    #[must_use = "Batch may have failed"]
-    pub fn download_failed(&mut self) -> Result<IsFailed, WrongState> {
+    pub fn download_failed(&mut self, mark_failed: bool) -> Result<IsFailed, WrongState> {
        match self.state.poison() {
            BatchState::Downloading(peer, _, _request_id) => {
                // register the attempt and check if the batch can be tried again
-                self.failed_download_attempts.push(peer);
+                if mark_failed {
+                    self.failed_download_attempts.push(peer);
+                }
                self.state = if self.failed_download_attempts.len()
-                    >= MAX_BATCH_DOWNLOAD_ATTEMPTS as usize
+                    >= B::max_batch_download_attempts() as usize
                {
                    BatchState::Failed
                } else {
@@ -294,7 +325,7 @@ impl<T: EthSpec> BatchInfo<T> {

                    // check if the batch can be downloaded again
                    if self.failed_processing_attempts.len()
-                        >= MAX_BATCH_PROCESSING_ATTEMPTS as usize
+                        >= B::max_batch_processing_attempts() as usize
                    {
                        BatchState::Failed
                    } else {
@@ -324,7 +355,7 @@ impl<T: EthSpec> BatchInfo<T> {

                // check if the batch can be downloaded again
                self.state = if self.failed_processing_attempts.len()
-                    >= MAX_BATCH_PROCESSING_ATTEMPTS as usize
+                    >= B::max_batch_processing_attempts() as usize
                {
                    BatchState::Failed
                } else {
@@ -365,7 +396,7 @@ impl Attempt {
    }
 }

-impl<T: EthSpec> slog::KV for &mut BatchInfo<T> {
+impl<T: EthSpec, B: BatchConfig> slog::KV for &mut BatchInfo<T, B> {
    fn serialize(
        &self,
        record: &slog::Record,
@@ -375,7 +406,7 @@ impl<T: EthSpec> slog::KV for &mut BatchInfo<T> {
    }
 }

-impl<T: EthSpec> slog::KV for BatchInfo<T> {
+impl<T: EthSpec, B: BatchConfig> slog::KV for BatchInfo<T, B> {
    fn serialize(
        &self,
        record: &slog::Record,
--- a/beacon_node/network/src/sync/range_sync/chain.rs
+++ b/beacon_node/network/src/sync/range_sync/chain.rs
@@ -181,7 +181,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
            // fail the batches
            for id in batch_ids {
                if let Some(batch) = self.batches.get_mut(&id) {
-                    if batch.download_failed()? {
+                    if batch.download_failed(true)? {
                        return Err(RemoveChain::ChainFailed(id));
                    }
                    self.retry_batch_download(network, id)?;
@@ -273,7 +273,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        }
    }

-    /// Sends to process the batch with the given id.
+    /// Processes the batch with the given id.
    /// The batch must exist and be ready for processing
    fn process_batch(
        &mut self,
@@ -794,7 +794,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
            if let Some(active_requests) = self.peers.get_mut(peer_id) {
                active_requests.remove(&batch_id);
            }
-            if batch.download_failed()? {
+            if batch.download_failed(true)? {
                return Err(RemoveChain::ChainFailed(batch_id));
            }
            self.retry_batch_download(network, batch_id)
@@ -837,7 +837,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        }
    }

-    /// Requests the batch asigned to the given id from a given peer.
+    /// Requests the batch assigned to the given id from a given peer.
    pub fn send_batch(
        &mut self,
        network: &mut SyncNetworkContext<T::EthSpec>,
@@ -883,7 +883,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                    self.peers
                        .get_mut(&peer)
                        .map(|request| request.remove(&batch_id));
-                    if batch.download_failed()? {
+                    if batch.download_failed(true)? {
                        return Err(RemoveChain::ChainFailed(batch_id));
                    } else {
                        return self.retry_batch_download(network, batch_id);
@@ -990,7 +990,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        // this batch could have been included already being an optimistic batch
        match self.batches.entry(batch_id) {
            Entry::Occupied(_) => {
-                // this batch doesn't need downlading, let this same function decide the next batch
+                // this batch doesn't need downloading, let this same function decide the next batch
                self.to_be_downloaded += EPOCHS_PER_BATCH;
                self.include_next_batch()
            }
--- a/beacon_node/network/src/sync/range_sync/mod.rs
+++ b/beacon_node/network/src/sync/range_sync/mod.rs
@@ -7,7 +7,7 @@ mod chain_collection;
 mod range;
 mod sync_type;

-pub use batch::BatchInfo;
+pub use batch::{BatchConfig, BatchInfo, BatchState};
 pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
 pub use range::RangeSync;
 pub use sync_type::RangeSyncType;
--- a/beacon_node/network/src/sync/range_sync/range.rs
+++ b/beacon_node/network/src/sync/range_sync/range.rs
@@ -39,7 +39,7 @@
 //!  Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
 //!  and further batches are requested as current blocks are being processed.

-use super::chain::{ChainId, RemoveChain, SyncingChain};
+use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
 use super::chain_collection::ChainCollection;
 use super::sync_type::RangeSyncType;
 use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
@@ -194,34 +194,29 @@ impl<T: BeaconChainTypes> RangeSync<T> {
        &mut self,
        network: &mut SyncNetworkContext<T::EthSpec>,
        peer_id: PeerId,
+        chain_id: ChainId,
+        batch_id: BatchId,
        request_id: RequestId,
        beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
    ) {
-        // get the chain and batch for which this response belongs
-        if let Some((chain_id, batch_id)) =
-            network.blocks_by_range_response(request_id, beacon_block.is_none())
-        {
-            // check if this chunk removes the chain
-            match self.chains.call_by_id(chain_id, |chain| {
-                chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
-            }) {
-                Ok((removed_chain, sync_type)) => {
-                    if let Some((removed_chain, remove_reason)) = removed_chain {
-                        self.on_chain_removed(
-                            removed_chain,
-                            sync_type,
-                            remove_reason,
-                            network,
-                            "block response",
-                        );
-                    }
-                }
-                Err(_) => {
-                    trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
+        // check if this chunk removes the chain
+        match self.chains.call_by_id(chain_id, |chain| {
+            chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
+        }) {
+            Ok((removed_chain, sync_type)) => {
+                if let Some((removed_chain, remove_reason)) = removed_chain {
+                    self.on_chain_removed(
+                        removed_chain,
+                        sync_type,
+                        remove_reason,
+                        network,
+                        "block response",
+                    );
                }
            }
-        } else {
-            trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
+            Err(_) => {
+                trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
+            }
        }
    }

@@ -298,31 +293,28 @@ impl<T: BeaconChainTypes> RangeSync<T> {
        &mut self,
        network: &mut SyncNetworkContext<T::EthSpec>,
        peer_id: PeerId,
+        batch_id: BatchId,
+        chain_id: ChainId,
        request_id: RequestId,
    ) {
-        // get the chain and batch for which this response belongs
-        if let Some((chain_id, batch_id)) = network.blocks_by_range_response(request_id, true) {
-            // check that this request is pending
-            match self.chains.call_by_id(chain_id, |chain| {
-                chain.inject_error(network, batch_id, &peer_id, request_id)
-            }) {
-                Ok((removed_chain, sync_type)) => {
-                    if let Some((removed_chain, remove_reason)) = removed_chain {
-                        self.on_chain_removed(
-                            removed_chain,
-                            sync_type,
-                            remove_reason,
-                            network,
-                            "RPC error",
-                        );
-                    }
-                }
-                Err(_) => {
-                    trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
+        // check that this request is pending
+        match self.chains.call_by_id(chain_id, |chain| {
+            chain.inject_error(network, batch_id, &peer_id, request_id)
+        }) {
+            Ok((removed_chain, sync_type)) => {
+                if let Some((removed_chain, remove_reason)) = removed_chain {
+                    self.on_chain_removed(
+                        removed_chain,
+                        sync_type,
+                        remove_reason,
+                        network,
+                        "RPC error",
+                    );
                }
            }
-        } else {
-            trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
+            Err(_) => {
+                trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
+            }
        }
    }