//! The `SyncManager` facilitates the block syncing logic of lighthouse. The current networking
//! specification provides two methods for obtaining blocks from peers: the `BlocksByRange`
//! request and the `BlocksByRoot` request. The former is used to obtain a large number of
//! blocks and the latter allows for searching for blocks given a block hash.
//!
//! These two RPC methods are designed for two types of syncing:
//! - Long range (batch) sync, when a client is out of date and needs to sync to the latest head.
//! - Parent lookup, when a peer provides us a block whose parent is unknown to us.
//!
//! Both of these syncing strategies are built into the `SyncManager`.
//!
//! Currently the long-range (batch) syncing method functions by opportunistically downloading
//! batches of blocks from all peers who know about a chain that we do not. When a new peer
//! connects whose head is more than `SLOT_IMPORT_TOLERANCE` slots ahead of our current head slot,
//! the manager's state becomes `Syncing` and begins a batch syncing process with this peer. If
//! further peers connect, this process is run in parallel with those peers, until our head is
//! within `SLOT_IMPORT_TOLERANCE` of all connected peers.
//!
//! ## Batch Syncing
//!
//! See `RangeSync` for further details.
//!
//! ## Parent Lookup
//!
//! When a block with an unknown parent is received and we are in `Regular` sync mode, the block is
//! queued for lookup. A round-robin approach is used to request the parent from the known list of
//! fully sync'd peers. If `PARENT_FAIL_TOLERANCE` attempts at requesting the block fail, we
//! drop the propagated block and downvote the peer that sent it to us.
//!
//! ## Block Lookup
//!
//! To keep the logic contained within the syncing thread (and to manage the request ids), when a
//! block needs to be searched for (i.e. if an attestation references an unknown block) this
//! manager can search for the block and subsequently search for parents if needed.
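//!
//! A minimal sketch (hypothetical types, not the actual Lighthouse API) of the message-driven
//! pattern this manager follows: a single task owns all sync state and reacts to messages
//! received over a channel.
//!
//! ```ignore
//! use tokio::sync::mpsc;
//!
//! enum Msg {
//!     AddPeer(u64),
//!     Disconnect(u64),
//! }
//!
//! async fn run(mut rx: mpsc::UnboundedReceiver<Msg>) {
//!     while let Some(msg) = rx.recv().await {
//!         match msg {
//!             // Evaluate the peer's sync type and maybe start a range sync.
//!             Msg::AddPeer(_id) => {}
//!             // Retry any in-flight batches/lookups assigned to this peer.
//!             Msg::Disconnect(_id) => {}
//!         }
//!     }
//! }
//! ```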
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
use super::block_lookups::BlockLookups;
use super::network_context::{
CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent, SyncNetworkContext,
};
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor};
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;
use crate::sync::block_lookups::{
BlobRequestState, BlockComponent, BlockRequestState, CustodyRequestState, DownloadResult,
};
use crate::sync::network_context::PeerGroup;
use beacon_chain::block_verification_types::AsBlock;
use beacon_chain::validator_monitor::timestamp_now;
use beacon_chain::{
AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, EngineState,
};
use futures::StreamExt;
use lighthouse_network::rpc::RPCError;
use lighthouse_network::service::api_types::{
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId, CustodyRequester,
DataColumnsByRangeRequestId, DataColumnsByRootRequestId, DataColumnsByRootRequester, Id,
SingleLookupReqId, SyncRequestId,
};
use lighthouse_network::types::{NetworkGlobals, SyncState};
use lighthouse_network::SyncInfo;
use lighthouse_network::{PeerAction, PeerId};
use logging::crit;
use lru_cache::LRUTimeCache;
use std::ops::Sub;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{debug, error, info, info_span, trace, Instrument};
use types::{
BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot,
};
/// The number of slots ahead of us that is allowed before requesting a long-range (batch) sync
/// from a peer. If a peer is within this tolerance (forwards or backwards), it is treated as a
/// fully sync'd peer.
///
/// This means that we consider ourselves synced (and hence subscribe to all subnets and block
/// gossip) if no peers are further than this range ahead of us for which we have not already
/// downloaded blocks.
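///
/// A minimal sketch of the symmetric tolerance check (hypothetical slot values, not the
/// actual call sites, which operate on `Slot` rather than `u64`):
///
/// ```ignore
/// let (head_slot, block_slot) = (100u64, 120u64);
/// // Within tolerance if the slot distance in either direction is at most the tolerance.
/// assert!(head_slot.abs_diff(block_slot) <= SLOT_IMPORT_TOLERANCE as u64);
/// ```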
pub const SLOT_IMPORT_TOLERANCE: usize = 32;
/// Suppress duplicated `UnknownBlockHashFromAttestation` events for some duration of time. In
/// practice peers are likely to send the same root during a single slot. 30 seconds is a rather
/// arbitrary number that covers a full slot, but allows recovery if sync gets stuck for a few slots.
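///
/// A minimal sketch of the debounce pattern used with `notified_unknown_roots` below; only
/// the `LRUTimeCache` calls that appear in this file are assumed:
///
/// ```ignore
/// let mut seen = LRUTimeCache::new(Duration::from_secs(NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS));
/// if !seen.contains(&(peer_id, block_root)) {
///     seen.insert((peer_id, block_root));
///     // Handle the first event; duplicates within the expiry window are ignored.
/// }
/// ```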
const NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS: u64 = 30;
/// A message that can be sent to the sync manager thread.
#[derive(Debug)]
pub enum SyncMessage<E: EthSpec> {
/// A useful peer has been discovered.
AddPeer(PeerId, SyncInfo),
/// Force trigger range sync for a set of peers given a head they claim to have imported. Used
/// by block lookup to trigger range sync if a parent chain grows too large.
AddPeersForceRangeSync {
peers: Vec<PeerId>,
head_root: Hash256,
/// Sync lookup may not know the `Slot` of this head. However, this situation is very rare.
head_slot: Option<Slot>,
},
/// The peer manager has received the MetaData of a peer with a new or updated CGC value.
UpdatedPeerCgc(PeerId),
/// A block has been received from the RPC.
RpcBlock {
sync_request_id: SyncRequestId,
peer_id: PeerId,
beacon_block: Option<Arc<SignedBeaconBlock<E>>>,
seen_timestamp: Duration,
},
/// A blob has been received from the RPC.
RpcBlob {
sync_request_id: SyncRequestId,
peer_id: PeerId,
blob_sidecar: Option<Arc<BlobSidecar<E>>>,
seen_timestamp: Duration,
},
/// A data column has been received from the RPC.
RpcDataColumn {
sync_request_id: SyncRequestId,
peer_id: PeerId,
data_column: Option<Arc<DataColumnSidecar<E>>>,
seen_timestamp: Duration,
},
/// A block with an unknown parent has been received.
UnknownParentBlock(PeerId, Arc<SignedBeaconBlock<E>>, Hash256),
/// A blob with an unknown parent has been received.
UnknownParentBlob(PeerId, Arc<BlobSidecar<E>>),
/// A data column with an unknown parent has been received.
UnknownParentDataColumn(PeerId, Arc<DataColumnSidecar<E>>),
/// A peer has sent an attestation that references a block that is unknown. This triggers the
/// manager to attempt to find the block matching the unknown hash.
UnknownBlockHashFromAttestation(PeerId, Hash256),
/// A peer has disconnected.
Disconnect(PeerId),
/// An RPC Error has occurred on a request.
RpcError {
peer_id: PeerId,
sync_request_id: SyncRequestId,
error: RPCError,
},
/// A batch has been processed by the block processor thread.
BatchProcessed {
sync_type: ChainSegmentProcessId,
result: BatchProcessResult,
},
/// Block processed
BlockComponentProcessed {
process_type: BlockProcessType,
result: BlockProcessingResult,
},
/// A block from gossip has completed processing.
GossipBlockProcessResult { block_root: Hash256, imported: bool },
}
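// A minimal sketch (hypothetical channel variables) of how another service delivers work to
// the sync manager thread:
//
//     let (sync_send, sync_recv) = tokio::sync::mpsc::unbounded_channel::<SyncMessage<E>>();
//     // `sync_recv` is handed to `spawn` below; producers keep `sync_send`.
//     sync_send.send(SyncMessage::Disconnect(peer_id)).unwrap();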
/// The type of processing specified for a received block.
#[derive(Debug, Clone)]
pub enum BlockProcessType {
SingleBlock { id: Id },
SingleBlob { id: Id },
SingleCustodyColumn(Id),
}
impl BlockProcessType {
pub fn id(&self) -> Id {
match self {
BlockProcessType::SingleBlock { id }
| BlockProcessType::SingleBlob { id }
| BlockProcessType::SingleCustodyColumn(id) => *id,
}
}
}
#[derive(Debug)]
pub enum BlockProcessingResult {
Ok(AvailabilityProcessingStatus),
Err(BlockError),
Ignored,
}
/// The result of processing multiple blocks (a chain segment).
#[derive(Debug)]
pub enum BatchProcessResult {
/// The batch was completed successfully. It carries the number of blocks sent and the number
/// of blocks imported.
Success {
sent_blocks: usize,
imported_blocks: usize,
},
/// The batch processing failed. It carries the number of blocks imported before the failure
/// and the penalty to apply to the responsible peers.
FaultyFailure {
imported_blocks: usize,
penalty: PeerAction,
},
NonFaultyFailure,
}
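// A minimal sketch (hypothetical handler, not an actual call site) of how a consumer
// branches on `BatchProcessResult`:
//
//     match result {
//         BatchProcessResult::Success { sent_blocks, imported_blocks } => {
//             // Mark the batch complete; `imported_blocks` of the `sent_blocks` were new.
//         }
//         BatchProcessResult::FaultyFailure { imported_blocks, penalty } => {
//             // Apply `penalty` to the batch's peers and retry the batch.
//         }
//         BatchProcessResult::NonFaultyFailure => {
//             // Retry the batch without penalising any peer.
//         }
//     }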
/// The primary object for handling and driving all the current syncing logic. It maintains the
/// current state of the syncing process, the number of useful peers, downloaded blocks and
/// controls the logic behind both the long-range (batch) sync and the on-going potential parent
/// look-up of blocks.
pub struct SyncManager<T: BeaconChainTypes> {
/// A reference to the underlying beacon chain.
chain: Arc<BeaconChain<T>>,
/// A receiving channel sent by the message processor thread.
input_channel: mpsc::UnboundedReceiver<SyncMessage<T::EthSpec>>,
/// A network context to contact the network service.
network: SyncNetworkContext<T>,
/// The object handling long-range batch load-balanced syncing.
range_sync: RangeSync<T>,
/// Backfill syncing.
backfill_sync: BackFillSync<T>,
block_lookups: BlockLookups<T>,
/// Debounce duplicated `UnknownBlockHashFromAttestation` events for the same (peer, root)
/// tuple. A peer may forward us thousands of attestations, each one triggering an individual
/// event. Only one event is useful; the rest generate log noise and wasted cycles.
notified_unknown_roots: LRUTimeCache<(PeerId, Hash256)>,
}
/// Spawns a new `SyncManager` thread which has a weak reference to the underlying beacon
/// chain. This allows the chain to be dropped during the syncing process, which will
/// gracefully end the `SyncManager`.
pub fn spawn<T: BeaconChainTypes>(
executor: task_executor::TaskExecutor,
beacon_chain: Arc<BeaconChain<T>>,
network_send: mpsc::UnboundedSender<NetworkMessage<T::EthSpec>>,
beacon_processor: Arc<NetworkBeaconProcessor<T>>,
sync_recv: mpsc::UnboundedReceiver<SyncMessage<T::EthSpec>>,
fork_context: Arc<ForkContext>,
) {
assert!(
beacon_chain.spec.max_request_blocks(fork_context.current_fork_name()) as u64
>= T::EthSpec::slots_per_epoch() * EPOCHS_PER_BATCH,
"The maximum number of blocks in a batch must not exceed the maximum allowed in a single request"
);
// create an instance of the SyncManager
let mut sync_manager = SyncManager::new(
beacon_chain,
network_send,
beacon_processor,
sync_recv,
fork_context,
);
// spawn the sync manager thread
debug!("Sync Manager started");
executor.spawn(
async move {
Box::pin(sync_manager.main())
.instrument(info_span!("", service = "sync"))
.await
},
"sync",
);
}
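// A minimal wiring sketch (hypothetical variable names for the channels and handles):
//
//     let (network_send, network_recv) = tokio::sync::mpsc::unbounded_channel();
//     let (sync_send, sync_recv) = tokio::sync::mpsc::unbounded_channel();
//     spawn(executor, beacon_chain, network_send, beacon_processor, sync_recv, fork_context);
//     // `sync_send` is then used elsewhere to deliver `SyncMessage`s to the manager.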
impl<T: BeaconChainTypes> SyncManager<T> {
pub(crate) fn new(
beacon_chain: Arc<BeaconChain<T>>,
network_send: mpsc::UnboundedSender<NetworkMessage<T::EthSpec>>,
beacon_processor: Arc<NetworkBeaconProcessor<T>>,
sync_recv: mpsc::UnboundedReceiver<SyncMessage<T::EthSpec>>,
fork_context: Arc<ForkContext>,
) -> Self {
let network_globals = beacon_processor.network_globals.clone();
Self {
chain: beacon_chain.clone(),
input_channel: sync_recv,
network: SyncNetworkContext::new(
network_send,
beacon_processor.clone(),
beacon_chain.clone(),
fork_context.clone(),
),
range_sync: RangeSync::new(beacon_chain.clone()),
backfill_sync: BackFillSync::new(beacon_chain.clone(), network_globals),
block_lookups: BlockLookups::new(),
notified_unknown_roots: LRUTimeCache::new(Duration::from_secs(
NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS,
)),
}
}
#[cfg(test)]
pub(crate) fn active_single_lookups(&self) -> Vec<super::block_lookups::BlockLookupSummary> {
self.block_lookups.active_single_lookups()
}
#[cfg(test)]
pub(crate) fn active_parent_lookups(&self) -> Vec<Vec<Hash256>> {
self.block_lookups
.active_parent_lookups()
.iter()
.map(|c| c.chain.clone())
.collect()
}
#[cfg(test)]
pub(crate) fn get_range_sync_chains(
&self,
) -> Result<Option<(RangeSyncType, Slot, Slot)>, &'static str> {
self.range_sync.state()
}
#[cfg(test)]
pub(crate) fn range_sync_state(&self) -> super::range_sync::SyncChainStatus {
self.range_sync.state()
}
#[cfg(test)]
pub(crate) fn __range_failed_chains(&mut self) -> Vec<Hash256> {
self.range_sync.__failed_chains()
}
#[cfg(test)]
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
self.block_lookups.get_failed_chains()
}
#[cfg(test)]
pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) {
self.block_lookups.insert_failed_chain(block_root);
}
#[cfg(test)]
pub(crate) fn update_execution_engine_state(&mut self, state: EngineState) {
self.handle_new_execution_engine_state(state);
}
fn network_globals(&self) -> &NetworkGlobals<T::EthSpec> {
self.network.network_globals()
}
/* Input Handling Functions */
/// A peer has connected which has blocks that are unknown to us.
///
/// This function handles the logic associated with the connection of a new peer. If the peer
/// is sufficiently ahead of our current head, a range-sync (batch) sync is started and
/// batches of blocks are queued to download from the peer. Batched blocks begin at our latest
/// finalized head.
///
/// If the peer is within the `SLOT_IMPORT_TOLERANCE`, then its head is sufficiently close to
/// ours that we consider it fully sync'd with respect to our current chain.
fn add_peer(&mut self, peer_id: PeerId, remote: SyncInfo) {
// ensure the beacon chain still exists
let status = self.chain.status_message();
let local = SyncInfo {
head_slot: *status.head_slot(),
head_root: *status.head_root(),
finalized_epoch: *status.finalized_epoch(),
finalized_root: *status.finalized_root(),
earliest_available_slot: status.earliest_available_slot().ok().cloned(),
};
let sync_type = remote_sync_type(&local, &remote, &self.chain);
// update the state of the peer.
let is_still_connected = self.update_peer_sync_state(&peer_id, &local, &remote, &sync_type);
if is_still_connected {
match sync_type {
PeerSyncType::Behind => {} // Do nothing
PeerSyncType::Advanced => {
self.range_sync
.add_peer(&mut self.network, local, peer_id, remote);
}
PeerSyncType::FullySynced => {
// Sync considers this peer close enough to the head to not trigger range sync.
// Range sync handles syncing large ranges of blocks well, of at least a few blocks.
// However this peer may be in a fork that we should sync but we have not discovered
// yet. If the head of the peer is unknown, attempt block lookup first. If the
// unknown head turns out to be on a longer fork, it will trigger range sync.
//
// A peer should always be considered `Advanced` if its finalized root is
// unknown and ahead of ours, so we don't check for that root here.
//
// TODO: This fork-choice check is potentially duplicated, review code
if !self.chain.block_is_known_to_fork_choice(&remote.head_root) {
self.handle_unknown_block_root(peer_id, remote.head_root);
}
}
}
}
self.update_sync_state();
// Try to make progress on custody requests that are waiting for peers
for (id, result) in self.network.continue_custody_by_root_requests() {
self.on_custody_by_root_result(id, result);
}
}
/// Trigger range sync for a set of peers that claim to have imported a head unknown to us.
fn add_peers_force_range_sync(
&mut self,
peers: &[PeerId],
head_root: Hash256,
head_slot: Option<Slot>,
) {
let status = self.chain.status_message();
let local = SyncInfo {
head_slot: *status.head_slot(),
head_root: *status.head_root(),
finalized_epoch: *status.finalized_epoch(),
finalized_root: *status.finalized_root(),
earliest_available_slot: status.earliest_available_slot().ok().cloned(),
};
let head_slot = head_slot.unwrap_or_else(|| {
debug!(
local_head_slot = %local.head_slot,
?head_root,
"On add peers force range sync assuming local head_slot"
);
local.head_slot
});
let remote = SyncInfo {
head_slot,
head_root,
// Set finalized to same as local to trigger Head sync
finalized_epoch: local.finalized_epoch,
finalized_root: local.finalized_root,
earliest_available_slot: local.earliest_available_slot,
};
for peer_id in peers {
self.range_sync
.add_peer(&mut self.network, local.clone(), *peer_id, remote.clone());
}
}
fn updated_peer_cgc(&mut self, _peer_id: PeerId) {
// Try to make progress on custody requests that are waiting for peers
for (id, result) in self.network.continue_custody_by_root_requests() {
self.on_custody_by_root_result(id, result);
}
// Attempt to resume range sync too
self.range_sync.resume(&mut self.network);
}
/// Handles RPC errors related to requests that were emitted from the sync manager.
fn inject_error(&mut self, peer_id: PeerId, sync_request_id: SyncRequestId, error: RPCError) {
trace!("Sync manager received a failed RPC");
match sync_request_id {
SyncRequestId::SingleBlock { id } => {
self.on_single_block_response(id, peer_id, RpcEvent::RPCError(error))
}
SyncRequestId::SingleBlob { id } => {
self.on_single_blob_response(id, peer_id, RpcEvent::RPCError(error))
}
SyncRequestId::DataColumnsByRoot(req_id) => {
self.on_data_columns_by_root_response(req_id, peer_id, RpcEvent::RPCError(error))
}
SyncRequestId::BlocksByRange(req_id) => {
self.on_blocks_by_range_response(req_id, peer_id, RpcEvent::RPCError(error))
}
SyncRequestId::BlobsByRange(req_id) => {
self.on_blobs_by_range_response(req_id, peer_id, RpcEvent::RPCError(error))
}
SyncRequestId::DataColumnsByRange(req_id) => {
self.on_data_columns_by_range_response(req_id, peer_id, RpcEvent::RPCError(error))
}
}
}
/// Handles a peer disconnect.
///
/// It is important that a peer disconnect retries all the batches/lookups as
/// there is no way to guarantee that libp2p always emits an error along with
/// the disconnect.
fn peer_disconnect(&mut self, peer_id: &PeerId) {
// Inject a Disconnected error on all requests associated with the disconnected peer
// to retry all batches/lookups
for sync_request_id in self.network.peer_disconnected(peer_id) {
self.inject_error(*peer_id, sync_request_id, RPCError::Disconnected);
}
// Remove peer from all data structures
self.range_sync.peer_disconnect(&mut self.network, peer_id);
let _ = self.backfill_sync.peer_disconnected(peer_id);
self.block_lookups.peer_disconnected(peer_id);
// Regardless of the outcome, we update the sync status.
self.update_sync_state();
}
/// Prune stale requests that are waiting for peers
fn prune_requests(&mut self) {
// continue_custody_by_root_requests attempts to make progress on all requests. If some
// exceed the stale duration limit they will fail and return a result. Re-using
// `continue_custody_by_root_requests` is just a convenience to have less code.
for (id, result) in self.network.continue_custody_by_root_requests() {
self.on_custody_by_root_result(id, result);
}
}
/// Updates the syncing state of a peer.
/// Returns true if the peer is still connected and known to the peers DB.
fn update_peer_sync_state(
&mut self,
peer_id: &PeerId,
local_sync_info: &SyncInfo,
remote_sync_info: &SyncInfo,
sync_type: &PeerSyncType,
) -> bool {
// NOTE: here we are gracefully handling two race conditions: Receiving the status message
// of a peer that is 1) disconnected 2) not in the PeerDB.
let new_state = sync_type.as_sync_status(remote_sync_info);
let rpr = new_state.as_str();
// The temporary write lock is dropped at the end of this statement.
let update_sync_status = self
.network_globals()
.peers
.write()
.update_sync_status(peer_id, new_state.clone());
if let Some(was_updated) = update_sync_status {
let is_connected = self.network_globals().peers.read().is_connected(peer_id);
if was_updated {
debug!(
%peer_id,
new_state = rpr,
our_head_slot = %local_sync_info.head_slot,
our_finalized_epoch = %local_sync_info.finalized_epoch,
their_head_slot = %remote_sync_info.head_slot,
their_finalized_epoch = %remote_sync_info.finalized_epoch,
is_connected,
"Peer transitioned sync state"
);
// A peer has transitioned its sync state. If the new state is "synced" we
// inform the backfill sync that a new synced peer has joined us.
if new_state.is_synced() {
self.backfill_sync.fully_synced_peer_joined();
}
}
is_connected
} else {
error!(%peer_id, "Status'd peer is unknown");
false
}
}
/// Updates the global sync state, optionally instigating or pausing a backfill sync as well as
/// logging any changes.
///
/// The logic for which sync should be running is as follows:
/// - If there is a range-sync running (or required) pause any backfill and let range-sync
/// complete.
/// - If there is no current range sync, check for any requirement to backfill and either
/// start/resume a backfill sync if required. The global state will be BackFillSync if a
/// backfill sync is running.
/// - If there is no range sync and no required backfill and we have synced up to the currently
/// known peers, we consider ourselves synced.
fn update_sync_state(&mut self) {
let new_state: SyncState = match self.range_sync.state() {
Err(e) => {
crit!(error = %e, "Error getting range sync state");
return;
}
Ok(state) => match state {
None => {
// No range sync, so we decide if we are stalled or synced.
// For this we check if there is at least one advanced peer. An advanced peer
// with Idle range is possible since a peer's status is updated periodically.
// If we synced a peer between status messages, most likely the peer has
// advanced and will produce a head chain on re-status. Otherwise it will shift
// to being synced.
let mut sync_state = {
let head = self.chain.best_slot();
let current_slot = self.chain.slot().unwrap_or_else(|_| Slot::new(0));
let peers = self.network_globals().peers.read();
if current_slot >= head
&& current_slot.sub(head) <= (SLOT_IMPORT_TOLERANCE as u64)
&& head > 0
{
SyncState::Synced
} else if peers.advanced_peers().next().is_some() {
SyncState::SyncTransition
} else if peers.synced_peers().next().is_none() {
SyncState::Stalled
} else {
// There are no peers that require syncing and we have at least one synced
// peer
SyncState::Synced
}
};
// If we would otherwise be synced, first check if we need to perform or
// complete a backfill sync.
#[cfg(not(feature = "disable-backfill"))]
if matches!(sync_state, SyncState::Synced) {
// Determine if we need to start/resume/restart a backfill sync.
match self.backfill_sync.start(&mut self.network) {
Ok(SyncStart::Syncing {
completed,
remaining,
}) => {
sync_state = SyncState::BackFillSyncing {
completed,
remaining,
};
}
Ok(SyncStart::NotSyncing) => {} // Ignore updating the state if the backfill sync state didn't start.
Err(e) => {
error!(error = ?e, "Backfill sync failed to start");
}
}
}
// Return the sync state if backfilling is not required.
sync_state
}
Some((RangeSyncType::Finalized, start_slot, target_slot)) => {
// If there is a backfill sync in progress pause it.
#[cfg(not(feature = "disable-backfill"))]
self.backfill_sync.pause();
SyncState::SyncingFinalized {
start_slot,
target_slot,
}
}
Some((RangeSyncType::Head, start_slot, target_slot)) => {
// If there is a backfill sync in progress pause it.
#[cfg(not(feature = "disable-backfill"))]
self.backfill_sync.pause();
SyncState::SyncingHead {
start_slot,
target_slot,
}
}
},
};
let old_state = self.network_globals().set_sync_state(new_state);
let new_state = self.network_globals().sync_state.read().clone();
if !new_state.eq(&old_state) {
info!(%old_state, %new_state, "Sync state updated");
// If we have become synced - Subscribe to all the core subnet topics
// We don't need to subscribe if the old state is a state that would have already
// invoked this call.
if new_state.is_synced()
&& !matches!(
old_state,
SyncState::Synced | SyncState::BackFillSyncing { .. }
)
{
self.network.subscribe_core_topics();
}
}
}
/// The main driving future for the sync manager.
async fn main(&mut self) {
let check_ee = self.chain.execution_layer.is_some();
let mut check_ee_stream = {
// Some magic to have an instance implementing Stream even if there is no execution layer.
let ee_responsiveness_watch: futures::future::OptionFuture<_> = self
.chain
.execution_layer
.as_ref()
.map(|el| el.get_responsiveness_watch())
.into();
futures::stream::iter(ee_responsiveness_watch.await).flatten()
};
// min(LOOKUP_MAX_DURATION_*) is 15 seconds. The cost of calling prune_lookups more often is
// one iteration over the single lookups HashMap. This map is supposed to be very small
// (< 10 entries) unless there is a bug.
let mut prune_lookups_interval = tokio::time::interval(Duration::from_secs(15));
let mut prune_requests = tokio::time::interval(Duration::from_secs(15));
let mut register_metrics_interval = tokio::time::interval(Duration::from_secs(5));
// process any inbound messages
loop {
tokio::select! {
Some(sync_message) = self.input_channel.recv() => {
self.handle_message(sync_message);
},
Some(engine_state) = check_ee_stream.next(), if check_ee => {
self.handle_new_execution_engine_state(engine_state);
}
_ = prune_lookups_interval.tick() => {
self.block_lookups.prune_lookups();
}
_ = prune_requests.tick() => {
self.prune_requests();
}
_ = register_metrics_interval.tick() => {
self.network.register_metrics();
}
}
}
}
pub(crate) fn handle_message(&mut self, sync_message: SyncMessage<T::EthSpec>) {
match sync_message {
SyncMessage::AddPeer(peer_id, info) => {
self.add_peer(peer_id, info);
}
SyncMessage::AddPeersForceRangeSync {
peers,
head_root,
head_slot,
} => {
self.add_peers_force_range_sync(&peers, head_root, head_slot);
}
SyncMessage::UpdatedPeerCgc(peer_id) => {
debug!(
peer_id = ?peer_id,
"Received updated peer CGC message"
);
self.updated_peer_cgc(peer_id);
}
SyncMessage::RpcBlock {
sync_request_id,
peer_id,
beacon_block,
seen_timestamp,
} => {
self.rpc_block_received(sync_request_id, peer_id, beacon_block, seen_timestamp);
}
SyncMessage::RpcBlob {
sync_request_id,
peer_id,
blob_sidecar,
seen_timestamp,
} => self.rpc_blob_received(sync_request_id, peer_id, blob_sidecar, seen_timestamp),
SyncMessage::RpcDataColumn {
sync_request_id,
peer_id,
data_column,
seen_timestamp,
} => {
self.rpc_data_column_received(sync_request_id, peer_id, data_column, seen_timestamp)
}
SyncMessage::UnknownParentBlock(peer_id, block, block_root) => {
let block_slot = block.slot();
let parent_root = block.parent_root();
debug!(%block_root, %parent_root, "Received unknown parent block message");
self.handle_unknown_parent(
peer_id,
block_root,
parent_root,
block_slot,
BlockComponent::Block(DownloadResult {
value: block.block_cloned(),
block_root,
seen_timestamp: timestamp_now(),
peer_group: PeerGroup::from_single(peer_id),
}),
);
}
SyncMessage::UnknownParentBlob(peer_id, blob) => {
let blob_slot = blob.slot();
let block_root = blob.block_root();
let parent_root = blob.block_parent_root();
debug!(%block_root, %parent_root, "Received unknown parent blob message");
self.handle_unknown_parent(
peer_id,
block_root,
parent_root,
blob_slot,
BlockComponent::Blob(DownloadResult {
value: blob,
block_root,
seen_timestamp: timestamp_now(),
peer_group: PeerGroup::from_single(peer_id),
}),
);
}
SyncMessage::UnknownParentDataColumn(peer_id, data_column) => {
let data_column_slot = data_column.slot();
let block_root = data_column.block_root();
let parent_root = data_column.block_parent_root();
debug!(%block_root, %parent_root, "Received unknown parent data column message");
self.handle_unknown_parent(
peer_id,
block_root,
parent_root,
data_column_slot,
BlockComponent::DataColumn(DownloadResult {
value: data_column,
block_root,
seen_timestamp: timestamp_now(),
peer_group: PeerGroup::from_single(peer_id),
}),
);
}
SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => {
if !self.notified_unknown_roots.contains(&(peer_id, block_root)) {
self.notified_unknown_roots.insert((peer_id, block_root));
debug!(?block_root, ?peer_id, "Received unknown block hash message");
self.handle_unknown_block_root(peer_id, block_root);
}
}
SyncMessage::Disconnect(peer_id) => {
debug!(%peer_id, "Received disconnected message");
self.peer_disconnect(&peer_id);
}
SyncMessage::RpcError {
peer_id,
sync_request_id,
error,
} => self.inject_error(peer_id, sync_request_id, error),
SyncMessage::BlockComponentProcessed {
process_type,
result,
} => self
.block_lookups
.on_processing_result(process_type, result, &mut self.network),
SyncMessage::GossipBlockProcessResult {
block_root,
imported,
} => self.block_lookups.on_external_processing_result(
block_root,
imported,
&mut self.network,
),
SyncMessage::BatchProcessed { sync_type, result } => match sync_type {
ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => {
self.range_sync.handle_block_process_result(
&mut self.network,
chain_id,
epoch,
result,
);
self.update_sync_state();
}
ChainSegmentProcessId::BackSyncBatchId(epoch) => {
match self.backfill_sync.on_batch_process_result(
&mut self.network,
epoch,
&result,
) {
Ok(ProcessResult::Successful) => {}
Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
Err(error) => {
error!(error = ?error, "Backfill sync failed");
// Update the global status
self.update_sync_state();
}
}
}
},
}
}
fn handle_unknown_parent(
&mut self,
peer_id: PeerId,
block_root: Hash256,
parent_root: Hash256,
slot: Slot,
block_component: BlockComponent<T::EthSpec>,
) {
match self.should_search_for_block(Some(slot), &peer_id) {
Ok(_) => {
if self.block_lookups.search_child_and_parent(
block_root,
block_component,
peer_id,
&mut self.network,
) {
// Lookup created. No need to log here; it's logged in `new_current_lookup`.
} else {
debug!(
?block_root,
?parent_root,
"No lookup created for child and parent"
);
}
}
Err(reason) => {
debug!(%block_root, %parent_root, reason, "Ignoring unknown parent request");
}
}
}
fn handle_unknown_block_root(&mut self, peer_id: PeerId, block_root: Hash256) {
match self.should_search_for_block(None, &peer_id) {
Ok(_) => {
if self.block_lookups.search_unknown_block(
block_root,
&[peer_id],
&mut self.network,
) {
// Lookup created. No need to log here; it's logged in `new_current_lookup`.
} else {
debug!(?block_root, "No lookup created for unknown block");
}
}
Err(reason) => {
debug!(%block_root, reason, "Ignoring unknown block request");
}
}
}
fn should_search_for_block(
&mut self,
block_slot: Option<Slot>,
peer_id: &PeerId,
) -> Result<(), &'static str> {
if !self.network_globals().sync_state.read().is_synced() {
let Some(block_slot) = block_slot else {
return Err("not synced");
};
let head_slot = self.chain.canonical_head.cached_head().head_slot();
// If the block is far in the future, ignore it. If it's within the slot tolerance of
// our current head, regardless of the syncing state, fetch it.
if (head_slot >= block_slot
&& head_slot.sub(block_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
|| (head_slot < block_slot
&& block_slot.sub(head_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
{
return Err("not synced");
}
}
if !self.network_globals().peers.read().is_connected(peer_id) {
return Err("peer not connected");
}
if !self.network.is_execution_engine_online() {
return Err("execution engine offline");
}
Ok(())
}
fn handle_new_execution_engine_state(&mut self, engine_state: EngineState) {
self.network.update_execution_engine_state(engine_state);
match engine_state {
EngineState::Online => {
// Resume sync components.
// - Block lookups:
// We start searching for blocks again. This is done by updating the stored ee online
// state. No further action required.
// - Parent lookups:
// We start searching for parents again. This is done by updating the stored ee
// online state. No further action required.
// - Range:
// Actively resume.
self.range_sync.resume(&mut self.network);
// - Backfill:
// Not affected by ee states, nothing to do.
}
EngineState::Offline => {
// Pause sync components.
// - Block lookups:
// Disabled while in this state. We drop current requests and don't search for new
// blocks.
let dropped_single_blocks_requests =
self.block_lookups.drop_single_block_requests();
// - Range:
// We still send found peers to range so that it can keep track of potential chains
// with respect to our current peers. Range will stop processing batches in the
// meantime. No further action from the manager is required for this.
// - Backfill: Not affected by ee states, nothing to do.
// Some logs.
if dropped_single_blocks_requests > 0 {
debug!(
dropped_single_blocks_requests,
"Execution engine not online. Dropping active requests."
);
}
}
}
}
fn rpc_block_received(
&mut self,
sync_request_id: SyncRequestId,
peer_id: PeerId,
block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
seen_timestamp: Duration,
) {
match sync_request_id {
SyncRequestId::SingleBlock { id } => self.on_single_block_response(
id,
peer_id,
RpcEvent::from_chunk(block, seen_timestamp),
),
SyncRequestId::BlocksByRange(id) => self.on_blocks_by_range_response(
id,
peer_id,
RpcEvent::from_chunk(block, seen_timestamp),
),
_ => {
crit!(%peer_id, "bad request id for block");
}
}
}
fn on_single_block_response(
&mut self,
id: SingleLookupReqId,
peer_id: PeerId,
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_single_block_response(id, peer_id, block) {
self.block_lookups
.on_download_response::<BlockRequestState<T::EthSpec>>(
id,
resp.map(|(value, seen_timestamp)| {
(value, PeerGroup::from_single(peer_id), seen_timestamp)
}),
&mut self.network,
)
}
}
fn rpc_blob_received(
&mut self,
sync_request_id: SyncRequestId,
peer_id: PeerId,
blob: Option<Arc<BlobSidecar<T::EthSpec>>>,
seen_timestamp: Duration,
) {
match sync_request_id {
SyncRequestId::SingleBlob { id } => self.on_single_blob_response(
id,
peer_id,
RpcEvent::from_chunk(blob, seen_timestamp),
),
SyncRequestId::BlobsByRange(id) => self.on_blobs_by_range_response(
id,
peer_id,
RpcEvent::from_chunk(blob, seen_timestamp),
),
_ => {
crit!(%peer_id, "bad request id for blob");
}
}
}
fn rpc_data_column_received(
&mut self,
sync_request_id: SyncRequestId,
peer_id: PeerId,
data_column: Option<Arc<DataColumnSidecar<T::EthSpec>>>,
seen_timestamp: Duration,
) {
match sync_request_id {
SyncRequestId::DataColumnsByRoot(req_id) => {
self.on_data_columns_by_root_response(
req_id,
peer_id,
RpcEvent::from_chunk(data_column, seen_timestamp),
);
}
SyncRequestId::DataColumnsByRange(id) => self.on_data_columns_by_range_response(
id,
peer_id,
RpcEvent::from_chunk(data_column, seen_timestamp),
),
_ => {
crit!(%peer_id, "bad request id for data_column");
}
}
}
fn on_single_blob_response(
&mut self,
id: SingleLookupReqId,
peer_id: PeerId,
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_single_blob_response(id, peer_id, blob) {
self.block_lookups
.on_download_response::<BlobRequestState<T::EthSpec>>(
id,
resp.map(|(value, seen_timestamp)| {
(value, PeerGroup::from_single(peer_id), seen_timestamp)
}),
&mut self.network,
)
}
}
fn on_data_columns_by_root_response(
&mut self,
req_id: DataColumnsByRootRequestId,
peer_id: PeerId,
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
) {
if let Some(resp) =
self.network
.on_data_columns_by_root_response(req_id, peer_id, data_column)
{
match req_id.requester {
DataColumnsByRootRequester::Custody(custody_id) => {
if let Some(result) = self
.network
.on_custody_by_root_response(custody_id, req_id, peer_id, resp)
{
self.on_custody_by_root_result(custody_id.requester, result);
}
}
}
}
}
fn on_blocks_by_range_response(
&mut self,
id: BlocksByRangeRequestId,
peer_id: PeerId,
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_blocks_by_range_response(id, peer_id, block) {
self.on_range_components_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::Block(id, resp),
);
}
}
fn on_blobs_by_range_response(
&mut self,
id: BlobsByRangeRequestId,
peer_id: PeerId,
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_blobs_by_range_response(id, peer_id, blob) {
self.on_range_components_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::Blob(id, resp),
);
}
}
fn on_data_columns_by_range_response(
&mut self,
id: DataColumnsByRangeRequestId,
peer_id: PeerId,
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
) {
if let Some(resp) = self
.network
.on_data_columns_by_range_response(id, peer_id, data_column)
{
self.on_range_components_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::CustodyColumns(id, resp),
);
}
}
fn on_custody_by_root_result(
&mut self,
requester: CustodyRequester,
response: CustodyByRootResult<T::EthSpec>,
) {
self.block_lookups
.on_download_response::<CustodyRequestState<T::EthSpec>>(
requester.0,
response,
&mut self.network,
);
}
/// Handles receiving a response for a range sync request that should have both blocks and
/// blobs.
fn on_range_components_response(
&mut self,
range_request_id: ComponentsByRangeRequestId,
peer_id: PeerId,
range_block_component: RangeBlockComponent<T::EthSpec>,
) {
if let Some(resp) = self
.network
.range_block_component_response(range_request_id, range_block_component)
{
match resp {
Ok(blocks) => {
match range_request_id.requester {
RangeRequestId::RangeSync { chain_id, batch_id } => {
self.range_sync.blocks_by_range_response(
&mut self.network,
peer_id,
chain_id,
batch_id,
range_request_id.id,
blocks,
);
self.update_sync_state();
}
RangeRequestId::BackfillSync { batch_id } => {
match self.backfill_sync.on_block_response(
&mut self.network,
batch_id,
&peer_id,
range_request_id.id,
blocks,
) {
Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
Ok(ProcessResult::Successful) => {}
Err(_error) => {
// The backfill sync has failed, errors are reported
// within.
self.update_sync_state();
}
}
}
}
}
Err(e) => match range_request_id.requester {
RangeRequestId::RangeSync { chain_id, batch_id } => {
self.range_sync.inject_error(
&mut self.network,
peer_id,
batch_id,
chain_id,
range_request_id.id,
e,
);
self.update_sync_state();
}
RangeRequestId::BackfillSync { batch_id } => {
match self.backfill_sync.inject_error(
&mut self.network,
batch_id,
&peer_id,
range_request_id.id,
e,
) {
Ok(_) => {}
Err(_) => self.update_sync_state(),
}
}
},
}
}
}
}
impl From<Result<AvailabilityProcessingStatus, BlockError>> for BlockProcessingResult {
fn from(result: Result<AvailabilityProcessingStatus, BlockError>) -> Self {
match result {
Ok(status) => BlockProcessingResult::Ok(status),
Err(e) => BlockProcessingResult::Err(e),
}
}
}
impl From<BlockError> for BlockProcessingResult {
fn from(e: BlockError) -> Self {
BlockProcessingResult::Err(e)
}
}