Implement reliable range sync for PeerDAS

This commit is contained in:
dapplion
2025-05-21 23:34:28 -05:00
parent b014675b7a
commit 4fb2ae658a
23 changed files with 2580 additions and 701 deletions

View File

@@ -199,7 +199,7 @@ impl<E: EthSpec> RpcBlock<E> {
custody_columns: Vec<CustodyDataColumn<E>>,
expected_custody_indices: Vec<ColumnIndex>,
spec: &ChainSpec,
) -> Result<Self, AvailabilityCheckError> {
) -> Result<Self, String> {
let block_root = block_root.unwrap_or_else(|| get_block_root(&block));
let custody_columns_count = expected_custody_indices.len();
@@ -209,11 +209,7 @@ impl<E: EthSpec> RpcBlock<E> {
custody_columns,
spec.number_of_columns as usize,
)
.map_err(|e| {
AvailabilityCheckError::Unexpected(format!(
"custody_columns len exceeds number_of_columns: {e:?}"
))
})?,
.map_err(|e| format!("custody_columns len exceeds number_of_columns: {e:?}"))?,
expected_custody_indices,
};
Ok(Self {

View File

@@ -2418,7 +2418,8 @@ where
columns,
expected_custody_indices,
&self.spec,
)?
)
.map_err(BlockError::InternalError)?
} else {
RpcBlock::new_without_blobs(Some(block_root), block, sampling_column_count)
}

View File

@@ -59,6 +59,14 @@ pub struct BlobsByRangeRequestId {
pub struct DataColumnsByRangeRequestId {
/// Id to identify this attempt at a data_columns_by_range request for `parent_request_id`
pub id: Id,
/// The Id of the parent custody by range request that issued this data_columns_by_range request
pub parent_request_id: CustodyByRangeRequestId,
}
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub struct CustodyByRangeRequestId {
/// Id to identify this attempt at a meta custody by range request for `parent_request_id`
pub id: Id,
/// The Id of the overall By Range request for block components.
pub parent_request_id: ComponentsByRangeRequestId,
}
@@ -221,6 +229,7 @@ macro_rules! impl_display {
impl_display!(BlocksByRangeRequestId, "{}/{}", id, parent_request_id);
impl_display!(BlobsByRangeRequestId, "{}/{}", id, parent_request_id);
impl_display!(DataColumnsByRangeRequestId, "{}/{}", id, parent_request_id);
impl_display!(CustodyByRangeRequestId, "{}/{}", id, parent_request_id);
impl_display!(ComponentsByRangeRequestId, "{}/{}", id, requester);
impl_display!(DataColumnsByRootRequestId, "{}/{}", id, requester);
impl_display!(SingleLookupReqId, "{}/Lookup/{}", req_id, lookup_id);
@@ -299,14 +308,17 @@ mod tests {
fn display_id_data_columns_by_range() {
let id = DataColumnsByRangeRequestId {
id: 123,
parent_request_id: ComponentsByRangeRequestId {
parent_request_id: CustodyByRangeRequestId {
id: 122,
requester: RangeRequestId::RangeSync {
chain_id: 54,
batch_id: Epoch::new(0),
parent_request_id: ComponentsByRangeRequestId {
id: 121,
requester: RangeRequestId::RangeSync {
chain_id: 54,
batch_id: Epoch::new(0),
},
},
},
};
assert_eq!(format!("{id}"), "123/122/RangeSync/0/54");
assert_eq!(format!("{id}"), "123/122/121/RangeSync/0/54");
}
}

View File

@@ -245,6 +245,25 @@ impl<E: EthSpec> NetworkGlobals<E> {
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
}
pub fn new_test_globals_as_supernode(
trusted_peers: Vec<PeerId>,
config: Arc<NetworkConfig>,
spec: Arc<ChainSpec>,
is_supernode: bool,
) -> NetworkGlobals<E> {
let metadata = MetaData::V3(MetaDataV3 {
seq_number: 0,
attnets: Default::default(),
syncnets: Default::default(),
custody_group_count: if is_supernode {
spec.number_of_custody_groups
} else {
spec.custody_requirement
},
});
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
}
pub(crate) fn new_test_globals_with_metadata(
trusted_peers: Vec<PeerId>,
metadata: MetaData<E>,

View File

@@ -21,11 +21,11 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
use itertools::Itertools;
use lighthouse_network::service::api_types::Id;
use lighthouse_network::types::{BackFillState, NetworkGlobals};
use lighthouse_network::{PeerAction, PeerId};
use lighthouse_network::PeerAction;
use logging::crit;
use std::collections::{
btree_map::{BTreeMap, Entry},
HashSet,
HashMap, HashSet,
};
use std::sync::Arc;
use tracing::{debug, error, info, instrument, warn};
@@ -312,7 +312,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
&mut self,
network: &mut SyncNetworkContext<T>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: Id,
err: RpcResponseError,
) -> Result<(), BackFillError> {
@@ -326,11 +325,18 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
return Ok(());
}
debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed");
match batch.download_failed(Some(*peer_id)) {
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
// mechanism compatible with PeerDAS and before PeerDAS?
match batch.download_failed(None) {
Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)),
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
self.fail_sync(BackFillError::BatchDownloadFailed(batch_id))
}
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => self.fail_sync(match err {
RpcResponseError::RpcError(_)
| RpcResponseError::VerifyError(_)
| RpcResponseError::InternalError(_) => {
BackFillError::BatchDownloadFailed(batch_id)
}
RpcResponseError::RequestExpired(_) => BackFillError::Paused,
}),
Ok(BatchOperationOutcome::Continue) => self.send_batch(network, batch_id),
}
} else {
@@ -929,6 +935,8 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
RangeRequestId::BackfillSync { batch_id },
&synced_peers,
&failed_peers,
// Does not track total requests per peers for now
&HashMap::new(),
) {
Ok(request_id) => {
// inform the batch about the new request
@@ -940,15 +948,9 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
return Ok(());
}
Err(e) => match e {
RpcRequestSendError::NoPeer(no_peer) => {
// If we are here the chain has no more synced peers
info!(
"reason" = format!("insufficient_synced_peers({no_peer:?})"),
"Backfill sync paused"
);
self.set_state(BackFillState::Paused);
return Err(BackFillError::Paused);
}
// TODO(das): block_components_by_range requests can now hang out indefinitely.
// Is that fine? Maybe we should fail the requests from the network_context
// level without involving the BackfillSync itself.
RpcRequestSendError::InternalError(e) => {
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
warn!(%batch_id, error = ?e, %batch,"Could not send batch request");

View File

@@ -494,7 +494,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
let Some(lookup) = self.single_block_lookups.get_mut(&id.lookup_id) else {
// We don't have the ability to cancel in-flight RPC requests. So this can happen
// if we started this RPC request, and later saw the block/blobs via gossip.
debug!(?id, "Block returned for single block lookup not present");
debug!(%id, "Block returned for single block lookup not present");
return Err(LookupRequestError::UnknownLookup);
};
@@ -507,7 +507,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
Ok((response, peer_group, seen_timestamp)) => {
debug!(
?block_root,
?id,
%id,
?peer_group,
?response_type,
"Received lookup download success"
@@ -540,7 +540,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
// the peer and the request ID which is linked to this `id` value here.
debug!(
?block_root,
?id,
%id,
?response_type,
error = ?e,
"Received lookup download failure"

View File

@@ -36,7 +36,8 @@
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
use super::block_lookups::BlockLookups;
use super::network_context::{
CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent, SyncNetworkContext,
CustodyByRangeResult, CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent,
SyncNetworkContext,
};
use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
@@ -58,9 +59,10 @@ use beacon_chain::{
use futures::StreamExt;
use lighthouse_network::rpc::RPCError;
use lighthouse_network::service::api_types::{
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId, CustodyRequester,
DataColumnsByRangeRequestId, DataColumnsByRootRequestId, DataColumnsByRootRequester, Id,
SamplingId, SamplingRequester, SingleLookupReqId, SyncRequestId,
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
CustodyByRangeRequestId, CustodyRequester, DataColumnsByRangeRequestId,
DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SamplingId, SamplingRequester,
SingleLookupReqId, SyncRequestId,
};
use lighthouse_network::types::{NetworkGlobals, SyncState};
use lighthouse_network::PeerId;
@@ -336,23 +338,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
.collect()
}
#[cfg(test)]
pub(crate) fn get_range_sync_chains(
&self,
) -> Result<Option<(RangeSyncType, Slot, Slot)>, &'static str> {
self.range_sync.state()
}
#[cfg(test)]
pub(crate) fn range_sync_state(&self) -> super::range_sync::SyncChainStatus {
self.range_sync.state()
}
#[cfg(test)]
pub(crate) fn __range_failed_chains(&mut self) -> Vec<Hash256> {
self.range_sync.__failed_chains()
}
#[cfg(test)]
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
self.block_lookups.get_failed_chains()
@@ -377,6 +362,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
self.sampling.get_request_status(block_root, index)
}
// Leak the full network context to prevent having to add many cfg(test) methods here
#[cfg(test)]
pub(crate) fn network(&mut self) -> &mut SyncNetworkContext<T> {
&mut self.network
}
// Leak the full range_sync to prevent having to add many cfg(test) methods here
#[cfg(test)]
pub(crate) fn range_sync(&mut self) -> &mut RangeSync<T> {
&mut self.range_sync
}
#[cfg(test)]
pub(crate) fn update_execution_engine_state(&mut self, state: EngineState) {
self.handle_new_execution_engine_state(state);
@@ -442,6 +439,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
for (id, result) in self.network.continue_custody_by_root_requests() {
self.on_custody_by_root_result(id, result);
}
for (id, result) in self.network.continue_custody_by_range_requests() {
self.on_custody_by_range_result(id, result);
}
}
/// Trigger range sync for a set of peers that claim to have imported a head unknown to us.
@@ -545,6 +545,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
for (id, result) in self.network.continue_custody_by_root_requests() {
self.on_custody_by_root_result(id, result);
}
for (id, result) in self.network.continue_custody_by_range_requests() {
self.on_custody_by_range_result(id, result);
}
}
/// Updates the syncing state of a peer.
@@ -1186,10 +1189,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_blocks_by_range_response(id, peer_id, block) {
self.on_range_components_response(
self.on_block_components_by_range_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::Block(id, resp),
RangeBlockComponent::Block(id, resp, peer_id),
);
}
}
@@ -1201,10 +1203,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
) {
if let Some(resp) = self.network.on_blobs_by_range_response(id, peer_id, blob) {
self.on_range_components_response(
self.on_block_components_by_range_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::Blob(id, resp),
RangeBlockComponent::Blob(id, resp, peer_id),
);
}
}
@@ -1215,18 +1216,46 @@ impl<T: BeaconChainTypes> SyncManager<T> {
peer_id: PeerId,
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
) {
// data_columns_by_range returns either an Ok list of data columns, or an RpcResponseError
if let Some(resp) = self
.network
.on_data_columns_by_range_response(id, peer_id, data_column)
{
self.on_range_components_response(
id.parent_request_id,
peer_id,
RangeBlockComponent::CustodyColumns(id, resp),
);
// custody_by_range accumulates the results of multiple data_columns_by_range requests
// returning a bigger list of data columns across all the column indices this node has
// to custody
if let Some(result) =
self.network
.on_custody_by_range_response(id.parent_request_id, id, peer_id, resp)
{
self.on_custody_by_range_result(id.parent_request_id, result);
}
}
}
fn on_custody_by_range_result(
&mut self,
id: CustodyByRangeRequestId,
result: CustodyByRangeResult<T::EthSpec>,
) {
// TODO(das): Improve the type of RangeBlockComponent::CustodyColumns, to
// not have to pass a PeerGroup in case of error
let peers = match &result {
Ok((_, peers, _)) => peers.clone(),
// TODO(das): this PeerGroup with no peers is incorrect
Err(_) => PeerGroup::from_set(<_>::default()),
};
self.on_block_components_by_range_response(
id.parent_request_id,
RangeBlockComponent::CustodyColumns(
id,
result.map(|(data, _peers, timestamp)| (data, timestamp)),
peers,
),
);
}
fn on_custody_by_root_result(
&mut self,
requester: CustodyRequester,
@@ -1267,17 +1296,15 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// Handles receiving a response for a range sync request that should have both blocks and
/// blobs.
fn on_range_components_response(
fn on_block_components_by_range_response(
&mut self,
range_request_id: ComponentsByRangeRequestId,
peer_id: PeerId,
range_block_component: RangeBlockComponent<T::EthSpec>,
) {
if let Some(resp) = self.network.range_block_component_response(
range_request_id,
peer_id,
range_block_component,
) {
if let Some(resp) = self
.network
.on_block_components_by_range_response(range_request_id, range_block_component)
{
match resp {
Ok((blocks, batch_peers)) => {
match range_request_id.requester {
@@ -1315,7 +1342,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
RangeRequestId::RangeSync { chain_id, batch_id } => {
self.range_sync.inject_error(
&mut self.network,
peer_id,
batch_id,
chain_id,
range_request_id.id,
@@ -1327,7 +1353,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
match self.backfill_sync.inject_error(
&mut self.network,
batch_id,
&peer_id,
range_request_id.id,
e,
) {

View File

@@ -3,7 +3,6 @@
//! Stores the various syncing methods for the beacon chain.
mod backfill_sync;
mod block_lookups;
mod block_sidecar_coupling;
pub mod manager;
mod network_context;
mod peer_sampling;

View File

@@ -1,11 +1,11 @@
//! Provides network functionality for the Syncing thread. This fundamentally wraps a network
//! channel and stores a global RPC ID to perform requests.
use self::custody::{ActiveCustodyRequest, Error as CustodyRequestError};
use self::custody_by_range::{ActiveCustodyByRangeRequest, CustodyByRangeRequestResult};
use self::custody_by_root::{ActiveCustodyByRootRequest, CustodyByRootRequestResult};
pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest};
use super::block_sidecar_coupling::RangeBlockComponentsRequest;
use super::manager::BlockProcessType;
use super::range_sync::{BatchPeers, ByRangeRequestType};
use super::range_sync::BatchPeers;
use super::SyncMessage;
use crate::metrics;
use crate::network_beacon_processor::NetworkBeaconProcessor;
@@ -17,15 +17,17 @@ use crate::sync::block_lookups::SingleLookupId;
use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest;
use beacon_chain::block_verification_types::RpcBlock;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState};
use custody::CustodyRequestResult;
pub use block_components_by_range::BlockComponentsByRangeRequest;
#[cfg(test)]
pub use block_components_by_range::BlockComponentsByRangeRequestStep;
use fnv::FnvHashMap;
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest};
use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError, RequestType};
pub use lighthouse_network::service::api_types::RangeRequestId;
use lighthouse_network::service::api_types::{
AppRequestId, BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
CustodyId, CustodyRequester, DataColumnsByRangeRequestId, DataColumnsByRootRequestId,
DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
CustodyByRangeRequestId, CustodyId, CustodyRequester, DataColumnsByRangeRequestId,
DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
};
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource};
use parking_lot::RwLock;
@@ -36,7 +38,6 @@ use requests::{
};
#[cfg(test)]
use slot_clock::SlotClock;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::sync::Arc;
@@ -47,11 +48,13 @@ use tokio::sync::mpsc;
use tracing::{debug, error, span, warn, Level};
use types::blob_sidecar::FixedBlobSidecarList;
use types::{
BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext,
Hash256, SignedBeaconBlock, Slot,
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec,
ForkContext, Hash256, SignedBeaconBlock, SignedBeaconBlockHeader, Slot,
};
pub mod custody;
pub mod block_components_by_range;
pub mod custody_by_range;
pub mod custody_by_root;
mod requests;
#[derive(Debug)]
@@ -72,32 +75,29 @@ impl<T> RpcEvent<T> {
pub type RpcResponseResult<T> = Result<(T, Duration), RpcResponseError>;
/// Duration = latest seen timestamp of all received data columns
pub type CustodyByRootResult<T> =
Result<(DataColumnSidecarList<T>, PeerGroup, Duration), RpcResponseError>;
pub type RpcResponseBatchResult<T> = Result<(T, PeerGroup, Duration), RpcResponseError>;
#[derive(Debug)]
/// Duration = latest seen timestamp of all received data columns
pub type CustodyByRootResult<T> = RpcResponseBatchResult<DataColumnSidecarList<T>>;
pub type CustodyByRangeResult<T> = RpcResponseBatchResult<DataColumnSidecarList<T>>;
#[derive(Debug, Clone)]
pub enum RpcResponseError {
RpcError(#[allow(dead_code)] RPCError),
VerifyError(LookupVerifyError),
CustodyRequestError(#[allow(dead_code)] CustodyRequestError),
BlockComponentCouplingError(#[allow(dead_code)] String),
RequestExpired(String),
InternalError(#[allow(dead_code)] String),
}
#[derive(Debug, PartialEq, Eq)]
pub enum RpcRequestSendError {
/// No peer available matching the required criteria
NoPeer(NoPeerError),
/// These errors should never happen, including unreachable custody errors or network send
/// errors.
InternalError(String),
}
/// Type of peer missing that caused a `RpcRequestSendError::NoPeers`
#[derive(Debug, PartialEq, Eq)]
pub enum NoPeerError {
BlockPeer,
CustodyPeer(ColumnIndex),
// If RpcRequestSendError has a single variant `InternalError` it's to signal to downstream
// consumers that sends are expected to be infallible. If this assumption changes in the future,
// add a new variant.
}
#[derive(Debug, PartialEq, Eq)]
@@ -150,6 +150,17 @@ impl PeerGroup {
}
})
}
pub fn as_reversed_map(&self) -> HashMap<u64, PeerId> {
// TODO(das): should we change PeerGroup to hold this map?
let mut index_to_peer = HashMap::<u64, PeerId>::new();
for (peer, indices) in self.peers.iter() {
for &index in indices {
index_to_peer.insert(index as u64, *peer);
}
}
index_to_peer
}
}
/// Sequential ID that uniquely identifies ReqResp outgoing requests
@@ -195,12 +206,15 @@ pub struct SyncNetworkContext<T: BeaconChainTypes> {
data_columns_by_range_requests:
ActiveRequests<DataColumnsByRangeRequestId, DataColumnsByRangeRequestItems<T::EthSpec>>,
/// Mapping of active custody column requests for a block root
custody_by_root_requests: FnvHashMap<CustodyRequester, ActiveCustodyRequest<T>>,
/// Mapping of active custody column by root requests for a block root
custody_by_root_requests: FnvHashMap<CustodyRequester, ActiveCustodyByRootRequest<T>>,
/// Mapping of active custody column by range requests
custody_by_range_requests: FnvHashMap<CustodyByRangeRequestId, ActiveCustodyByRangeRequest<T>>,
/// BlocksByRange requests paired with other ByRange requests for data components
components_by_range_requests:
FnvHashMap<ComponentsByRangeRequestId, RangeBlockComponentsRequest<T::EthSpec>>,
block_components_by_range_requests:
FnvHashMap<ComponentsByRangeRequestId, BlockComponentsByRangeRequest<T>>,
/// Whether the ee is online. If it's not, we don't allow access to the
/// `beacon_processor_send`.
@@ -219,14 +233,17 @@ pub enum RangeBlockComponent<E: EthSpec> {
Block(
BlocksByRangeRequestId,
RpcResponseResult<Vec<Arc<SignedBeaconBlock<E>>>>,
PeerId,
),
Blob(
BlobsByRangeRequestId,
RpcResponseResult<Vec<Arc<BlobSidecar<E>>>>,
PeerId,
),
CustodyColumns(
DataColumnsByRangeRequestId,
CustodyByRangeRequestId,
RpcResponseResult<Vec<Arc<DataColumnSidecar<E>>>>,
PeerGroup,
),
}
@@ -283,7 +300,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
blobs_by_range_requests: ActiveRequests::new("blobs_by_range"),
data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"),
custody_by_root_requests: <_>::default(),
components_by_range_requests: FnvHashMap::default(),
custody_by_range_requests: <_>::default(),
block_components_by_range_requests: <_>::default(),
network_beacon_processor,
chain,
fork_context,
@@ -297,6 +315,14 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
/// Returns the ids of all the requests made to the given peer_id.
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec<SyncRequestId> {
self.active_requests()
.filter(|(_, request_peer)| *request_peer == peer_id)
.map(|(id, _)| id)
.collect()
}
/// Returns the ids of all active requests
pub fn active_requests(&mut self) -> impl Iterator<Item = (SyncRequestId, &PeerId)> {
// Note: using destructuring pattern without a default case to make sure we don't forget to
// add new request types to this function. Otherwise, lookup sync can break and lookups
// will get stuck if a peer disconnects during an active requests.
@@ -311,8 +337,9 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
data_columns_by_range_requests,
// custody_by_root_requests is a meta request of data_columns_by_root_requests
custody_by_root_requests: _,
custody_by_range_requests: _,
// components_by_range_requests is a meta request of various _by_range requests
components_by_range_requests: _,
block_components_by_range_requests: _,
execution_engine_state: _,
network_beacon_processor: _,
chain: _,
@@ -320,29 +347,23 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
} = self;
let blocks_by_root_ids = blocks_by_root_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|id| SyncRequestId::SingleBlock { id: *id });
.active_requests()
.map(|(id, peer)| (SyncRequestId::SingleBlock { id: *id }, peer));
let blobs_by_root_ids = blobs_by_root_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|id| SyncRequestId::SingleBlob { id: *id });
.active_requests()
.map(|(id, peer)| (SyncRequestId::SingleBlob { id: *id }, peer));
let data_column_by_root_ids = data_columns_by_root_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|req_id| SyncRequestId::DataColumnsByRoot(*req_id));
.active_requests()
.map(|(id, peer)| (SyncRequestId::DataColumnsByRoot(*id), peer));
let blocks_by_range_ids = blocks_by_range_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|req_id| SyncRequestId::BlocksByRange(*req_id));
.active_requests()
.map(|(id, peer)| (SyncRequestId::BlocksByRange(*id), peer));
let blobs_by_range_ids = blobs_by_range_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|req_id| SyncRequestId::BlobsByRange(*req_id));
.active_requests()
.map(|(id, peer)| (SyncRequestId::BlobsByRange(*id), peer));
let data_column_by_range_ids = data_columns_by_range_requests
.active_requests_of_peer(peer_id)
.into_iter()
.map(|req_id| SyncRequestId::DataColumnsByRange(*req_id));
.active_requests()
.map(|(id, peer)| (SyncRequestId::DataColumnsByRange(*id), peer));
blocks_by_root_ids
.chain(blobs_by_root_ids)
@@ -350,6 +371,18 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
.chain(blocks_by_range_ids)
.chain(blobs_by_range_ids)
.chain(data_column_by_range_ids)
}
#[cfg(test)]
pub fn active_block_components_by_range_requests(
&self,
) -> Vec<(
ComponentsByRangeRequestId,
BlockComponentsByRangeRequestStep,
)> {
self.block_components_by_range_requests
.iter()
.map(|(id, req)| (*id, req.state_step()))
.collect()
}
@@ -362,6 +395,10 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
&self.network_beacon_processor.network_globals
}
pub fn spec(&self) -> &ChainSpec {
&self.chain.spec
}
/// Returns the Client type of the peer if known
pub fn client_type(&self, peer_id: &PeerId) -> Client {
self.network_globals()
@@ -414,8 +451,9 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
data_columns_by_range_requests,
// custody_by_root_requests is a meta request of data_columns_by_root_requests
custody_by_root_requests: _,
custody_by_range_requests: _,
// components_by_range_requests is a meta request of various _by_range requests
components_by_range_requests: _,
block_components_by_range_requests: _,
execution_engine_state: _,
network_beacon_processor: _,
chain: _,
@@ -447,205 +485,95 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
requester: RangeRequestId,
peers: &HashSet<PeerId>,
peers_to_deprioritize: &HashSet<PeerId>,
total_requests_per_peer: &HashMap<PeerId, usize>,
) -> Result<Id, RpcRequestSendError> {
let batch_epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch());
let batch_type = self.batch_type(batch_epoch);
let active_request_count_by_peer = self.active_request_count_by_peer();
let Some(block_peer) = peers
.iter()
.map(|peer| {
(
// If contains -> 1 (order after), not contains -> 0 (order first)
peers_to_deprioritize.contains(peer),
// Prefer peers with less overall requests
active_request_count_by_peer.get(peer).copied().unwrap_or(0),
// Random factor to break ties, otherwise the PeerID breaks ties
rand::random::<u32>(),
peer,
)
})
.min()
.map(|(_, _, _, peer)| *peer)
else {
// Backfill and forward sync handle this condition gracefully.
// - Backfill sync: will pause waiting for more peers to join
// - Forward sync: can never happen as the chain is dropped when removing the last peer.
return Err(RpcRequestSendError::NoPeer(NoPeerError::BlockPeer));
};
// Attempt to find all required custody peers before sending any request or creating an ID
let columns_by_range_peers_to_request =
if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) {
let column_indexes = self.network_globals().sampling_columns.clone();
Some(self.select_columns_by_range_peers_to_request(
&column_indexes,
peers,
active_request_count_by_peer,
peers_to_deprioritize,
)?)
} else {
None
};
// Create the overall components_by_range request ID before its individual components
let id = ComponentsByRangeRequestId {
id: self.next_id(),
requester,
};
let blocks_req_id = self.send_blocks_by_range_request(block_peer, request.clone(), id)?;
let req = BlockComponentsByRangeRequest::new(
id,
request,
peers,
peers_to_deprioritize,
total_requests_per_peer,
self,
)?;
let blobs_req_id = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) {
Some(self.send_blobs_by_range_request(
block_peer,
BlobsByRangeRequest {
start_slot: *request.start_slot(),
count: *request.count(),
},
id,
)?)
} else {
None
};
let data_column_requests = columns_by_range_peers_to_request
.map(|columns_by_range_peers_to_request| {
let column_to_peer_map = columns_by_range_peers_to_request
.iter()
.flat_map(|(peer_id, columns)| columns.iter().map(|column| (*column, *peer_id)))
.collect::<HashMap<ColumnIndex, PeerId>>();
let requests = columns_by_range_peers_to_request
.into_iter()
.map(|(peer_id, columns)| {
self.send_data_columns_by_range_request(
peer_id,
DataColumnsByRangeRequest {
start_slot: *request.start_slot(),
count: *request.count(),
columns,
},
id,
)
})
.collect::<Result<Vec<_>, _>>()?;
Ok((requests, column_to_peer_map))
})
.transpose()?;
let info =
RangeBlockComponentsRequest::new(blocks_req_id, blobs_req_id, data_column_requests);
self.components_by_range_requests.insert(id, info);
self.block_components_by_range_requests.insert(id, req);
// TODO: use ID
Ok(id.id)
}
fn select_columns_by_range_peers_to_request(
&self,
custody_indexes: &HashSet<ColumnIndex>,
peers: &HashSet<PeerId>,
active_request_count_by_peer: HashMap<PeerId, usize>,
peers_to_deprioritize: &HashSet<PeerId>,
) -> Result<HashMap<PeerId, Vec<ColumnIndex>>, RpcRequestSendError> {
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
for column_index in custody_indexes {
// Strictly consider peers that are custodials of this column AND are part of this
// syncing chain. If the forward range sync chain has few peers, it's likely that this
// function will not be able to find peers on our custody columns.
let Some(custody_peer) = peers
.iter()
.filter(|peer| {
self.network_globals()
.is_custody_peer_of(*column_index, peer)
})
.map(|peer| {
(
// If contains -> 1 (order after), not contains -> 0 (order first)
peers_to_deprioritize.contains(peer),
// Prefer peers with less overall requests
// Also account for requests that are not yet issued tracked in peer_id_to_request_map
// We batch requests to the same peer, so count existence in the
// `columns_to_request_by_peer` as a single 1 request.
active_request_count_by_peer.get(peer).copied().unwrap_or(0)
+ columns_to_request_by_peer.get(peer).map(|_| 1).unwrap_or(0),
// Random factor to break ties, otherwise the PeerID breaks ties
rand::random::<u32>(),
peer,
)
})
.min()
.map(|(_, _, _, peer)| *peer)
else {
// TODO(das): this will be pretty bad UX. To improve we should:
// - Handle the no peers case gracefully, maybe add some timeout and give a few
// minutes / seconds to the peer manager to locate peers on this subnet before
// abandoning progress on the chain completely.
return Err(RpcRequestSendError::NoPeer(NoPeerError::CustodyPeer(
*column_index,
)));
};
columns_to_request_by_peer
.entry(custody_peer)
.or_default()
.push(*column_index);
}
Ok(columns_to_request_by_peer)
}
/// Received a _by_range response for a request that couples blocks and its data
///
/// `peer_id` is the peer that served this individual RPC _by_range response.
/// Received a blocks by range or blobs by range response for a request that couples blocks
/// and blobs.
#[allow(clippy::type_complexity)]
pub fn range_block_component_response(
pub fn on_block_components_by_range_response(
&mut self,
id: ComponentsByRangeRequestId,
peer_id: PeerId,
range_block_component: RangeBlockComponent<T::EthSpec>,
) -> Option<Result<(Vec<RpcBlock<T::EthSpec>>, BatchPeers), RpcResponseError>> {
let Entry::Occupied(mut entry) = self.components_by_range_requests.entry(id) else {
metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]);
// Note: need to remove the request to borrow self again below. Otherwise we can't
// do nested requests
let Some(mut request) = self.block_components_by_range_requests.remove(&id) else {
metrics::inc_counter_vec(
&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS,
&["block_components_by_range"],
);
return None;
};
if let Err(e) = {
let request = entry.get_mut();
match range_block_component {
RangeBlockComponent::Block(req_id, resp) => resp.and_then(|(blocks, _)| {
let result = match range_block_component {
RangeBlockComponent::Block(req_id, resp, peer_id) => resp.and_then(|(blocks, _)| {
request
.on_blocks_by_range_result(req_id, blocks, peer_id, self)
.map_err(Into::<RpcResponseError>::into)
}),
RangeBlockComponent::Blob(req_id, resp, peer_id) => resp.and_then(|(blobs, _)| {
request
.on_blobs_by_range_result(req_id, blobs, peer_id, self)
.map_err(Into::<RpcResponseError>::into)
}),
RangeBlockComponent::CustodyColumns(req_id, resp, peers) => {
resp.and_then(|(custody_columns, _)| {
request
.add_blocks(req_id, blocks, peer_id)
.map_err(RpcResponseError::BlockComponentCouplingError)
}),
RangeBlockComponent::Blob(req_id, resp) => resp.and_then(|(blobs, _)| {
request
.add_blobs(req_id, blobs, peer_id)
.map_err(RpcResponseError::BlockComponentCouplingError)
}),
RangeBlockComponent::CustodyColumns(req_id, resp) => {
resp.and_then(|(custody_columns, _)| {
request
.add_custody_columns(req_id, custody_columns, peer_id)
.map_err(RpcResponseError::BlockComponentCouplingError)
})
}
.on_custody_by_range_result(req_id, custody_columns, peers, self)
.map_err(Into::<RpcResponseError>::into)
})
}
} {
entry.remove();
return Some(Err(e));
}
};
if let Some(blocks_result) = entry.get().responses(&self.chain.spec) {
entry.remove();
// If the request is finished, dequeue everything
Some(blocks_result.map_err(RpcResponseError::BlockComponentCouplingError))
} else {
None
let result = result.transpose();
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
// an Option first to use in an `if let Some() { act on result }` block.
match result.as_ref() {
Some(Ok((blocks, peer_group))) => {
let blocks_with_data = blocks
.iter()
.filter(|block| block.as_block().has_data())
.count();
// Don't log the peer_group here, it's very long (could be up to 128 peers). If you
// want to trace which peer sent the column at index X, search for the log:
// `Sync RPC request sent method="DataColumnsByRange" ...`
debug!(
%id,
blocks = blocks.len(),
blocks_with_data,
block_peer = ?peer_group.block(),
"Block components by range request success, removing"
)
}
Some(Err(e)) => {
debug!(%id, error = ?e, "Block components by range request failure, removing" )
}
None => {
self.block_components_by_range_requests.insert(id, request);
}
}
result
}
/// Request block of `block_root` if necessary by checking:
@@ -853,7 +781,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
}
/// Request to send a single `data_columns_by_root` request to the network.
pub fn data_column_lookup_request(
pub fn data_columns_by_root_request(
&mut self,
requester: DataColumnsByRootRequester,
peer_id: PeerId,
@@ -951,7 +879,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
);
let requester = CustodyRequester(id);
let mut request = ActiveCustodyRequest::new(
let mut request = ActiveCustodyByRootRequest::new(
block_root,
CustodyId { requester },
&custody_indexes_to_fetch,
@@ -967,25 +895,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
self.custody_by_root_requests.insert(requester, request);
Ok(LookupRequestResult::RequestSent(id.req_id))
}
Err(e) => Err(match e {
CustodyRequestError::NoPeer(column_index) => {
RpcRequestSendError::NoPeer(NoPeerError::CustodyPeer(column_index))
}
// - TooManyFailures: Should never happen, `request` has just been created, it's
// count of download_failures is 0 here
// - BadState: Should never happen, a bad state can only happen when handling a
// network response
// - UnexpectedRequestId: Never happens: this Err is only constructed handling a
// download or processing response
// - SendFailed: Should never happen unless in a bad drop sequence when shutting
// down the node
e @ (CustodyRequestError::TooManyFailures
| CustodyRequestError::BadState { .. }
| CustodyRequestError::UnexpectedRequestId { .. }
| CustodyRequestError::SendFailed { .. }) => {
RpcRequestSendError::InternalError(format!("{e:?}"))
}
}),
Err(e) => Err(e.into()),
}
}
@@ -1073,8 +983,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
&mut self,
peer_id: PeerId,
request: DataColumnsByRangeRequest,
parent_request_id: ComponentsByRangeRequestId,
) -> Result<DataColumnsByRangeRequestId, RpcRequestSendError> {
parent_request_id: CustodyByRangeRequestId,
) -> Result<DataColumnsByRangeRequestId, &'static str> {
let id = DataColumnsByRangeRequestId {
id: self.next_id(),
parent_request_id,
@@ -1085,7 +995,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request: RequestType::DataColumnsByRange(request.clone()),
app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRange(id)),
})
.map_err(|_| RpcRequestSendError::InternalError("network send error".to_owned()))?;
.map_err(|_| "network send error")?;
debug!(
method = "DataColumnsByRange",
@@ -1108,6 +1018,50 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
Ok(id)
}
/// Request to fetch all needed custody columns of a range of slots. This function may not send
/// any request to the network if no columns have to be fetched based on the import state of the
/// node. A custody request is a "super request" that may trigger 0 or more `data_columns_by_range`
/// requests.
///
/// - `parent_id`: the overall components-by-range request this custody request belongs to
/// - `blocks_with_data`: headers of the blocks that peers are expected to serve columns for
/// - `column_indices`: the column indices to download; assumed non-empty — TODO confirm callers
///   never pass an empty set
/// - `lookup_peers`: shared set of peers that claim to have imported these blocks
pub fn send_custody_by_range_request(
    &mut self,
    parent_id: ComponentsByRangeRequestId,
    blocks_with_data: Vec<SignedBeaconBlockHeader>,
    epoch: Epoch,
    column_indices: Vec<ColumnIndex>,
    lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
) -> Result<CustodyByRangeRequestId, RpcRequestSendError> {
    let id = CustodyByRangeRequestId {
        id: self.next_id(),
        parent_request_id: parent_id,
    };
    debug!(
        indices = ?column_indices,
        %id,
        "Starting custody columns by range request"
    );
    let mut request = ActiveCustodyByRangeRequest::new(
        id,
        epoch,
        blocks_with_data,
        &column_indices,
        lookup_peers,
    );
    // Note that you can only send, but not handle a response here
    match request.continue_requests(self) {
        Ok(_) => {
            // Ignoring the result of `continue_requests` is okay. A request that has just been
            // created cannot return data immediately, it must send some request to the network
            // first. And there must exist some request, `column_indices` is not empty.
            self.custody_by_range_requests.insert(id, request);
            Ok(id)
        }
        Err(e) => Err(e.into()),
    }
}
/// Returns `true` iff the tracked execution engine state is `EngineState::Online`.
pub fn is_execution_engine_online(&self) -> bool {
    matches!(self.execution_engine_state, EngineState::Online)
}
@@ -1212,34 +1166,6 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
id
}
/// Check whether a batch for this epoch (and only this epoch) should request just blocks,
/// blocks and blobs, or blocks and data columns.
fn batch_type(&self, epoch: types::Epoch) -> ByRangeRequestType {
    // Induces a compile time panic if this doesn't hold true.
    #[allow(clippy::assertions_on_constants)]
    const _: () = assert!(
        super::backfill_sync::BACKFILL_EPOCHS_PER_BATCH == 1
            && super::range_sync::EPOCHS_PER_BATCH == 1,
        "To deal with alignment with deneb boundaries, batches need to be of just one epoch"
    );

    // The data-availability checker knows which components the fork at `epoch` requires.
    let da_checker = &self.chain.data_availability_checker;
    if da_checker.data_columns_required_for_epoch(epoch) {
        ByRangeRequestType::BlocksAndColumns
    } else if da_checker.blobs_required_for_epoch(epoch) {
        ByRangeRequestType::BlocksAndBlobs
    } else {
        ByRangeRequestType::Blocks
    }
}
/// Attempt to make progress on all custody_by_root requests. Some request may be stale waiting
/// for custody peers. Returns a Vec of results as zero or more requests may fail in this
/// attempt.
@@ -1266,6 +1192,32 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
.collect()
}
/// Attempt to make progress on all custody_by_range requests. Some request may be stale waiting
/// for custody peers. Returns a Vec of results as zero or more requests may fail in this
/// attempt.
pub fn continue_custody_by_range_requests(
    &mut self,
) -> Vec<(CustodyByRangeRequestId, CustodyByRangeResult<T::EthSpec>)> {
    let ids = self
        .custody_by_range_requests
        .keys()
        .copied()
        .collect::<Vec<_>>();
    // Need to collect ids and results in separate steps to re-borrow self.
    ids.into_iter()
        .filter_map(|id| {
            // Remove the entry so `continue_requests` can borrow `self` mutably; the handler
            // below re-inserts the request if it is still in progress.
            let mut request = self
                .custody_by_range_requests
                .remove(&id)
                .expect("key of hashmap");
            let result = request.continue_requests(self);
            // `handle_custody_by_range_result` returns Some only when the request reached a
            // terminal state (success or error).
            self.handle_custody_by_range_result(id, request, result)
                .map(|result| (id, result))
        })
        .collect()
}
// Request handlers
pub(crate) fn on_single_block_response(
@@ -1425,8 +1377,10 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// Note: need to remove the request to borrow self again below. Otherwise we can't
// do nested requests
let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else {
// TODO(das): This log can happen if the request is error'ed early and dropped
debug!(?id, "Custody column downloaded event for unknown request");
metrics::inc_counter_vec(
&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS,
&["custody_by_root"],
);
return None;
};
@@ -1438,8 +1392,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
fn handle_custody_by_root_result(
&mut self,
id: CustodyRequester,
request: ActiveCustodyRequest<T>,
result: CustodyRequestResult<T::EthSpec>,
request: ActiveCustodyByRootRequest<T>,
result: CustodyByRootRequestResult<T::EthSpec>,
) -> Option<CustodyByRootResult<T::EthSpec>> {
let span = span!(
Level::INFO,
@@ -1448,18 +1402,16 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
);
let _enter = span.enter();
let result = result
.map_err(RpcResponseError::CustodyRequestError)
.transpose();
let result = result.map_err(Into::<RpcResponseError>::into).transpose();
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
// an Option first to use in an `if let Some() { act on result }` block.
match result.as_ref() {
Some(Ok((columns, peer_group, _))) => {
debug!(?id, count = columns.len(), peers = ?peer_group, "Custody request success, removing")
debug!(%id, count = columns.len(), peers = ?peer_group, "Custody by root request success, removing")
}
Some(Err(e)) => {
debug!(?id, error = ?e, "Custody request failure, removing" )
debug!(%id, error = ?e, "Custody by root request failure, removing" )
}
None => {
self.custody_by_root_requests.insert(id, request);
@@ -1468,6 +1420,61 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
result
}
/// Insert a downloaded column into an active custody request. Then make progress on the
/// entire request.
///
/// ### Returns
///
/// - `Some`: Request completed, won't make more progress. Expect requester to act on the result.
/// - `None`: Request still active, requester should do no action
#[allow(clippy::type_complexity)]
pub fn on_custody_by_range_response(
&mut self,
id: CustodyByRangeRequestId,
req_id: DataColumnsByRangeRequestId,
peer_id: PeerId,
resp: RpcResponseResult<Vec<Arc<DataColumnSidecar<T::EthSpec>>>>,
) -> Option<CustodyByRootResult<T::EthSpec>> {
// Note: need to remove the request to borrow self again below. Otherwise we can't
// do nested requests
let Some(mut request) = self.custody_by_range_requests.remove(&id) else {
// TOOD(das): This log can happen if the request is error'ed early and dropped
debug!(%id, "Custody by range downloaded event for unknown request");
return None;
};
let result = request.on_data_column_downloaded(peer_id, req_id, resp, self);
self.handle_custody_by_range_result(id, request, result)
}
/// Process the terminal-or-not outcome of an active custody-by-range request.
///
/// Returns `Some` when the request completed or failed (the request is dropped), `None` when
/// it is still in progress (the request is re-inserted into the tracking map).
fn handle_custody_by_range_result(
    &mut self,
    id: CustodyByRangeRequestId,
    request: ActiveCustodyByRangeRequest<T>,
    result: CustodyByRangeRequestResult<T::EthSpec>,
) -> Option<CustodyByRangeResult<T::EthSpec>> {
    let result = result.map_err(Into::<RpcResponseError>::into).transpose();
    // Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
    // an Option first to use in an `if let Some() { act on result }` block.
    match result.as_ref() {
        Some(Ok((columns, _peer_group, _))) => {
            // Don't log the peer_group here, it's very long (could be up to 128 peers). If you
            // want to trace which peer sent the column at index X, search for the log:
            // `Sync RPC request sent method="DataColumnsByRange" ...`
            debug!(%id, count = columns.len(), "Custody by range request success, removing")
        }
        Some(Err(e)) => {
            debug!(%id, error = ?e, "Custody by range request failure, removing" )
        }
        None => {
            // Still active: keep tracking it
            self.custody_by_range_requests.insert(id, request);
        }
    }
    result
}
pub fn send_block_for_processing(
&self,
id: Id,
@@ -1529,7 +1536,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
.beacon_processor_if_enabled()
.ok_or(SendErrorProcessor::ProcessorNotAvailable)?;
debug!(?block_root, ?id, "Sending blobs for processing");
debug!(?block_root, %id, "Sending blobs for processing");
// Lookup sync event safety: If `beacon_processor.send_rpc_blobs` returns Ok() sync
// must receive a single `SyncMessage::BlockComponentProcessed` event with this process type
beacon_processor
@@ -1600,8 +1607,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
),
("custody_by_root", self.custody_by_root_requests.len()),
(
"components_by_range",
self.components_by_range_requests.len(),
"block_components_by_range",
self.block_components_by_range_requests.len(),
),
] {
metrics::set_gauge_vec(&metrics::SYNC_ACTIVE_NETWORK_REQUESTS, &[id], count as i64);

View File

@@ -0,0 +1,550 @@
use crate::sync::network_context::{
PeerGroup, RpcRequestSendError, RpcResponseError, SyncNetworkContext,
};
use crate::sync::range_sync::BatchPeers;
use beacon_chain::block_verification_types::RpcBlock;
use beacon_chain::data_column_verification::CustodyDataColumn;
use beacon_chain::{get_block_root, BeaconChainTypes};
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, BlocksByRangeRequest};
use lighthouse_network::service::api_types::{
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
CustodyByRangeRequestId,
};
use lighthouse_network::PeerId;
use parking_lot::RwLock;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use types::{
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, EthSpec, Hash256, RuntimeVariableList,
SignedBeaconBlock, Slot,
};
/// State of a single "components by range" request: the block download plus, depending on the
/// fork, the blob or data-column downloads needed to couple complete `RpcBlock`s.
pub struct BlockComponentsByRangeRequest<T: BeaconChainTypes> {
    // Id of the overall by-range request this state belongs to
    id: ComponentsByRangeRequestId,
    // Shared set of peers usable for this batch; also handed to nested custody requests
    peers: Arc<RwLock<HashSet<PeerId>>>,
    // The original blocks_by_range request; kept to derive the batch epoch for nested requests
    request: BlocksByRangeRequest,
    // Fork-dependent download state machine
    state: State<T::EthSpec>,
}
/// Fork-dependent download state for a components-by-range request.
enum State<E: EthSpec> {
    /// Pre-Deneb: only blocks are requested
    Base {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
    },
    // Two single concurrent requests for block + blobs
    DenebEnabled {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
        blobs_by_range_request: ByRangeRequest<BlobsByRangeRequestId, Vec<Arc<BlobSidecar<E>>>>,
    },
    // Request blocks first, then columns
    FuluEnabled(FuluEnabledState<E>),
}
/// Two-phase state for Fulu (PeerDAS) batches: blocks are fetched first; only if some block in
/// the batch has data is a custody-by-range request for columns started.
enum FuluEnabledState<E: EthSpec> {
    /// Waiting for the blocks_by_range request to complete
    BlockRequest {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
    },
    /// Blocks downloaded; waiting for the nested custody columns request to complete
    CustodyRequest {
        // Blocks downloaded in the previous phase
        blocks: Vec<Arc<SignedBeaconBlock<E>>>,
        // Peer that served the blocks, reported in the final `BatchPeers`
        block_peer: PeerId,
        custody_by_range_request:
            ByRangeRequest<CustodyByRangeRequestId, Vec<Arc<DataColumnSidecar<E>>>, PeerGroup>,
    },
}
/// Minimal single-request state machine: either waiting on a request identified by `I`, or
/// holding the downloaded data `T` together with the serving peer(s) `P`.
enum ByRangeRequest<I: PartialEq + std::fmt::Display, T, P = PeerId> {
    /// Active(RequestIndex)
    Active(I),
    /// Complete(DownloadedData, Peers)
    Complete(T, P),
}
/// `Ok(Some)` = all components downloaded and coupled into `RpcBlock`s;
/// `Ok(None)` = one or more sub-requests still in flight.
pub type BlockComponentsByRangeRequestResult<E> =
    Result<Option<(Vec<RpcBlock<E>>, BatchPeers)>, Error>;
/// Errors produced while coupling by-range block components.
// Derive `Debug` so the error can be logged (`?e`), `expect`ed, and inspected in tests —
// standard practice for error enums.
#[derive(Debug)]
pub enum Error {
    /// Internal invariant violation with a human-readable description
    InternalError(String),
}
impl From<Error> for RpcResponseError {
fn from(e: Error) -> Self {
match e {
Error::InternalError(e) => RpcResponseError::InternalError(e),
}
}
}
impl From<Error> for RpcRequestSendError {
fn from(e: Error) -> Self {
match e {
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
}
}
}
/// FOR TESTING ONLY
///
/// Coarse view of the request's current phase, exposed via `state_step` so tests can assert
/// on state transitions without access to the private `State` machine.
#[cfg(test)]
#[derive(Debug)]
pub enum BlockComponentsByRangeRequestStep {
    /// Waiting for blocks (and, in Deneb, blobs)
    BlocksRequest,
    /// Waiting for custody columns (Fulu only)
    CustodyRequest,
}
impl<T: BeaconChainTypes> BlockComponentsByRangeRequest<T> {
/// Create a new components-by-range request and immediately send its initial network
/// request(s).
///
/// Selects a single block peer from `peers` — preferring peers not in `peers_to_deprioritize`
/// with fewer total cumulative requests, with a random tie-break — and sends a
/// blocks_by_range request to it. Depending on the fork at the batch's epoch it also sends a
/// concurrent blobs_by_range request (Deneb) or defers column fetching to a later
/// custody-by-range request (Fulu).
///
/// Errors if `peers` is empty or sending the initial request(s) fails.
pub fn new(
    id: ComponentsByRangeRequestId,
    request: BlocksByRangeRequest,
    peers: &HashSet<PeerId>,
    peers_to_deprioritize: &HashSet<PeerId>,
    total_requests_per_peer: &HashMap<PeerId, usize>,
    cx: &mut SyncNetworkContext<T>,
) -> Result<Self, RpcRequestSendError> {
    // Induces a compile time panic if this doesn't hold true.
    #[allow(clippy::assertions_on_constants)]
    const _: () = assert!(
        super::super::backfill_sync::BACKFILL_EPOCHS_PER_BATCH == 1
            && super::super::range_sync::EPOCHS_PER_BATCH == 1,
        "To deal with alignment with deneb boundaries, batches need to be of just one epoch"
    );
    // The assertion above ensures each batch is in one single epoch
    let batch_epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch());
    let batch_fork = cx.spec().fork_name_at_epoch(batch_epoch);

    // TODO(das): a change of behaviour here is that if the SyncingChain has a single peer we
    // will request all blocks for the first 5 epochs to that same single peer. Before we would
    // query only idle peers in the syncing chain.
    let Some(block_peer) = peers
        .iter()
        .map(|peer| {
            // Tuple ordering implements the peer preference: `min()` picks the
            // lexicographically smallest tuple.
            (
                // If contains -> 1 (order after), not contains -> 0 (order first)
                peers_to_deprioritize.contains(peer),
                // TODO(das): Should we use active_request_count_by_peer?
                // Prefer peers with less overall requests
                // active_request_count_by_peer.get(peer).copied().unwrap_or(0),
                // Prefer peers with less total cumulative requests, so we fetch data from a
                // diverse set of peers
                total_requests_per_peer.get(peer).copied().unwrap_or(0),
                // Random factor to break ties, otherwise the PeerID breaks ties
                rand::random::<u32>(),
                peer,
            )
        })
        .min()
        .map(|(_, _, _, peer)| *peer)
    else {
        // When a peer disconnects and is removed from the SyncingChain peer set, if the set
        // reaches zero the SyncingChain is removed.
        // TODO(das): add test for this.
        return Err(RpcRequestSendError::InternalError(
            "A batch peer set should never be empty".to_string(),
        ));
    };

    let blocks_req_id = cx.send_blocks_by_range_request(block_peer, request.clone(), id)?;

    let state = if batch_fork.fulu_enabled() {
        // Fulu: column downloads are deferred until we know which blocks have data
        State::FuluEnabled(FuluEnabledState::BlockRequest {
            blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
        })
    } else if batch_fork.deneb_enabled() {
        // TODO(deneb): is it okay to send blobs_by_range requests outside the DA window? I
        // would like the beacon processor / da_checker to be the one that decides if an
        // RpcBlock is valid or not with respect to containing blobs. Having sync not even
        // attempt a requests seems like an added limitation.
        let blobs_req_id = cx.send_blobs_by_range_request(
            block_peer,
            BlobsByRangeRequest {
                start_slot: *request.start_slot(),
                count: *request.count(),
            },
            id,
        )?;
        State::DenebEnabled {
            blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
            blobs_by_range_request: ByRangeRequest::Active(blobs_req_id),
        }
    } else {
        State::Base {
            blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
        }
    };

    Ok(Self {
        id,
        // TODO(das): share the rwlock with the range sync batch. Are peers added to the batch
        // after being created?
        peers: Arc::new(RwLock::new(peers.clone())),
        request,
        state,
    })
}
/// Drive the request's state machine forward as far as possible without new network input.
///
/// Returns `Ok(Some((rpc_blocks, peers)))` once every component for the batch is downloaded
/// and coupled, `Ok(None)` while any sub-request is still in flight, and `Err` on an internal
/// invariant violation. In the Fulu block phase this may send a nested custody-by-range
/// request when the block download finishes and some block has data.
pub fn continue_requests(
    &mut self,
    cx: &mut SyncNetworkContext<T>,
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
    match &mut self.state {
        State::Base {
            blocks_by_range_request,
        } => {
            if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
                // TODO(das): use the peer group
                let peer_group = BatchPeers::new_from_block_peer(*block_peer);
                let rpc_blocks = couple_blocks_base(
                    blocks.to_vec(),
                    cx.network_globals().sampling_columns.len(),
                );
                Ok(Some((rpc_blocks, peer_group)))
            } else {
                // Wait for blocks_by_range requests to complete
                Ok(None)
            }
        }
        State::DenebEnabled {
            blocks_by_range_request,
            blobs_by_range_request,
        } => {
            // Blocks and blobs are requested concurrently; both must finish before coupling
            if let (Some((blocks, block_peer)), Some((blobs, _))) = (
                blocks_by_range_request.to_finished(),
                blobs_by_range_request.to_finished(),
            ) {
                // We use the same block_peer for the blobs request
                let peer_group = BatchPeers::new_from_block_peer(*block_peer);
                let rpc_blocks =
                    couple_blocks_deneb(blocks.to_vec(), blobs.to_vec(), cx.spec())?;
                Ok(Some((rpc_blocks, peer_group)))
            } else {
                // Wait for blocks_by_range and blobs_by_range requests to complete
                Ok(None)
            }
        }
        State::FuluEnabled(state) => match state {
            FuluEnabledState::BlockRequest {
                blocks_by_range_request,
            } => {
                if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
                    // TODO(das): use the peer group
                    // Only blocks that actually contain data require column downloads
                    let blocks_with_data = blocks
                        .iter()
                        .filter(|block| block.has_data())
                        .map(|block| block.signed_block_header())
                        .collect::<Vec<_>>();
                    if blocks_with_data.is_empty() {
                        let custody_column_indices = cx
                            .network_globals()
                            .sampling_columns
                            .clone()
                            .iter()
                            .copied()
                            .collect();
                        // Done, we got blocks and no columns needed
                        let peer_group = BatchPeers::new_from_block_peer(*block_peer);
                        let rpc_blocks = couple_blocks_fulu(
                            blocks.to_vec(),
                            vec![],
                            custody_column_indices,
                            cx.spec(),
                        )?;
                        Ok(Some((rpc_blocks, peer_group)))
                    } else {
                        let mut column_indices = cx
                            .network_globals()
                            .sampling_columns
                            .clone()
                            .iter()
                            .copied()
                            .collect::<Vec<_>>();
                        column_indices.sort_unstable();
                        let req_id = cx
                            .send_custody_by_range_request(
                                self.id,
                                blocks_with_data,
                                Slot::new(*self.request.start_slot())
                                    .epoch(T::EthSpec::slots_per_epoch()),
                                column_indices,
                                self.peers.clone(),
                            )
                            .map_err(|e| match e {
                                RpcRequestSendError::InternalError(e) => {
                                    Error::InternalError(e)
                                }
                            })?;
                        // Advance to the custody phase, keeping the downloaded blocks
                        *state = FuluEnabledState::CustodyRequest {
                            blocks: blocks.to_vec(),
                            block_peer: *block_peer,
                            custody_by_range_request: ByRangeRequest::Active(req_id),
                        };
                        // Wait for the new custody_by_range request to complete
                        Ok(None)
                    }
                } else {
                    // Wait for the block request to complete
                    Ok(None)
                }
            }
            FuluEnabledState::CustodyRequest {
                blocks,
                block_peer,
                custody_by_range_request,
            } => {
                if let Some((columns, column_peers)) = custody_by_range_request.to_finished() {
                    let custody_column_indices = cx
                        .network_globals()
                        .sampling_columns
                        .clone()
                        .iter()
                        .copied()
                        .collect();
                    let peer_group =
                        BatchPeers::new(*block_peer, column_peers.as_reversed_map());
                    let rpc_blocks = couple_blocks_fulu(
                        blocks.to_vec(),
                        columns.to_vec(),
                        custody_column_indices,
                        cx.spec(),
                    )?;
                    Ok(Some((rpc_blocks, peer_group)))
                } else {
                    // Wait for the custody_by_range request to complete
                    Ok(None)
                }
            }
        },
    }
}
/// Record the completion of the blocks_by_range sub-request, then attempt to progress.
///
/// A blocks response is valid in every phase except the Fulu custody phase, where the block
/// download must already have completed.
pub fn on_blocks_by_range_result(
    &mut self,
    id: BlocksByRangeRequestId,
    data: Vec<Arc<SignedBeaconBlock<T::EthSpec>>>,
    peer_id: PeerId,
    cx: &mut SyncNetworkContext<T>,
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
    match &mut self.state {
        State::Base {
            blocks_by_range_request,
        }
        | State::DenebEnabled {
            blocks_by_range_request,
            ..
        }
        | State::FuluEnabled(FuluEnabledState::BlockRequest {
            blocks_by_range_request,
        }) => {
            blocks_by_range_request.finish(id, data, peer_id)?;
        }
        State::FuluEnabled(FuluEnabledState::CustodyRequest { .. }) => {
            return Err(Error::InternalError(
                "Received blocks_by_range response expecting custody_by_range".to_string(),
            ))
        }
    }
    self.continue_requests(cx)
}
/// Record the completion of the blobs_by_range sub-request, then attempt to progress.
///
/// Blobs are only expected in the Deneb (pre-Fulu) phase; a blobs response in any other
/// phase is an internal error.
pub fn on_blobs_by_range_result(
    &mut self,
    id: BlobsByRangeRequestId,
    data: Vec<Arc<BlobSidecar<T::EthSpec>>>,
    peer_id: PeerId,
    cx: &mut SyncNetworkContext<T>,
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
    match &mut self.state {
        State::Base { .. } => {
            return Err(Error::InternalError(
                "Received blobs_by_range response before Deneb".to_string(),
            ))
        }
        State::DenebEnabled {
            blobs_by_range_request,
            ..
        } => {
            blobs_by_range_request.finish(id, data, peer_id)?;
        }
        State::FuluEnabled(_) => {
            return Err(Error::InternalError(
                "Received blobs_by_range response after PeerDAS".to_string(),
            ))
        }
    }
    self.continue_requests(cx)
}
/// Record the completion of the nested custody-by-range sub-request, then attempt to
/// progress.
///
/// Custody columns are only expected in the Fulu custody phase.
pub fn on_custody_by_range_result(
    &mut self,
    id: CustodyByRangeRequestId,
    data: Vec<Arc<DataColumnSidecar<T::EthSpec>>>,
    peers: PeerGroup,
    cx: &mut SyncNetworkContext<T>,
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
    match &mut self.state {
        State::Base { .. } | State::DenebEnabled { .. } => {
            return Err(Error::InternalError(
                "Received custody_by_range response before PeerDAS".to_string(),
            ))
        }
        State::FuluEnabled(state) => match state {
            FuluEnabledState::BlockRequest { .. } => {
                return Err(Error::InternalError(
                    "Received custody_by_range expecting blocks_by_range".to_string(),
                ));
            }
            FuluEnabledState::CustodyRequest {
                custody_by_range_request,
                ..
            } => {
                custody_by_range_request.finish(id, data, peers)?;
            }
        },
    }
    self.continue_requests(cx)
}
/// FOR TESTING ONLY: report which coarse phase the request is in.
#[cfg(test)]
pub fn state_step(&self) -> BlockComponentsByRangeRequestStep {
    // Exhaustive match (no catch-all) so adding a State variant forces an update here
    match &self.state {
        State::Base { .. }
        | State::DenebEnabled { .. }
        | State::FuluEnabled(FuluEnabledState::BlockRequest { .. }) => {
            BlockComponentsByRangeRequestStep::BlocksRequest
        }
        State::FuluEnabled(FuluEnabledState::CustodyRequest { .. }) => {
            BlockComponentsByRangeRequestStep::CustodyRequest
        }
    }
}
}
/// Build pre-Deneb `RpcBlock`s: every block is wrapped without blobs or columns.
fn couple_blocks_base<E: EthSpec>(
    blocks: Vec<Arc<SignedBeaconBlock<E>>>,
    custody_columns_count: usize,
) -> Vec<RpcBlock<E>> {
    let mut rpc_blocks = Vec::with_capacity(blocks.len());
    for block in blocks {
        rpc_blocks.push(RpcBlock::new_without_blobs(None, block, custody_columns_count));
    }
    rpc_blocks
}
fn couple_blocks_deneb<E: EthSpec>(
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
blobs: Vec<Arc<BlobSidecar<E>>>,
spec: &ChainSpec,
) -> Result<Vec<RpcBlock<E>>, Error> {
let mut blobs_by_block = HashMap::<Hash256, Vec<Arc<BlobSidecar<E>>>>::new();
for blob in blobs {
let block_root = blob.block_root();
blobs_by_block.entry(block_root).or_default().push(blob);
}
// Now collect all blobs that match to the block by block root. BlobsByRange request checks
// the inclusion proof so we know that the commitment is the expected.
//
// BlobsByRange request handler ensures that we don't receive more blobs than possible.
// If the peer serving the request sends us blobs that don't pair well we'll send to the
// processor blocks without expected blobs, resulting in a downscoring event. A serving peer
// could serve fake blobs for blocks that don't have data, but it would gain nothing by it
// wasting theirs and our bandwidth 1:1. Therefore blobs that don't pair well are just ignored.
//
// RpcBlock::new ensures that the count of blobs is consistent with the block
blocks
.into_iter()
.map(|block| {
let block_root = get_block_root(&block);
let max_blobs_per_block = spec.max_blobs_per_block(block.epoch()) as usize;
let blobs = blobs_by_block.remove(&block_root).unwrap_or_default();
// BlobsByRange request handler enforces that blobs are sorted by index
let blobs = RuntimeVariableList::new(blobs, max_blobs_per_block).map_err(|_| {
Error::InternalError("Blobs returned exceeds max length".to_string())
})?;
Ok(RpcBlock::new(Some(block_root), block, Some(blobs))
.expect("TODO: don't do matching here"))
})
.collect::<Result<Vec<RpcBlock<E>>, Error>>()
}
/// Couple downloaded `data_columns` to their `blocks` by block root, producing one `RpcBlock`
/// per block. Columns whose index is not in `custody_column_indices` are dropped.
fn couple_blocks_fulu<E: EthSpec>(
    blocks: Vec<Arc<SignedBeaconBlock<E>>>,
    data_columns: Vec<Arc<DataColumnSidecar<E>>>,
    custody_column_indices: Vec<ColumnIndex>,
    spec: &ChainSpec,
) -> Result<Vec<RpcBlock<E>>, Error> {
    // Group data columns by block_root and index
    let mut custody_columns_by_block = HashMap::<Hash256, Vec<CustodyDataColumn<E>>>::new();
    for column in data_columns {
        let block_root = column.block_root();
        if custody_column_indices.contains(&column.index) {
            custody_columns_by_block
                .entry(block_root)
                .or_default()
                // Safe to convert to `CustodyDataColumn`: we have asserted that the index of
                // this column is in the set of `expects_custody_columns` and with the expected
                // block root, so for the expected epoch of this batch.
                .push(CustodyDataColumn::from_asserted_custody(column));
        }
    }
    // Now iterate all blocks ensuring that the block roots of each block and data column match,
    blocks
        .into_iter()
        .map(|block| {
            let block_root = get_block_root(&block);
            let data_columns_with_block_root = custody_columns_by_block
                // Remove to only use columns once
                .remove(&block_root)
                .unwrap_or_default();
            // TODO(das): Change RpcBlock to holding a Vec of DataColumnSidecars so we don't need
            // the spec here.
            RpcBlock::new_with_custody_columns(
                Some(block_root),
                block,
                data_columns_with_block_root,
                custody_column_indices.clone(),
                spec,
            )
            .map_err(Error::InternalError)
        })
        .collect::<Result<Vec<_>, _>>()
}
impl<I: PartialEq + std::fmt::Display, T, P> ByRangeRequest<I, T, P> {
fn finish(&mut self, id: I, data: T, peer_id: P) -> Result<(), Error> {
match self {
Self::Active(expected_id) => {
if expected_id != &id {
return Err(Error::InternalError(format!(
"unexpected req_id expected {expected_id} got {id}"
)));
}
*self = Self::Complete(data, peer_id);
Ok(())
}
Self::Complete(_, _) => Err(Error::InternalError(format!(
"request already complete {id}"
))),
}
}
fn to_finished(&self) -> Option<(&T, &P)> {
match self {
Self::Active(_) => None,
Self::Complete(data, peer_id) => Some((data, peer_id)),
}
}
}

View File

@@ -0,0 +1,481 @@
use super::custody_by_root::{ColumnRequest, Error};
use crate::sync::network_context::RpcResponseError;
use beacon_chain::validator_monitor::timestamp_now;
use beacon_chain::BeaconChainTypes;
use fnv::FnvHashMap;
use lighthouse_network::rpc::methods::DataColumnsByRangeRequest;
use lighthouse_network::service::api_types::{
CustodyByRangeRequestId, DataColumnsByRangeRequestId,
};
use lighthouse_network::{PeerAction, PeerId};
use lru_cache::LRUTimeCache;
use parking_lot::RwLock;
use rand::Rng;
use std::collections::HashSet;
use std::time::{Duration, Instant};
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
use tracing::{debug, warn};
use types::{
data_column_sidecar::ColumnIndex, DataColumnSidecar, Epoch, EthSpec, Hash256,
SignedBeaconBlockHeader, Slot,
};
use super::{PeerGroup, RpcResponseResult, SyncNetworkContext};
/// Expiry of the LRU caches tracking temporarily-faulty peers before they may be retried.
const TEMPORARY_FAULT_EXPIRY_SECONDS: u64 = 15;
/// Maximum age of a request before it is considered expired.
// NOTE(review): not referenced in this chunk's visible code — confirm where it is enforced.
const REQUEST_EXPIRY_SECONDS: u64 = 300;

// Shorthand for a batch of data column sidecars returned by a single RPC response
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
/// Tracks the download of a set of custody column indices for one by-range batch. It spawns
/// one or more `data_columns_by_range` requests (tracked in `active_batch_columns_requests`)
/// until every entry in `column_requests` is satisfied for all `blocks_with_data`.
pub struct ActiveCustodyByRangeRequest<T: BeaconChainTypes> {
    // Instant this request was created
    start_time: Instant,
    // Id of this custody-by-range "super request"
    id: CustodyByRangeRequestId,
    // TODO(das): Pass a better type for the by_range request
    epoch: Epoch,
    /// Blocks that we expect peers to serve data columns for
    blocks_with_data: Vec<SignedBeaconBlockHeader>,
    /// List of column indices this request needs to download to complete successfully
    column_requests: FnvHashMap<
        ColumnIndex,
        ColumnRequest<DataColumnsByRangeRequestId, DataColumnSidecarList<T::EthSpec>>,
    >,
    /// Active requests for 1 or more columns each
    active_batch_columns_requests:
        FnvHashMap<DataColumnsByRangeRequestId, ActiveBatchColumnsRequest>,
    /// Peers that have recently failed to successfully respond to a columns by root request.
    /// Having a LRUTimeCache allows this request to not have to track disconnecting peers.
    peers_with_custody_failures: LRUTimeCache<PeerId>,
    // NOTE(review): expires after the same TEMPORARY_FAULT_EXPIRY_SECONDS as
    // `peers_with_custody_failures` — confirm the intended distinction between the two caches.
    peers_with_temporary_faults: LRUTimeCache<PeerId>,
    // TODO(das): does this HashSet have an OOM risk? We should either: make sure that these
    // request structs are dropped after some time, that disconnected peers are pruned (but we
    // may want to retain faulty information if they just disconnect and reconnect) or make this
    // an LRUTimeCache with a long time (like 5 minutes).
    peers_with_permanent_faults: HashSet<PeerId>,
    /// Set of peers that claim to have imported this block and their custody columns
    lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
    // Zero-sized marker tying this struct to the chain's type parameters
    _phantom: PhantomData<T>,
}
/// Bookkeeping for one in-flight `data_columns_by_range` request covering 1+ column indices.
struct ActiveBatchColumnsRequest {
    // Column indices requested from the peer in this single network request
    indices: Vec<ColumnIndex>,
}
/// `Ok(Some)` = all columns downloaded (columns, serving peers, elapsed time);
/// `Ok(None)` = request still in progress.
pub type CustodyByRangeRequestResult<E> =
    Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
/// Reason an expected column could not be accepted from a peer's response.
enum ColumnResponseError {
    /// Peer returned a column at this slot whose block root does not match the expected block
    NonMatchingColumn {
        slot: Slot,
        actual_block_root: Hash256,
        expected_block_root: Hash256,
    },
    /// Peer returned no column for a slot that is known to have data
    MissingColumn(Slot),
}
impl<T: BeaconChainTypes> ActiveCustodyByRangeRequest<T> {
pub(crate) fn new(
id: CustodyByRangeRequestId,
epoch: Epoch,
blocks_with_data: Vec<SignedBeaconBlockHeader>,
column_indices: &[ColumnIndex],
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
) -> Self {
Self {
start_time: Instant::now(),
id,
epoch,
blocks_with_data,
column_requests: HashMap::from_iter(
column_indices
.iter()
.map(|index| (*index, ColumnRequest::new())),
),
active_batch_columns_requests: <_>::default(),
peers_with_custody_failures: LRUTimeCache::new(Duration::from_secs(
TEMPORARY_FAULT_EXPIRY_SECONDS,
)),
peers_with_temporary_faults: LRUTimeCache::new(Duration::from_secs(
TEMPORARY_FAULT_EXPIRY_SECONDS,
)),
peers_with_permanent_faults: HashSet::new(),
lookup_peers,
_phantom: PhantomData,
}
}
/// Insert a downloaded column into an active custody request. Then make progress on the
/// entire request.
///
/// ### Returns
///
/// - `Err`: Custody request has failed and will be dropped
/// - `Ok(Some)`: Custody request has successfully completed and will be dropped
/// - `Ok(None)`: Custody request still active
pub(crate) fn on_data_column_downloaded(
&mut self,
peer_id: PeerId,
req_id: DataColumnsByRangeRequestId,
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
cx: &mut SyncNetworkContext<T>,
) -> CustodyByRangeRequestResult<T::EthSpec> {
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
warn!(
id = %self.id,
%req_id,
"Received custody by range response for unrequested index"
);
return Ok(None);
};
match resp {
Ok((data_columns, seen_timestamp)) => {
// Map columns by index as an optimization to not loop the returned list on each
// requested index. The worst case is 128 loops over a 128 item vec + mutation to
// drop the consumed columns.
let mut data_columns_by_index =
HashMap::<(ColumnIndex, Slot), Arc<DataColumnSidecar<T::EthSpec>>>::new();
for data_column in data_columns {
data_columns_by_index
.insert((data_column.index, data_column.slot()), data_column);
}
// Accumulate columns that the peer does not have to issue a single log per request
let mut missing_column_indexes = vec![];
let mut incorrect_column_indices = vec![];
let mut imported_column_indices = vec![];
for index in &batch_request.indices {
let column_request =
self.column_requests
.get_mut(index)
.ok_or(Error::InternalError(format!(
"unknown column_index {index}"
)))?;
let columns_at_index = self
.blocks_with_data
.iter()
.map(|block| {
let slot = block.message.slot;
if let Some(data_column) = data_columns_by_index.remove(&(*index, slot))
{
let actual_block_root =
data_column.signed_block_header.message.canonical_root();
let expected_block_root = block.message.canonical_root();
if actual_block_root != expected_block_root {
Err(ColumnResponseError::NonMatchingColumn {
slot,
actual_block_root: data_column
.signed_block_header
.message
.canonical_root(),
expected_block_root: block.message.canonical_root(),
})
} else {
Ok(data_column)
}
} else {
// The following three statements are true:
// - block at `slot` is not missed, and has data
// - peer custodies this column `index`
// - peer claims to be synced to at least `slot`
//
// Therefore not returning this column is an protocol violation that we
// penalize and mark the peer as failed to retry with another peer.
//
// TODO(das) do not consider this case a success. We know for sure the block has
// data. However we allow the peer to return empty as we can't attribute fault.
// TODO(das): Should track which columns are missing and eventually give up
// TODO(das): If the peer is in the lookup peer set it claims to have imported
// the block AND its custody columns. So in this case we can downscore
Err(ColumnResponseError::MissingColumn(slot))
}
})
.collect::<Result<Vec<_>, _>>();
match columns_at_index {
Ok(columns_at_index) => {
column_request.on_download_success(
req_id,
peer_id,
columns_at_index,
seen_timestamp,
)?;
imported_column_indices.push(index);
}
Err(e) => {
column_request.on_download_error(req_id)?;
match e {
ColumnResponseError::NonMatchingColumn {
slot,
actual_block_root,
expected_block_root,
} => {
incorrect_column_indices.push((
index,
slot,
actual_block_root,
expected_block_root,
));
}
ColumnResponseError::MissingColumn(slot) => {
missing_column_indexes.push((index, slot));
}
}
}
}
}
// Log missing_column_indexes and incorrect_column_indices here in batch per request
// to make this logs more compact and less noisy.
if !imported_column_indices.is_empty() {
// TODO(das): this log may be redundant. We already log on DataColumnsByRange
// completed, and on DataColumnsByRange sent we log the column indices
// ```
// Sync RPC request sent method="DataColumnsByRange" slots=8 epoch=4 columns=[52] peer=16Uiu2HAmEooeoHzHDYS35TSHrJDSfmREecPyFskrLPYm9Gm1EURj id=493/399/10/RangeSync/4/1
// Sync RPC request completed id=493/399/10/RangeSync/4/1 method="DataColumnsByRange" count=1
// ```
// Which can be traced to this custody by range request, and the initial log
debug!(
id = %self.id,
data_columns_by_range_req_id = %req_id,
%peer_id,
count = imported_column_indices.len(),
"Custody by range request download imported columns"
);
}
if !incorrect_column_indices.is_empty() {
// Note: Batch logging that columns are missing to not spam logger
debug!(
id = %self.id,
data_columns_by_range_req_id = %req_id,
%peer_id,
// TODO(das): this property can become very noisy, being the full range 0..128
incorrect_columns = ?incorrect_column_indices,
"Custody by range peer returned non-matching columns"
);
// Returning a non-canonical column is not a permanent fault. We should not
// retry the peer for some time but the peer may return a canonical column in
// the future.
// TODO(das): if this finalized sync the fault is permanent
self.peers_with_temporary_faults.insert(peer_id);
cx.report_peer(
peer_id,
PeerAction::MidToleranceError,
"non-matching data column",
);
}
if !missing_column_indexes.is_empty() {
// Note: Batch logging that columns are missing to not spam logger
debug!(
id = %self.id,
data_columns_by_range_req_id = %req_id,
%peer_id,
// TODO(das): this property can become very noisy, being the full range 0..128
?missing_column_indexes,
"Custody by range peer claims to not have some data"
);
// Not having columns is not a permanent fault. The peer may be backfilling.
self.peers_with_custody_failures.insert(peer_id);
cx.report_peer(peer_id, PeerAction::MidToleranceError, "custody_failure");
}
}
Err(err) => {
debug!(
id = %self.id,
%req_id,
%peer_id,
error = ?err,
"Custody by range download error"
);
// TODO(das): Should mark peer as failed and try from another peer
for column_index in &batch_request.indices {
self.column_requests
.get_mut(column_index)
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
.on_download_error_and_mark_failure(req_id, err.clone())?;
}
match err {
// Verify errors are correctness errors against our request or about the
// returned data itself. This peer is faulty or malicious, should not be
// retried.
RpcResponseError::VerifyError(_) => {
self.peers_with_permanent_faults.insert(peer_id);
}
// Network errors are not permanent faults and worth retrying
RpcResponseError::RpcError(_) => {
self.peers_with_temporary_faults.insert(peer_id);
}
// Do nothing for internal errors
RpcResponseError::InternalError(_) => {}
// unreachable
RpcResponseError::RequestExpired(_) => {}
}
}
};
self.continue_requests(cx)
}
/// Drive all column requests towards completion.
///
/// ### Returns
///
/// - `Ok(Some(..))`: every column is downloaded; returns the flattened column list, the
///   `PeerGroup` attributing which peer served which columns, and the max seen timestamp.
/// - `Ok(None)`: downloads still pending or idle waiting for custody peers.
/// - `Err`: a column exhausted its download attempts, or the request expired with nothing
///   in flight and no custody peers available.
pub(crate) fn continue_requests(
    &mut self,
    cx: &mut SyncNetworkContext<T>,
) -> CustodyByRangeRequestResult<T::EthSpec> {
    if self.column_requests.values().all(|r| r.is_downloaded()) {
        // All requests have completed successfully.
        let mut peers = HashMap::<PeerId, Vec<usize>>::new();
        let mut seen_timestamps = vec![];
        // `mem::take` consumes the per-column results; per this method's contract the
        // whole request is dropped after returning `Ok(Some)` or `Err`.
        let columns = std::mem::take(&mut self.column_requests)
            .into_values()
            .map(|request| {
                let (peer, data_columns, seen_timestamp) = request.complete()?;
                for data_column in &data_columns {
                    let columns_by_peer = peers.entry(peer).or_default();
                    if !columns_by_peer.contains(&(data_column.index as usize)) {
                        columns_by_peer.push(data_column.index as usize);
                    }
                }
                seen_timestamps.push(seen_timestamp);
                Ok(data_columns)
            })
            .collect::<Result<Vec<_>, _>>()?
            // Flatten Vec<Vec<Columns>> to Vec<Columns>
            // TODO(das): maybe not optimal for the coupling logic later
            .into_iter()
            .flatten()
            .collect();
        let peer_group = PeerGroup::from_set(peers);
        let max_seen_timestamp = seen_timestamps.into_iter().max().unwrap_or(timestamp_now());
        return Ok(Some((columns, peer_group, max_seen_timestamp)));
    }

    let active_request_count_by_peer = cx.active_request_count_by_peer();
    let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
    let lookup_peers = self.lookup_peers.read();

    // Need to:
    // - track how many active requests a peer has for load balancing
    // - which peers have failures to attempt others
    // - which peer returned what to have PeerGroup attributability
    for (column_index, request) in self.column_requests.iter_mut() {
        if request.is_awaiting_download() {
            if let Some(last_error) = request.too_many_failures() {
                return Err(Error::TooManyDownloadErrors(last_error));
            }

            // TODO(das): When there is a fork and only a subset of your peers know about a
            // block, we should only query the peers on that fork. Should this case be
            // handled? How to handle it?
            let custodial_peers = cx.get_custodial_peers(*column_index);

            // We draw from the total set of peers, but prioritize those peers who we have
            // received an attestation / status / block message claiming to have imported the
            // lookup. The frequency of those messages is low, so drawing only from lookup_peers
            // could cause many lookups to take much longer or fail as they don't have enough
            // custody peers on a given column.
            //
            // Peers are ranked by the lexicographic order of the tuple below: lowest tuple
            // wins after `sort_unstable`.
            let mut priorized_peers = custodial_peers
                .iter()
                .filter(|peer| {
                    // Never request again peers with permanent faults
                    // Do not request peers with custody failures for some time
                    !self.peers_with_permanent_faults.contains(peer)
                        && !self.peers_with_custody_failures.contains(peer)
                })
                .map(|peer| {
                    (
                        // Prioritize peers that claim to have imported this block
                        if lookup_peers.contains(peer) { 0 } else { 1 },
                        // De-prioritize peers that have failed to successfully respond to
                        // requests recently, but allow to immediately request them again
                        self.peers_with_temporary_faults.contains(peer),
                        // Prefer peers with fewer requests to load balance across peers.
                        // We batch requests to the same peer, so count existence in the
                        // `columns_to_request_by_peer` as a single 1 request.
                        active_request_count_by_peer.get(peer).copied().unwrap_or(0)
                            + columns_to_request_by_peer.get(peer).map(|_| 1).unwrap_or(0),
                        // Random factor to break ties, otherwise the PeerID breaks ties
                        rand::thread_rng().gen::<u32>(),
                        *peer,
                    )
                })
                .collect::<Vec<_>>();
            priorized_peers.sort_unstable();

            if let Some((_, _, _, _, peer_id)) = priorized_peers.first() {
                columns_to_request_by_peer
                    .entry(*peer_id)
                    .or_default()
                    .push(*column_index);
            } else {
                // Do not issue requests if there is no custody peer on this column. The request
                // will sit idle without making progress. The only way to make progress is:
                // - Add a new peer that custodies the missing columns
                // - Call `continue_requests`
                //
                // Otherwise this request should be dropped and failed after some time.
                // TODO(das): implement the above
            }
        }
    }

    // Issue one batched DataColumnsByRange request per selected peer, covering all columns
    // assigned to that peer above.
    for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
        let req_id = cx
            .send_data_columns_by_range_request(
                peer_id,
                DataColumnsByRangeRequest {
                    // TODO(das): generalize with constants from batch
                    start_slot: self
                        .epoch
                        .start_slot(T::EthSpec::slots_per_epoch())
                        .as_u64(),
                    count: T::EthSpec::slots_per_epoch(),
                    columns: indices.clone(),
                },
                self.id,
            )
            .map_err(|e| Error::InternalError(format!("send failed {e}")))?;

        for column_index in &indices {
            let column_request = self
                .column_requests
                .get_mut(column_index)
                // Should never happen: column_index is iterated from column_requests
                .ok_or(Error::InternalError(format!(
                    "Unknown column_request {column_index}"
                )))?;
            column_request.on_download_start(req_id)?;
        }

        self.active_batch_columns_requests
            .insert(req_id, ActiveBatchColumnsRequest { indices });
    }

    // Expire the whole request if it has been alive too long with nothing in flight: the
    // remaining columns are stuck waiting for custody peers that never appeared.
    if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
        && !self.column_requests.values().any(|r| r.is_downloading())
    {
        let awaiting_peers_indicies = self
            .column_requests
            .iter()
            .filter(|(_, r)| r.is_awaiting_download())
            .map(|(id, _)| *id)
            .collect::<Vec<_>>();
        return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
    }

    Ok(None)
}
}

View File

@@ -1,5 +1,6 @@
use crate::sync::network_context::{
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest,
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, RpcRequestSendError,
RpcResponseError,
};
use beacon_chain::validator_monitor::timestamp_now;
use beacon_chain::BeaconChainTypes;
@@ -12,22 +13,29 @@ use rand::Rng;
use std::collections::HashSet;
use std::time::{Duration, Instant};
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
use strum::IntoStaticStr;
use tracing::{debug, warn};
use types::EthSpec;
use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256};
use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext};
const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5;
const MAX_STALE_NO_PEERS_DURATION: Duration = Duration::from_secs(30);
const REQUEST_EXPIRY_SECONDS: u64 = 300;
/// TODO(das): this attempt count is nested into the existing lookup request count.
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
pub struct ActiveCustodyByRootRequest<T: BeaconChainTypes> {
start_time: Instant,
block_root: Hash256,
custody_id: CustodyId,
/// List of column indices this request needs to download to complete successfully
column_requests: FnvHashMap<ColumnIndex, ColumnRequest<T::EthSpec>>,
#[allow(clippy::type_complexity)]
column_requests: FnvHashMap<
ColumnIndex,
ColumnRequest<DataColumnsByRootRequestId, Arc<DataColumnSidecar<T::EthSpec>>>,
>,
/// Active requests for 1 or more columns each
active_batch_columns_requests:
FnvHashMap<DataColumnsByRootRequestId, ActiveBatchColumnsRequest>,
@@ -40,29 +48,47 @@ pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
_phantom: PhantomData<T>,
}
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug)]
pub enum Error {
SendFailed(&'static str),
TooManyFailures,
BadState(String),
NoPeer(ColumnIndex),
/// Received a download result for a different request id than the in-flight request.
/// There should only exist a single request at a time. Having multiple requests is a bug and
/// can result in undefined state, so it's treated as a hard error and the lookup is dropped.
UnexpectedRequestId {
expected_req_id: DataColumnsByRootRequestId,
req_id: DataColumnsByRootRequestId,
},
InternalError(String),
TooManyDownloadErrors(RpcResponseError),
ExpiredNoCustodyPeers(Vec<ColumnIndex>),
}
impl From<Error> for RpcResponseError {
fn from(e: Error) -> Self {
match e {
Error::InternalError(e) => RpcResponseError::InternalError(e),
Error::TooManyDownloadErrors(e) => e,
Error::ExpiredNoCustodyPeers(indices) => RpcResponseError::RequestExpired(format!(
"Expired waiting for custody peers {indices:?}"
)),
}
}
}
impl From<Error> for RpcRequestSendError {
fn from(e: Error) -> Self {
match e {
Error::TooManyDownloadErrors(_) => {
RpcRequestSendError::InternalError("Download error in request send".to_string())
}
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
Error::ExpiredNoCustodyPeers(_) => RpcRequestSendError::InternalError(
"Request can not expire when requesting it".to_string(),
),
}
}
}
struct ActiveBatchColumnsRequest {
indices: Vec<ColumnIndex>,
}
pub type CustodyRequestResult<E> =
pub type CustodyByRootRequestResult<E> =
Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
impl<T: BeaconChainTypes> ActiveCustodyByRootRequest<T> {
pub(crate) fn new(
block_root: Hash256,
custody_id: CustodyId,
@@ -70,6 +96,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
) -> Self {
Self {
start_time: Instant::now(),
block_root,
custody_id,
column_requests: HashMap::from_iter(
@@ -98,7 +125,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
req_id: DataColumnsByRootRequestId,
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
cx: &mut SyncNetworkContext<T>,
) -> CustodyRequestResult<T::EthSpec> {
) -> CustodyByRootRequestResult<T::EthSpec> {
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
warn!(
block_root = ?self.block_root,
@@ -131,7 +158,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
let column_request = self
.column_requests
.get_mut(column_index)
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
if let Some(data_column) = data_columns.remove(column_index) {
column_request.on_download_success(
@@ -182,8 +209,8 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
for column_index in &batch_request.indices {
self.column_requests
.get_mut(column_index)
.ok_or(Error::BadState("unknown column_index".to_owned()))?
.on_download_error_and_mark_failure(req_id)?;
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
.on_download_error_and_mark_failure(req_id, err.clone())?;
}
self.failed_peers.insert(peer_id);
@@ -196,7 +223,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
pub(crate) fn continue_requests(
&mut self,
cx: &mut SyncNetworkContext<T>,
) -> CustodyRequestResult<T::EthSpec> {
) -> CustodyByRootRequestResult<T::EthSpec> {
if self.column_requests.values().all(|r| r.is_downloaded()) {
// All requests have completed successfully.
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
@@ -222,6 +249,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
let active_request_count_by_peer = cx.active_request_count_by_peer();
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
let lookup_peers = self.lookup_peers.read();
let mut indices_without_peers = vec![];
// Need to:
// - track how many active requests a peer has for load balancing
@@ -229,9 +257,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
// - which peer returned what to have PeerGroup attributability
for (column_index, request) in self.column_requests.iter_mut() {
if let Some(wait_duration) = request.is_awaiting_download() {
if request.download_failures > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
return Err(Error::TooManyFailures);
if request.is_awaiting_download() {
if let Some(last_error) = request.too_many_failures() {
return Err(Error::TooManyDownloadErrors(last_error));
}
// TODO(das): When is a fork and only a subset of your peers know about a block, we should
@@ -270,21 +298,22 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
.entry(*peer_id)
.or_default()
.push(*column_index);
} else if wait_duration > MAX_STALE_NO_PEERS_DURATION {
// Allow to request to sit stale in `NotStarted` state for at most
// `MAX_STALE_NO_PEERS_DURATION`, else error and drop the request. Note that
// lookup will naturally retry when other peers send us attestations for
// descendants of this un-available lookup.
return Err(Error::NoPeer(*column_index));
} else {
// Do not issue requests if there is no custody peer on this column
// Do not issue requests if there is no custody peer on this column. The request
// will sit idle without making progress. The only way to make to progress is:
// - Add a new peer that custodies the missing columns
// - Call `continue_requests`
//
// Otherwise this request should be dropped and failed after some time.
// TODO(das): implement the above
indices_without_peers.push(column_index);
}
}
}
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
let request_result = cx
.data_column_lookup_request(
.data_columns_by_root_request(
DataColumnsByRootRequester::Custody(self.custody_id),
peer_id,
DataColumnsByRootSingleBlockRequest {
@@ -297,7 +326,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
// columns. For the rest of peers, don't downscore if columns are missing.
lookup_peers.contains(&peer_id),
)
.map_err(Error::SendFailed)?;
.map_err(|e| {
Error::InternalError(format!("Send failed data_columns_by_root {e:?}"))
})?;
match request_result {
LookupRequestResult::RequestSent(req_id) => {
@@ -306,7 +337,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
.column_requests
.get_mut(column_index)
// Should never happen: column_index is iterated from column_requests
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
column_request.on_download_start(req_id)?;
}
@@ -319,117 +350,149 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
}
}
if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
&& !self.column_requests.values().any(|r| r.is_downloading())
{
let awaiting_peers_indicies = self
.column_requests
.iter()
.filter(|(_, r)| r.is_awaiting_download())
.map(|(id, _)| *id)
.collect::<Vec<_>>();
return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
}
Ok(None)
}
}
/// TODO(das): this attempt count is nested into the existing lookup request count.
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
struct ColumnRequest<E: EthSpec> {
status: Status<E>,
download_failures: usize,
pub struct ColumnRequest<I: std::fmt::Display + PartialEq, T> {
status: Status<I, T>,
download_failures: Vec<RpcResponseError>,
}
#[derive(Debug, Clone)]
enum Status<E: EthSpec> {
NotStarted(Instant),
Downloading(DataColumnsByRootRequestId),
Downloaded(PeerId, Arc<DataColumnSidecar<E>>, Duration),
#[derive(Debug, Clone, IntoStaticStr)]
pub enum Status<I, T> {
NotStarted,
Downloading(I),
Downloaded(PeerId, T, Duration),
}
impl<E: EthSpec> ColumnRequest<E> {
fn new() -> Self {
impl<I: std::fmt::Display + PartialEq, T> ColumnRequest<I, T> {
pub fn new() -> Self {
Self {
status: Status::NotStarted(Instant::now()),
download_failures: 0,
status: Status::NotStarted,
download_failures: vec![],
}
}
fn is_awaiting_download(&self) -> Option<Duration> {
pub fn is_awaiting_download(&self) -> bool {
match self.status {
Status::NotStarted(start_time) => Some(start_time.elapsed()),
Status::Downloading { .. } | Status::Downloaded { .. } => None,
Status::NotStarted => true,
Status::Downloading { .. } | Status::Downloaded { .. } => false,
}
}
fn is_downloaded(&self) -> bool {
pub fn is_downloading(&self) -> bool {
match self.status {
Status::NotStarted { .. } | Status::Downloading { .. } => false,
Status::NotStarted => false,
Status::Downloading { .. } => true,
Status::Downloaded { .. } => false,
}
}
pub fn is_downloaded(&self) -> bool {
match self.status {
Status::NotStarted | Status::Downloading { .. } => false,
Status::Downloaded { .. } => true,
}
}
fn on_download_start(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
pub fn too_many_failures(&self) -> Option<RpcResponseError> {
if self.download_failures.len() > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
Some(
self.download_failures
.last()
.cloned()
.expect("download_failures is not empty"),
)
} else {
None
}
}
pub fn on_download_start(&mut self, req_id: I) -> Result<(), Error> {
match &self.status {
Status::NotStarted { .. } => {
Status::NotStarted => {
self.status = Status::Downloading(req_id);
Ok(())
}
other => Err(Error::BadState(format!(
"bad state on_download_start expected NotStarted got {other:?}"
other => Err(Error::InternalError(format!(
"bad state on_download_start expected NotStarted got {}",
Into::<&'static str>::into(other),
))),
}
}
fn on_download_error(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
pub fn on_download_error(&mut self, req_id: I) -> Result<(), Error> {
match &self.status {
Status::Downloading(expected_req_id) => {
if req_id != *expected_req_id {
return Err(Error::UnexpectedRequestId {
expected_req_id: *expected_req_id,
req_id,
});
return Err(Error::InternalError(format!(
"Received download result for req_id {req_id} expecting {expected_req_id}"
)));
}
self.status = Status::NotStarted(Instant::now());
self.status = Status::NotStarted;
Ok(())
}
other => Err(Error::BadState(format!(
"bad state on_download_error expected Downloading got {other:?}"
other => Err(Error::InternalError(format!(
"bad state on_download_error expected Downloading got {}",
Into::<&'static str>::into(other),
))),
}
}
fn on_download_error_and_mark_failure(
pub fn on_download_error_and_mark_failure(
&mut self,
req_id: DataColumnsByRootRequestId,
req_id: I,
e: RpcResponseError,
) -> Result<(), Error> {
// TODO(das): Should track which peers don't have data
self.download_failures += 1;
self.download_failures.push(e);
self.on_download_error(req_id)
}
fn on_download_success(
pub fn on_download_success(
&mut self,
req_id: DataColumnsByRootRequestId,
req_id: I,
peer_id: PeerId,
data_column: Arc<DataColumnSidecar<E>>,
data_column: T,
seen_timestamp: Duration,
) -> Result<(), Error> {
match &self.status {
Status::Downloading(expected_req_id) => {
if req_id != *expected_req_id {
return Err(Error::UnexpectedRequestId {
expected_req_id: *expected_req_id,
req_id,
});
return Err(Error::InternalError(format!(
"Received download result for req_id {req_id} expecting {expected_req_id}"
)));
}
self.status = Status::Downloaded(peer_id, data_column, seen_timestamp);
Ok(())
}
other => Err(Error::BadState(format!(
"bad state on_download_success expected Downloading got {other:?}"
other => Err(Error::InternalError(format!(
"bad state on_download_success expected Downloading got {}",
Into::<&'static str>::into(other),
))),
}
}
fn complete(self) -> Result<(PeerId, Arc<DataColumnSidecar<E>>, Duration), Error> {
pub fn complete(self) -> Result<(PeerId, T, Duration), Error> {
match self.status {
Status::Downloaded(peer_id, data_column, seen_timestamp) => {
Ok((peer_id, data_column, seen_timestamp))
}
other => Err(Error::BadState(format!(
"bad state complete expected Downloaded got {other:?}"
other => Err(Error::InternalError(format!(
"bad state complete expected Downloaded got {}",
Into::<&'static str>::into(other),
))),
}
}

View File

@@ -26,7 +26,7 @@ mod blocks_by_root;
mod data_columns_by_range;
mod data_columns_by_root;
#[derive(Debug, PartialEq, Eq, IntoStaticStr)]
#[derive(Debug, Clone, PartialEq, Eq, IntoStaticStr)]
pub enum LookupVerifyError {
NotEnoughResponsesReturned {
actual: usize,
@@ -177,12 +177,10 @@ impl<K: Eq + Hash, T: ActiveRequestItems> ActiveRequests<K, T> {
}
}
pub fn active_requests_of_peer(&self, peer_id: &PeerId) -> Vec<&K> {
pub fn active_requests(&self) -> impl Iterator<Item = (&K, &PeerId)> {
self.requests
.iter()
.filter(|(_, request)| &request.peer_id == peer_id)
.map(|(id, _)| id)
.collect()
.map(|(id, request)| (id, &request.peer_id))
}
pub fn iter_request_peers(&self) -> impl Iterator<Item = PeerId> + '_ {

View File

@@ -98,13 +98,13 @@ impl<T: BeaconChainTypes> Sampling<T> {
// TODO(das): Should track failed sampling request for some time? Otherwise there's
// a risk of a loop with multiple triggers creating the request, then failing,
// and repeat.
debug!(?id, "Ignoring duplicate sampling request");
debug!(%id, "Ignoring duplicate sampling request");
return None;
}
};
debug!(
?id,
%id,
column_selection = ?request.column_selection(),
"Created new sample request"
);
@@ -138,7 +138,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
) -> Option<(SamplingRequester, SamplingResult)> {
let Some(request) = self.requests.get_mut(&id.id) else {
// TOOD(das): This log can happen if the request is error'ed early and dropped
debug!(?id, "Sample downloaded event for unknown request");
debug!(%id, "Sample downloaded event for unknown request");
return None;
};
@@ -167,7 +167,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
) -> Option<(SamplingRequester, SamplingResult)> {
let Some(request) = self.requests.get_mut(&id.id) else {
// TOOD(das): This log can happen if the request is error'ed early and dropped
debug!(?id, "Sample verified event for unknown request");
debug!(%id, "Sample verified event for unknown request");
return None;
};
@@ -191,7 +191,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
) -> Option<(SamplingRequester, SamplingResult)> {
let result = result.transpose();
if let Some(result) = result {
debug!(?id, ?result, "Sampling request completed, removing");
debug!(%id, ?result, "Sampling request completed, removing");
metrics::inc_counter_vec(
&metrics::SAMPLING_REQUEST_RESULT,
&[metrics::from_result(&result)],
@@ -570,7 +570,7 @@ impl<T: BeaconChainTypes> ActiveSamplingRequest<T> {
// Send requests.
let mut sent_request = false;
for (peer_id, column_indexes) in column_indexes_to_request {
cx.data_column_lookup_request(
cx.data_columns_by_root_request(
DataColumnsByRootRequester::Sampling(SamplingId {
id: self.requester_id,
sampling_request_id: self.current_sampling_request_id,

View File

@@ -1,4 +1,5 @@
use beacon_chain::block_verification_types::RpcBlock;
use itertools::Itertools;
use lighthouse_network::rpc::methods::BlocksByRangeRequest;
use lighthouse_network::service::api_types::Id;
use lighthouse_network::PeerId;
@@ -17,15 +18,7 @@ const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
/// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;
/// Type of expected batch.
#[derive(Debug, Copy, Clone, Display)]
#[strum(serialize_all = "snake_case")]
pub enum ByRangeRequestType {
BlocksAndColumns,
BlocksAndBlobs,
Blocks,
}
// TODO(das): Consider merging with PeerGroup
#[derive(Clone, Debug)]
pub struct BatchPeers {
block_peer: PeerId,
@@ -53,6 +46,12 @@ impl BatchPeers {
pub fn column(&self, index: &ColumnIndex) -> Option<&PeerId> {
self.column_peers.get(index)
}
pub fn iter_unique_peers(&self) -> impl Iterator<Item = &PeerId> {
std::iter::once(&self.block_peer)
.chain(self.column_peers.values())
.unique()
}
}
/// Allows customisation of the above constants used in other sync methods such as BackFillSync.

View File

@@ -10,7 +10,7 @@ use itertools::Itertools;
use lighthouse_network::service::api_types::Id;
use lighthouse_network::{PeerAction, PeerId};
use logging::crit;
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
use std::collections::{btree_map::Entry, BTreeMap, HashMap, HashSet};
use strum::IntoStaticStr;
use tracing::{debug, instrument, warn};
use types::{Epoch, EthSpec, Hash256, Slot};
@@ -87,9 +87,11 @@ pub struct SyncingChain<T: BeaconChainTypes> {
batches: BTreeMap<BatchId, BatchInfo<T::EthSpec>>,
/// The peers that agree on the `target_head_slot` and `target_head_root` as a canonical chain
/// and thus available to download this chain from, as well as the batches we are currently
/// requesting.
peers: HashSet<PeerId>,
/// and thus available to download this chain from.
///
/// Also, For each peer tracks the total requests done per peer as part of this SyncingChain
/// `HashMap<peer, total_requests_per_peer>`
peers: HashMap<PeerId, usize>,
/// Starting epoch of the next batch that needs to be downloaded.
to_be_downloaded: BatchId,
@@ -121,7 +123,40 @@ pub enum ChainSyncingState {
Syncing,
}
#[cfg(test)]
#[derive(Debug, Eq, PartialEq)]
pub enum BatchStateSummary {
Downloading,
Processing,
AwaitingProcessing,
AwaitingValidation,
Unexpected(&'static str),
}
impl<T: BeaconChainTypes> SyncingChain<T> {
/// Returns a summary of batch states for assertions in tests.
#[cfg(test)]
pub fn batches_state(&self) -> Vec<(BatchId, BatchStateSummary)> {
self.batches
.iter()
.map(|(id, batch)| {
let state = match batch.state() {
// A batch is never left in this state, it's only the initial value
BatchState::AwaitingDownload => {
BatchStateSummary::Unexpected("AwaitingDownload")
}
BatchState::Downloading { .. } => BatchStateSummary::Downloading,
BatchState::AwaitingProcessing { .. } => BatchStateSummary::AwaitingProcessing,
BatchState::Poisoned => BatchStateSummary::Unexpected("Poisoned"),
BatchState::Processing { .. } => BatchStateSummary::Processing,
BatchState::Failed => BatchStateSummary::Unexpected("Failed"),
BatchState::AwaitingValidation { .. } => BatchStateSummary::AwaitingValidation,
};
(*id, state)
})
.collect()
}
#[allow(clippy::too_many_arguments)]
pub fn new(
id: Id,
@@ -138,7 +173,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
target_head_slot,
target_head_root,
batches: BTreeMap::new(),
peers: HashSet::from_iter([peer_id]),
peers: HashMap::from_iter([(peer_id, <_>::default())]),
to_be_downloaded: start_epoch,
processing_target: start_epoch,
optimistic_start: None,
@@ -168,7 +203,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
/// Peers currently syncing this chain.
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
self.peers.iter().cloned()
self.peers.keys().cloned()
}
/// Progress in epochs made by the chain
@@ -221,6 +256,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
request_id: Id,
blocks: Vec<RpcBlock<T::EthSpec>>,
) -> ProcessingResult {
// Account for one more requests to this peer
// TODO(das): this code assumes that we do a single request per peer per RpcBlock
for peer in batch_peers.iter_unique_peers() {
*self.peers.entry(*peer).or_default() += 1;
}
// check if we have this batch
let batch = match self.batches.get_mut(&batch_id) {
None => {
@@ -400,11 +441,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
self.request_batches(network)?;
}
}
} else if !self.good_peers_on_sampling_subnets(self.processing_target, network) {
// This is to handle the case where no batch was sent for the current processing
// target when there is no sampling peers available. This is a valid state and should not
// return an error.
return Ok(KeepChain);
} else {
return Err(RemoveChain::WrongChainState(format!(
"Batch not found for current processing target {}",
@@ -577,7 +613,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
"Batch failed to download. Dropping chain scoring peers"
);
for peer in self.peers.drain() {
for (peer, _) in self.peers.drain() {
network.report_peer(peer, penalty, "faulty_chain");
}
Err(RemoveChain::ChainFailed {
@@ -842,7 +878,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
network: &mut SyncNetworkContext<T>,
peer_id: PeerId,
) -> ProcessingResult {
self.peers.insert(peer_id);
self.peers.insert(peer_id, <_>::default());
self.request_batches(network)
}
@@ -854,7 +890,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
&mut self,
network: &mut SyncNetworkContext<T>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: Id,
err: RpcResponseError,
) -> ProcessingResult {
@@ -869,7 +904,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
debug!(
batch_epoch = %batch_id,
batch_state = ?batch.state(),
%peer_id,
%request_id,
?batch_state,
"Batch not expecting block"
@@ -880,12 +914,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
batch_epoch = %batch_id,
batch_state = ?batch.state(),
error = ?err,
%peer_id,
%request_id,
"Batch download error"
);
if let BatchOperationOutcome::Failed { blacklist } =
batch.download_failed(Some(*peer_id))?
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
// mechanism compatible with PeerDAS and before PeerDAS?
batch.download_failed(None)?
{
return Err(RemoveChain::ChainFailed {
blacklist,
@@ -896,7 +931,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
} else {
debug!(
batch_epoch = %batch_id,
%peer_id,
%request_id,
batch_state,
"Batch not found"
@@ -937,6 +971,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
},
&synced_peers,
&failed_peers,
&self.peers,
) {
Ok(request_id) => {
// inform the batch about the new request
@@ -953,14 +988,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
return Ok(KeepChain);
}
Err(e) => match e {
// TODO(das): Handle the NoPeer case explicitly and don't drop the batch. For
// sync to work properly it must be okay to have "stalled" batches in
// AwaitingDownload state. Currently it will error with invalid state if
// that happens. Sync manager must periodicatlly prune stalled batches like
// we do for lookup sync. Then we can deprecate the redundant
// `good_peers_on_sampling_subnets` checks.
e
@ (RpcRequestSendError::NoPeer(_) | RpcRequestSendError::InternalError(_)) => {
RpcRequestSendError::InternalError(e) => {
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request");
// register the failed download and check if the batch can be retried
@@ -1019,11 +1047,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// check if we have the batch for our optimistic start. If not, request it first.
// We wait for this batch before requesting any other batches.
if let Some(epoch) = self.optimistic_start {
if !self.good_peers_on_sampling_subnets(epoch, network) {
debug!("Waiting for peers to be available on sampling column subnets");
return Ok(KeepChain);
}
if let Entry::Vacant(entry) = self.batches.entry(epoch) {
let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH);
entry.insert(optimistic_batch);
@@ -1046,35 +1069,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
Ok(KeepChain)
}
/// Checks all sampling column subnets for peers. Returns `true` if there is at least one peer in
/// every sampling column subnet.
fn good_peers_on_sampling_subnets(
&self,
epoch: Epoch,
network: &SyncNetworkContext<T>,
) -> bool {
if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
// Require peers on all sampling column subnets before sending batches
let peers_on_all_custody_subnets = network
.network_globals()
.sampling_subnets
.iter()
.all(|subnet_id| {
let peer_count = network
.network_globals()
.peers
.read()
.good_custody_subnet_peer(*subnet_id)
.count();
peer_count > 0
});
peers_on_all_custody_subnets
} else {
true
}
}
/// Creates the next required batch from the chain. If there are no more batches required,
/// `false` is returned.
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
@@ -1107,15 +1101,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
return None;
}
// don't send batch requests until we have peers on sampling subnets
// TODO(das): this is a workaround to avoid sending out excessive block requests because
// block and data column requests are currently coupled. This can be removed once we find a
// way to decouple the requests and do retries individually, see issue #6258.
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
debug!("Waiting for peers to be available on custody column subnets");
return None;
}
// If no batch needs a retry, attempt to send the batch of the next epoch to download
let next_batch_id = self.to_be_downloaded;
// this batch could have been included already being an optimistic batch

View File

@@ -54,6 +54,13 @@ pub struct ChainCollection<T: BeaconChainTypes> {
}
impl<T: BeaconChainTypes> ChainCollection<T> {
#[cfg(test)]
pub(crate) fn iter(&self) -> impl Iterator<Item = &SyncingChain<T>> {
self.finalized_chains
.values()
.chain(self.head_chains.values())
}
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
ChainCollection {
beacon_chain,

View File

@@ -9,10 +9,9 @@ mod sync_type;
pub use batch::{
BatchConfig, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, BatchState,
ByRangeRequestType,
};
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
#[cfg(test)]
pub use chain_collection::SyncChainStatus;
pub use chain::BatchStateSummary;
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
pub use range::RangeSync;
pub use sync_type::RangeSyncType;

View File

@@ -39,6 +39,8 @@
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
//! and further batches are requested as current blocks are being processed.
#[cfg(test)]
use super::chain::BatchStateSummary;
use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
use super::chain_collection::{ChainCollection, SyncChainStatus};
use super::sync_type::RangeSyncType;
@@ -100,10 +102,23 @@ where
}
#[cfg(test)]
pub(crate) fn __failed_chains(&mut self) -> Vec<Hash256> {
pub(crate) fn failed_chains(&mut self) -> Vec<Hash256> {
self.failed_chains.keys().copied().collect()
}
#[cfg(test)]
pub(crate) fn batches_state(&self) -> Vec<(ChainId, BatchId, BatchStateSummary)> {
self.chains
.iter()
.flat_map(|chain| {
chain
.batches_state()
.into_iter()
.map(|(batch_id, state)| (chain.id(), batch_id, state))
})
.collect()
}
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
@@ -344,7 +359,6 @@ where
pub fn inject_error(
&mut self,
network: &mut SyncNetworkContext<T>,
peer_id: PeerId,
batch_id: BatchId,
chain_id: ChainId,
request_id: Id,
@@ -352,7 +366,7 @@ where
) {
// check that this request is pending
match self.chains.call_by_id(chain_id, |chain| {
chain.inject_error(network, batch_id, &peer_id, request_id, err)
chain.inject_error(network, batch_id, request_id, err)
}) {
Ok((removed_chain, sync_type)) => {
if let Some((removed_chain, remove_reason)) = removed_chain {

View File

@@ -35,7 +35,7 @@ use lighthouse_network::{
SamplingRequester, SingleLookupReqId, SyncRequestId,
},
types::SyncState,
NetworkConfig, NetworkGlobals, PeerId,
NetworkConfig, NetworkGlobals, PeerId, SyncInfo,
};
use slot_clock::{SlotClock, TestingSlotClock};
use tokio::sync::mpsc;
@@ -53,8 +53,21 @@ const SAMPLING_REQUIRED_SUCCESSES: usize = 2;
type DCByRootIds = Vec<DCByRootId>;
type DCByRootId = (SyncRequestId, Vec<ColumnIndex>);
pub enum PeersConfig {
SupernodeAndRandom,
SupernodeOnly,
}
impl TestRig {
pub fn test_setup() -> Self {
Self::test_setup_with_options(false)
}
pub fn test_setup_as_supernode() -> Self {
Self::test_setup_with_options(true)
}
fn test_setup_with_options(is_supernode: bool) -> Self {
// Use `fork_from_env` logic to set correct fork epochs
let spec = test_spec::<E>();
@@ -83,10 +96,11 @@ impl TestRig {
// TODO(das): make the generation of the ENR use the deterministic rng to have consistent
// column assignments
let network_config = Arc::new(NetworkConfig::default());
let globals = Arc::new(NetworkGlobals::new_test_globals(
let globals = Arc::new(NetworkGlobals::new_test_globals_as_supernode(
Vec::new(),
network_config,
chain.spec.clone(),
is_supernode,
));
let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing(
globals,
@@ -113,6 +127,7 @@ impl TestRig {
network_rx,
network_rx_queue: vec![],
sync_rx,
sent_blocks_by_range: <_>::default(),
rng,
network_globals: beacon_processor.network_globals.clone(),
sync_manager: SyncManager::new(
@@ -244,8 +259,8 @@ impl TestRig {
self.sync_manager.active_parent_lookups().len()
}
fn active_range_sync_chain(&self) -> (RangeSyncType, Slot, Slot) {
self.sync_manager.get_range_sync_chains().unwrap().unwrap()
fn active_range_sync_chain(&mut self) -> (RangeSyncType, Slot, Slot) {
self.sync_manager.range_sync().state().unwrap().unwrap()
}
fn assert_single_lookups_count(&self, count: usize) {
@@ -355,29 +370,63 @@ impl TestRig {
self.expect_empty_network();
}
pub fn new_connected_peer(&mut self) -> PeerId {
// Don't make pub, use `add_connected_peer_testing_only`
fn new_connected_peer(&mut self) -> PeerId {
self.add_connected_peer_testing_only(false)
}
// Don't make pub, use `add_connected_peer_testing_only`
fn new_connected_supernode_peer(&mut self) -> PeerId {
self.add_connected_peer_testing_only(true)
}
pub fn add_connected_peer_testing_only(&mut self, supernode: bool) -> PeerId {
let key = self.determinstic_key();
let peer_id = self
.network_globals
.peers
.write()
.__add_connected_peer_testing_only(false, &self.harness.spec, key);
self.log(&format!("Added new peer for testing {peer_id:?}"));
.__add_connected_peer_testing_only(supernode, &self.harness.spec, key);
let mut peer_custody_subnets = self
.network_globals
.peers
.read()
.peer_info(&peer_id)
.expect("peer was just added")
.custody_subnets_iter()
.map(|subnet| **subnet)
.collect::<Vec<_>>();
peer_custody_subnets.sort_unstable();
self.log(&format!(
"Added new peer for testing {peer_id:?} custody subnets {peer_custody_subnets:?}"
));
peer_id
}
pub fn new_connected_supernode_peer(&mut self) -> PeerId {
let key = self.determinstic_key();
self.network_globals
.peers
.write()
.__add_connected_peer_testing_only(true, &self.harness.spec, key)
pub fn add_sync_peer(&mut self, supernode: bool, remote_info: SyncInfo) -> PeerId {
let peer_id = self.add_connected_peer_testing_only(supernode);
self.send_sync_message(SyncMessage::AddPeer(peer_id, remote_info));
peer_id
}
fn determinstic_key(&mut self) -> CombinedKey {
k256::ecdsa::SigningKey::random(&mut self.rng).into()
}
pub fn add_sync_peers(&mut self, config: PeersConfig, remote_info: SyncInfo) {
match config {
PeersConfig::SupernodeAndRandom => {
for _ in 0..100 {
self.add_sync_peer(false, remote_info.clone());
}
self.add_sync_peer(true, remote_info);
}
PeersConfig::SupernodeOnly => {
self.add_sync_peer(true, remote_info);
}
}
}
pub fn new_connected_peers_for_peerdas(&mut self) {
// Enough sampling peers with few columns
for _ in 0..100 {
@@ -840,6 +889,19 @@ impl TestRig {
}
}
// Find, not pop
pub fn filter_received_network_events<T, F: Fn(&NetworkMessage<E>) -> Option<T>>(
&mut self,
predicate_transform: F,
) -> Vec<T> {
self.drain_network_rx();
self.network_rx_queue
.iter()
.filter_map(predicate_transform)
.collect()
}
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
&mut self,
predicate_transform: F,
@@ -1088,6 +1150,21 @@ impl TestRig {
}
}
pub fn expect_no_penalty_for_anyone(&mut self) {
self.drain_network_rx();
let downscore_events = self
.network_rx_queue
.iter()
.filter_map(|ev| match ev {
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((peer_id, msg)),
_ => None,
})
.collect::<Vec<_>>();
if !downscore_events.is_empty() {
panic!("Expected no downscoring events but found: {downscore_events:?}");
}
}
#[track_caller]
fn expect_parent_chain_process(&mut self) {
match self.beacon_processor_rx.try_recv() {
@@ -1123,6 +1200,25 @@ impl TestRig {
}
}
#[track_caller]
pub fn expect_penalties(&mut self, expected_penalty_msg: &'static str) {
let all_penalties = self.filter_received_network_events(|ev| match ev {
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((*peer_id, *msg)),
_ => None,
});
if all_penalties
.iter()
.any(|(_, msg)| *msg != expected_penalty_msg)
{
panic!(
"Expected penalties only of {expected_penalty_msg}, but found {all_penalties:?}"
);
}
self.log(&format!(
"Found expected penalties {expected_penalty_msg}: {all_penalties:?}"
));
}
#[track_caller]
pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) {
let penalty_msg = self

View File

@@ -6,13 +6,17 @@ use beacon_chain::builder::Witness;
use beacon_chain::eth1_chain::CachingEth1Backend;
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
use beacon_processor::WorkEvent;
use lighthouse_network::service::api_types::ComponentsByRangeRequestId;
use lighthouse_network::NetworkGlobals;
use rand_chacha::ChaCha20Rng;
use slot_clock::ManualSlotClock;
use std::collections::HashMap;
use std::sync::Arc;
use store::MemoryStore;
use tokio::sync::mpsc;
use types::{ChainSpec, ForkName, MinimalEthSpec as E};
use types::{ChainSpec, ForkName, MinimalEthSpec as E, SignedBeaconBlock};
pub use lookups::PeersConfig;
mod lookups;
mod range;
@@ -64,4 +68,7 @@ struct TestRig {
rng: ChaCha20Rng,
fork_name: ForkName,
spec: Arc<ChainSpec>,
// Cache of sent blocks for PeerDAS responses
sent_blocks_by_range: HashMap<ComponentsByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
}

File diff suppressed because it is too large Load Diff