Blob syncing (#24)

* add a rt is_blob_batch * use the mixed type everywhere * glue * more glue * minor fixes * fix range tests * filling in the gaps * moore filling in the gaps
2026-03-14 02:12:33 +00:00 · 2022-11-24 07:45:38 -05:00
parent ce097ac8d2
commit bf5005244e
10 changed files with 706 additions and 280 deletions
--- a/beacon_node/network/src/sync/network_context.rs
+++ b/beacon_node/network/src/sync/network_context.rs
@@ -1,8 +1,8 @@
 //! Provides network functionality for the Syncing thread. This fundamentally wraps a network
 //! channel and stores a global RPC ID to perform requests.

-use super::manager::{Id, RequestId as SyncRequestId, SeansBlob, SeansBlock, SeansBlockBlob};
-use super::range_sync::{BatchId, ChainId};
+use super::manager::{BlockTy, Id, RequestId as SyncRequestId};
+use super::range_sync::{BatchId, ChainId, ExpectedBatchTy};
 use crate::beacon_processor::WorkEvent;
 use crate::service::{NetworkMessage, RequestId};
 use crate::status::ToStatusMessage;
@@ -12,22 +12,54 @@ use lighthouse_network::rpc::methods::BlobsByRangeRequest;
 use lighthouse_network::rpc::{BlocksByRangeRequest, BlocksByRootRequest, GoodbyeReason};
 use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request};
 use slog::{debug, trace, warn};
+use std::collections::hash_map::Entry;
 use std::collections::VecDeque;
 use std::sync::Arc;
 use tokio::sync::mpsc;
+use types::{BlobsSidecar, EthSpec, SignedBeaconBlock, SignedBeaconBlockAndBlobsSidecar};

 #[derive(Debug, Default)]
-struct BlockBlobRequestInfo {
-    /// Blocks we have received awaiting for their corresponding blob
-    accumulated_blocks: VecDeque<SeansBlock>,
-    /// Blobs we have received awaiting for their corresponding block
-    accumulated_blobs: VecDeque<SeansBlob>,
+struct BlockBlobRequestInfo<T: EthSpec> {
+    /// Blocks we have received awaiting for their corresponding sidecar.
+    accumulated_blocks: VecDeque<Arc<SignedBeaconBlock<T>>>,
+    /// Sidecars we have received awaiting for their corresponding block.
+    accumulated_sidecars: VecDeque<Arc<BlobsSidecar<T>>>,
    /// Whether the individual RPC request for blocks is finished or not.
-    // Not sure if this is needed
    is_blocks_rpc_finished: bool,
-    /// Whether the individual RPC request for blobs is finished or not
-    // Not sure if this is needed
-    is_blobs_rpc_finished: bool,
+    /// Whether the individual RPC request for sidecars is finished or not.
+    is_sidecar_rpc_finished: bool,
+}
+
+impl<T: EthSpec> BlockBlobRequestInfo<T> {
+    /// Records one item received on the blocks stream.
+    ///
+    /// `Some(block)` is queued until it can be paired with a sidecar; `None` marks the blocks
+    /// RPC stream as finished.
+    pub fn add_block_response(&mut self, maybe_block: Option<Arc<SignedBeaconBlock<T>>>) {
+        match maybe_block {
+            Some(block) => self.accumulated_blocks.push_back(block),
+            None => self.is_blocks_rpc_finished = true,
+        }
+    }
+
+    /// Records one item received on the sidecars stream.
+    ///
+    /// `Some(sidecar)` is queued until it can be paired with a block; `None` marks the sidecars
+    /// RPC stream as finished.
+    pub fn add_sidecar_response(&mut self, maybe_sidecar: Option<Arc<BlobsSidecar<T>>>) {
+        match maybe_sidecar {
+            Some(sidecar) => self.accumulated_sidecars.push_back(sidecar),
+            None => self.is_sidecar_rpc_finished = true,
+        }
+    }
+
+    /// Pops the oldest queued block together with the oldest queued sidecar.
+    ///
+    /// Pairs are formed purely in arrival order; the two items are not compared with each other.
+    /// Returns `None`, leaving both queues untouched, unless each queue holds at least one item.
+    pub fn pop_response(&mut self) -> Option<SignedBeaconBlockAndBlobsSidecar<T>> {
+        // Both queues must be checked before anything is popped: a block that arrives ahead of
+        // its sidecar has to stay queued instead of being taken out with nothing to pair it to.
+        if self.accumulated_blocks.is_empty() || self.accumulated_sidecars.is_empty() {
+            return None;
+        }
+        Some(SignedBeaconBlockAndBlobsSidecar {
+            beacon_block: self.accumulated_blocks.pop_front()?,
+            blobs_sidecar: self.accumulated_sidecars.pop_front()?,
+        })
+    }
+
+    /// Returns `true` once both the blocks stream and the sidecars stream have terminated.
+    ///
+    /// Only the two termination flags are consulted; items still queued are not considered.
+    pub fn is_finished(&self) -> bool {
+        self.is_blocks_rpc_finished && self.is_sidecar_rpc_finished
+    }
 }

 /// Wraps a Network channel to employ various RPC related network functionality for the Sync manager. This includes management of a global RPC request Id.
@@ -47,7 +79,12 @@ pub struct SyncNetworkContext<T: BeaconChainTypes> {
    /// BlocksByRange requests made by backfill syncing.
    backfill_requests: FnvHashMap<Id, BatchId>,

-    block_blob_requests: FnvHashMap<Id, (ChainId, BatchId, BlockBlobRequestInfo)>,
+    /// BlocksByRange requests paired with BlobsByRange requests made by range sync.
+    range_sidecar_pair_requests:
+        FnvHashMap<Id, (ChainId, BatchId, BlockBlobRequestInfo<T::EthSpec>)>,
+
+    /// BlocksByRange requests paired with BlobsByRange requests made by the backfill sync.
+    backfill_sidecar_pair_requests: FnvHashMap<Id, (BatchId, BlockBlobRequestInfo<T::EthSpec>)>,

    /// Whether the ee is online. If it's not, we don't allow access to the
    /// `beacon_processor_send`.
@@ -67,15 +104,16 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
        beacon_processor_send: mpsc::Sender<WorkEvent<T>>,
        log: slog::Logger,
    ) -> Self {
-        Self {
+        SyncNetworkContext {
            network_send,
-            execution_engine_state: EngineState::Online, // always assume `Online` at the start
            network_globals,
            request_id: 1,
-            range_requests: FnvHashMap::default(),
-            backfill_requests: FnvHashMap::default(),
+            range_requests: Default::default(),
+            backfill_requests: Default::default(),
+            range_sidecar_pair_requests: Default::default(),
+            backfill_sidecar_pair_requests: Default::default(),
+            execution_engine_state: EngineState::Online, // always assume `Online` at the start
            beacon_processor_send,
-            block_blob_requests: Default::default(),
            log,
        }
    }
@@ -122,190 +160,295 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
    pub fn blocks_by_range_request(
        &mut self,
        peer_id: PeerId,
+        batch_type: ExpectedBatchTy,
        request: BlocksByRangeRequest,
        chain_id: ChainId,
        batch_id: BatchId,
    ) -> Result<Id, &'static str> {
-        trace!(
-            self.log,
-            "Sending BlocksByRange Request";
-            "method" => "BlocksByRange",
-            "count" => request.count,
-            "peer" => %peer_id,
-        );
-        let request = Request::BlocksByRange(request);
-        let id = self.next_id();
-        let request_id = RequestId::Sync(SyncRequestId::RangeSync { id });
-        self.send_network_msg(NetworkMessage::SendRequest {
-            peer_id,
-            request,
-            request_id,
-        })?;
-        self.range_requests.insert(id, (chain_id, batch_id));
-        Ok(id)
-    }
+        match batch_type {
+            ExpectedBatchTy::OnlyBlock => {
+                trace!(
+                    self.log,
+                    "Sending BlocksByRange Request";
+                    "method" => "BlocksByRange",
+                    "count" => request.count,
+                    "peer" => %peer_id,
+                );
+                let request = Request::BlocksByRange(request);
+                let id = self.next_id();
+                let request_id = RequestId::Sync(SyncRequestId::RangeSync { id });
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request,
+                    request_id,
+                })?;
+                self.range_requests.insert(id, (chain_id, batch_id));
+                Ok(id)
+            }
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                debug!(
+                    self.log,
+                    "Sending BlockBlock by range request";
+                    "method" => "Mixed by range request",
+                    "count" => request.count,
+                    "peer" => %peer_id,
+                );

-    /// A blocks-blob by range request for the range sync algorithm.
-    pub fn blocks_blobs_by_range_request(
-        &mut self,
-        peer_id: PeerId,
-        request: BlocksByRangeRequest, // for now this is enough to get both requests.
-        chain_id: ChainId,
-        batch_id: BatchId,
-    ) -> Result<Id, &'static str> {
-        debug!(
-            self.log,
-            "Sending BlockBlock by range request";
-            "method" => "BlocksByRangeAndBlobsOrSomething",
-            "count" => request.count,
-            "peer" => %peer_id,
-        );
+                // create the shared request id. This is fine since the rpc handles substream ids.
+                let id = self.next_id();
+                let request_id = RequestId::Sync(SyncRequestId::RangeSidecarPair { id });

-        // create the shared request id. This is fine since the rpc handles substream ids.
-        let id = self.next_id();
-        let request_id = RequestId::Sync(SyncRequestId::RangeBlockBlob { id });
+                // Create the blobs request based on the blocks request.
+                let blobs_request = Request::BlobsByRange(BlobsByRangeRequest {
+                    start_slot: request.start_slot,
+                    count: request.count,
+                });
+                let blocks_request = Request::BlocksByRange(request);

-        // Create the blob request based on the blob request.
-        let blobs_request = Request::BlobsByRange(BlobsByRangeRequest {
-            start_slot: request.start_slot,
-            count: request.count,
-        });
-        let blocks_request = Request::BlocksByRange(request);
-
-        // Send both requests. Make sure both can be sent.
-        self.send_network_msg(NetworkMessage::SendRequest {
-            peer_id,
-            request: blocks_request,
-            request_id,
-        })
-        .and_then(|_| {
-            self.send_network_msg(NetworkMessage::SendRequest {
-                peer_id,
-                request: blobs_request,
-                request_id,
-            })
-        })?;
-        let block_blob_info = BlockBlobRequestInfo::default();
-        self.block_blob_requests
-            .insert(id, (chain_id, batch_id, block_blob_info));
-        Ok(id)
+                // Send both requests. Make sure both can be sent.
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request: blocks_request,
+                    request_id,
+                })?;
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request: blobs_request,
+                    request_id,
+                })?;
+                let block_blob_info = BlockBlobRequestInfo::default();
+                self.range_sidecar_pair_requests
+                    .insert(id, (chain_id, batch_id, block_blob_info));
+                Ok(id)
+            }
+        }
    }

    /// A blocks by range request sent by the backfill sync algorithm
    pub fn backfill_blocks_by_range_request(
        &mut self,
        peer_id: PeerId,
+        batch_type: ExpectedBatchTy,
        request: BlocksByRangeRequest,
        batch_id: BatchId,
    ) -> Result<Id, &'static str> {
-        trace!(
-            self.log,
-            "Sending backfill BlocksByRange Request";
-            "method" => "BlocksByRange",
-            "count" => request.count,
-            "peer" => %peer_id,
-        );
-        let request = Request::BlocksByRange(request);
-        let id = self.next_id();
-        let request_id = RequestId::Sync(SyncRequestId::BackFillSync { id });
-        self.send_network_msg(NetworkMessage::SendRequest {
-            peer_id,
-            request,
-            request_id,
-        })?;
-        self.backfill_requests.insert(id, batch_id);
-        Ok(id)
-    }
+        match batch_type {
+            ExpectedBatchTy::OnlyBlock => {
+                trace!(
+                    self.log,
+                    "Sending backfill BlocksByRange Request";
+                    "method" => "BlocksByRange",
+                    "count" => request.count,
+                    "peer" => %peer_id,
+                );
+                let request = Request::BlocksByRange(request);
+                let id = self.next_id();
+                let request_id = RequestId::Sync(SyncRequestId::BackFillSync { id });
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request,
+                    request_id,
+                })?;
+                self.backfill_requests.insert(id, batch_id);
+                Ok(id)
+            }
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                debug!(
+                    self.log,
+                    "Sending BlockBlock by range request";
+                    "method" => "Mixed by range request",
+                    "count" => request.count,
+                    "peer" => %peer_id,
+                );

-    /// Received a blocks by range response.
-    pub fn range_sync_response(
-        &mut self,
-        request_id: Id,
-        remove: bool,
-    ) -> Option<(ChainId, BatchId)> {
-        if remove {
-            self.range_requests.remove(&request_id)
-        } else {
-            self.range_requests.get(&request_id).cloned()
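+                // create the shared request id. This is fine since the rpc handles substream ids.
+                // NOTE(review): the id below is tagged `RangeSidecarPair` although this request is
+                // tracked in `backfill_sidecar_pair_requests` -- confirm that its responses are
+                // routed back to backfill sync and not to range sync.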
+                let id = self.next_id();
+                let request_id = RequestId::Sync(SyncRequestId::RangeSidecarPair { id });
+
+                // Create the blobs request based on the blocks request.
+                let blobs_request = Request::BlobsByRange(BlobsByRangeRequest {
+                    start_slot: request.start_slot,
+                    count: request.count,
+                });
+                let blocks_request = Request::BlocksByRange(request);
+
+                // Send both requests. Make sure both can be sent.
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request: blocks_request,
+                    request_id,
+                })?;
+                self.send_network_msg(NetworkMessage::SendRequest {
+                    peer_id,
+                    request: blobs_request,
+                    request_id,
+                })?;
+                let block_blob_info = BlockBlobRequestInfo::default();
+                self.backfill_sidecar_pair_requests
+                    .insert(id, (batch_id, block_blob_info));
+                Ok(id)
+            }
        }
    }

-    /// Fails a blob bob request.
-    // We need to recover the chain and batch id to be able to tell range abound the failure.
-    pub fn fail_block_bob_request(&mut self, request_id: Id) -> Option<(ChainId, BatchId)> {
-        self.block_blob_requests
-            .remove(&request_id)
-            .map(|(chain_id, batch_id, _info)| (chain_id, batch_id))
-    }
-
-    /// We received a block for a block blob request. This returns:
-    /// None: if there is no pairing for this block yet
-    /// Some(chain_id, Some(paired block blob)) if the block was Some and there was a blob waiting
-    /// None if the block was none
-    pub fn block_blob_block_response(
+    /// Received a blocks by range response.
+    pub fn range_sync_block_response(
        &mut self,
        request_id: Id,
-        block: Option<SeansBlock>,
-    ) -> Option<(ChainId, BatchId, Option<SeansBlockBlob>)> {
-        unimplemented!()
-        // let (chain_id, batch_id, info) = self.block_blob_requests.get_mut(&request_id)?;
-        // match block {
-        //     Some(block) => match info.accumulated_blobs.pop_front() {
-        //         Some(blob) => Some(SeansBlockBlob { block, blob }),
-        //         None => {
-        //             // accumulate the block
-        //             info.accumulated_blocks.push_back(block);
-        //             None
-        //         }
-        //     },
-        //     None => {
-        //         info.is_blocks_rpc_finished = true;
-        //
-        //         if info.is_blobs_rpc_finished && info.is_blocks_rpc_finished {
-        //             // this is the coupled stream termination
-        //             Some((chain_id, batch_id, None))
-        //         } else {
-        //             None
-        //         }
-        //     }
-        // }
+        maybe_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
+        batch_type: ExpectedBatchTy,
+    ) -> Option<(ChainId, BatchId, Option<BlockTy<T::EthSpec>>)> {
+        match batch_type {
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                match self.range_sidecar_pair_requests.entry(request_id) {
+                    Entry::Occupied(mut entry) => {
+                        let (chain_id, batch_id, info) = entry.get_mut();
+                        let chain_id = chain_id.clone();
+                        let batch_id = batch_id.clone();
+                        info.add_block_response(maybe_block);
+                        let maybe_block = info
+                            .pop_response()
+                            .map(|block_sidecar_pair| BlockTy::BlockAndBlob { block_sidecar_pair });
+                        if info.is_finished() {
+                            entry.remove();
+                        }
+                        Some((chain_id, batch_id, maybe_block))
+                    }
+                    Entry::Vacant(_) => None,
+                }
+            }
+            ExpectedBatchTy::OnlyBlock => {
+                // if the request is just for blocks then it can be removed on a stream termination
+                match maybe_block {
+                    Some(block) => {
+                        self.range_requests
+                            .get(&request_id)
+                            .cloned()
+                            .map(|(chain_id, batch_id)| {
+                                (chain_id, batch_id, Some(BlockTy::Block { block }))
+                            })
+                    }
+                    None => self
+                        .range_requests
+                        .remove(&request_id)
+                        .map(|(chain_id, batch_id)| (chain_id, batch_id, None)),
+                }
+            }
+        }
    }

-    pub fn block_blob_blob_response(
+    pub fn range_sync_sidecar_response(
        &mut self,
        request_id: Id,
-        blob: Option<SeansBlob>,
-    ) -> Option<(ChainId, BatchId, Option<SeansBlockBlob>)> {
-        // let (batch_id, chain_id, info) = self.block_blob_requests.get_mut(&request_id)?;
-        // match blob {
-        //     Some(blob) => match info.accumulated_blocks.pop_front() {
-        //         Some(block) => Some(SeansBlockBlob { block, blob }),
-        //         None => {
-        //             // accumulate the blob
-        //             info.accumulated_blobs.push_back(blob);
-        //             None
-        //         }
-        //     },
-        //     None => {
-        //         info.is_blobs_rpc_finished = true;
-        //
-        //         if info.is_blobs_rpc_finished && info.is_blocks_rpc_finished {
-        //             // this is the coupled stream termination
-        //             Some((chain_id, batch_id, None))
-        //         } else {
-        //             None
-        //         }
-        //     }
-        // }
-        unimplemented!("do it")
+        maybe_sidecar: Option<Arc<BlobsSidecar<T::EthSpec>>>,
+    ) -> Option<(ChainId, BatchId, Option<BlockTy<T::EthSpec>>)> {
+        match self.range_sidecar_pair_requests.entry(request_id) {
+            Entry::Occupied(mut entry) => {
+                let (chain_id, batch_id, info) = entry.get_mut();
+                let chain_id = chain_id.clone();
+                let batch_id = batch_id.clone();
+                info.add_sidecar_response(maybe_sidecar);
+                let maybe_block = info
+                    .pop_response()
+                    .map(|block_sidecar_pair| BlockTy::BlockAndBlob { block_sidecar_pair });
+                if info.is_finished() {
+                    entry.remove();
+                }
+                Some((chain_id, batch_id, maybe_block))
+            }
+            Entry::Vacant(_) => None,
+        }
+    }
+
+    /// Forgets a failed range sync request and reports which batch it belonged to.
+    ///
+    /// The entry is removed from the map selected by `batch_type`. Returns the chain id and
+    /// batch id stored under `request_id`, or `None` if that map holds no such entry.
+    pub fn range_sync_request_failed(
+        &mut self,
+        request_id: Id,
+        batch_type: ExpectedBatchTy,
+    ) -> Option<(ChainId, BatchId)> {
+        match batch_type {
+            ExpectedBatchTy::OnlyBlock => self.range_requests.remove(&request_id),
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                // The pairing state is dropped with the entry; only the ids are handed back.
+                let (chain_id, batch_id, _pairing_state) =
+                    self.range_sidecar_pair_requests.remove(&request_id)?;
+                Some((chain_id, batch_id))
+            }
+        }
+    }
+
+    /// Forgets a failed backfill request and reports which batch it belonged to.
+    ///
+    /// The entry is removed from the map selected by `batch_type`. Returns the batch id stored
+    /// under `request_id`, or `None` if that map holds no such entry.
+    pub fn backfill_request_failed(
+        &mut self,
+        request_id: Id,
+        batch_type: ExpectedBatchTy,
+    ) -> Option<BatchId> {
+        match batch_type {
+            ExpectedBatchTy::OnlyBlock => self.backfill_requests.remove(&request_id),
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                // The pairing state is dropped with the entry; only the batch id is handed back.
+                let (batch_id, _pairing_state) =
+                    self.backfill_sidecar_pair_requests.remove(&request_id)?;
+                Some(batch_id)
+            }
+        }
    }

    /// Received a blocks by range response.
-    pub fn backfill_sync_response(&mut self, request_id: Id, remove: bool) -> Option<BatchId> {
-        if remove {
-            self.backfill_requests.remove(&request_id)
-        } else {
-            self.backfill_requests.get(&request_id).cloned()
+    pub fn backfill_sync_block_response(
+        &mut self,
+        request_id: Id,
+        maybe_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
+        batch_type: ExpectedBatchTy,
+    ) -> Option<(BatchId, Option<BlockTy<T::EthSpec>>)> {
+        match batch_type {
+            ExpectedBatchTy::OnlyBlockBlobs => {
+                match self.backfill_sidecar_pair_requests.entry(request_id) {
+                    Entry::Occupied(mut entry) => {
+                        let (batch_id, info) = entry.get_mut();
+                        let batch_id = batch_id.clone();
+                        info.add_block_response(maybe_block);
+                        let maybe_block = info
+                            .pop_response()
+                            .map(|block_sidecar_pair| BlockTy::BlockAndBlob { block_sidecar_pair });
+                        if info.is_finished() {
+                            entry.remove();
+                        }
+                        Some((batch_id, maybe_block))
+                    }
+                    Entry::Vacant(_) => None,
+                }
+            }
+            ExpectedBatchTy::OnlyBlock => {
+                // if the request is just for blocks then it can be removed on a stream termination
+                match maybe_block {
+                    Some(block) => self
+                        .backfill_requests
+                        .get(&request_id)
+                        .cloned()
+                        .map(|batch_id| (batch_id, Some(BlockTy::Block { block }))),
+                    None => self
+                        .backfill_requests
+                        .remove(&request_id)
+                        .map(|batch_id| (batch_id, None)),
+                }
+            }
+        }
+    }
+
+    /// Handles one item of the sidecars stream of a paired backfill request.
+    ///
+    /// The sidecar (or the stream termination, when `maybe_sidecar` is `None`) is recorded in the
+    /// request's pairing state, then a block/sidecar pair is popped if one is available. The
+    /// entry is removed once both streams have terminated.
+    ///
+    /// Returns `None` when `request_id` is not a tracked paired backfill request; otherwise the
+    /// batch id together with the popped pair, if any.
+    pub fn backfill_sync_sidecar_response(
+        &mut self,
+        request_id: Id,
+        maybe_sidecar: Option<Arc<BlobsSidecar<T::EthSpec>>>,
+    ) -> Option<(BatchId, Option<BlockTy<T::EthSpec>>)> {
+        match self.backfill_sidecar_pair_requests.entry(request_id) {
+            Entry::Vacant(_) => None,
+            Entry::Occupied(mut occupied) => {
+                let (stored_batch_id, pairing) = occupied.get_mut();
+                let batch_id = stored_batch_id.clone();
+                pairing.add_sidecar_response(maybe_sidecar);
+                let paired = pairing
+                    .pop_response()
+                    .map(|block_sidecar_pair| BlockTy::BlockAndBlob { block_sidecar_pair });
+                // Read the flag before touching the entry so the borrow of its value has ended.
+                let finished = pairing.is_finished();
+                if finished {
+                    occupied.remove();
+                }
+                Some((batch_id, paired))
+            }
        }
    }

@@ -316,6 +459,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
        request: BlocksByRootRequest,
    ) -> Result<Id, &'static str> {
        //FIXME(sean) add prune depth logic here?
+        // D: YES

        trace!(
            self.log,
@@ -428,4 +572,29 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
        self.request_id += 1;
        id
    }
+
+    /// Decides which kind of batch should be requested.
+    ///
+    /// In test builds this always returns `ExpectedBatchTy::OnlyBlock`. In all other builds it
+    /// logs a warning and always returns `ExpectedBatchTy::OnlyBlockBlobs`; the `epoch` argument
+    /// is not consulted by the current body.
+    ///
+    /// # Panics
+    ///
+    /// In non-test builds, panics if `EPOCHS_PER_BATCH` is not 1.
+    pub fn batch_type(&self, epoch: types::Epoch) -> ExpectedBatchTy {
+        // Keep tests only for blocks.
+        #[cfg(test)]
+        {
+            return ExpectedBatchTy::OnlyBlock;
+        }
+        #[cfg(not(test))]
+        {
+            use super::range_sync::EPOCHS_PER_BATCH;
+            // Checked on every call; the note below explains why the batch size must be one epoch.
+            assert_eq!(
+                EPOCHS_PER_BATCH, 1,
+                "If this is not one, everything will fail horribly"
+            );
+            // Emitted on every call as a reminder that the real decision logic is still missing.
+            warn!(
+            self.log,
+            "Missing fork boundary and prunning boundary comparison to decide request type. EVERYTHING IS A BLOB, BOB."
+        );
+            // Here we need access to the beacon chain, check the fork boundary, the current epoch, the
+            // blob period to serve and check with that if the batch is a blob batch or not.
+            // NOTE: This would carelessly assume batch sizes are always 1 epoch, to avoid needing to
+            // align with the batch boundary.
+            ExpectedBatchTy::OnlyBlockBlobs
+        }
+    }
 }