Generalize sync ActiveRequests (#6398)

* Generalize sync ActiveRequests

* Remove impossible to hit test

* Update beacon_node/lighthouse_network/src/service/api_types.rs

Co-authored-by: realbigsean <sean@sigmaprime.io>

* Update beacon_node/network/src/sync/network_context.rs

Co-authored-by: realbigsean <sean@sigmaprime.io>

* Update beacon_node/network/src/sync/network_context.rs

Co-authored-by: realbigsean <sean@sigmaprime.io>

* Simplify match

* Fix display

* Merge remote-tracking branch 'sigp/unstable' into sync-active-request-generalize

* Sampling requests should not expect all responses

* Merge remote-tracking branch 'sigp/unstable' into sync-active-request-generalize

* Fix sampling_batch_requests_not_enough_responses_returned test

* Merge remote-tracking branch 'sigp/unstable' into sync-active-request-generalize

* Merge branch 'unstable' of https://github.com/sigp/lighthouse into sync-active-request-generalize
This commit is contained in:
Lion - dapplion
2024-10-17 21:14:13 +03:00
committed by GitHub
parent 606a113cff
commit a074e9eb33
10 changed files with 371 additions and 335 deletions

View File

@@ -283,6 +283,10 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
block_root: self.block_root,
indices: indices.clone(),
},
// true = enforce max_requests are returned data_columns_by_root. We only issue requests
// for blocks after we know the block has data, and only request peers after they claim to
// have imported the block+columns and claim to be custodians
true,
)
.map_err(Error::SendFailed)?;

View File

@@ -1,23 +1,187 @@
use std::{collections::hash_map::Entry, hash::Hash};
use beacon_chain::validator_monitor::timestamp_now;
use fnv::FnvHashMap;
use lighthouse_network::PeerId;
use strum::IntoStaticStr;
use types::Hash256;
pub use blobs_by_root::{ActiveBlobsByRootRequest, BlobsByRootSingleBlockRequest};
pub use blocks_by_root::{ActiveBlocksByRootRequest, BlocksByRootSingleRequest};
pub use blobs_by_root::{BlobsByRootRequestItems, BlobsByRootSingleBlockRequest};
pub use blocks_by_root::{BlocksByRootRequestItems, BlocksByRootSingleRequest};
pub use data_columns_by_root::{
ActiveDataColumnsByRootRequest, DataColumnsByRootSingleBlockRequest,
DataColumnsByRootRequestItems, DataColumnsByRootSingleBlockRequest,
};
use crate::metrics;
use super::{RpcEvent, RpcResponseResult};
mod blobs_by_root;
mod blocks_by_root;
mod data_columns_by_root;
#[derive(Debug, PartialEq, Eq, IntoStaticStr)]
pub enum LookupVerifyError {
NoResponseReturned,
NotEnoughResponsesReturned { expected: usize, actual: usize },
NotEnoughResponsesReturned { actual: usize },
TooManyResponses,
UnrequestedBlockRoot(Hash256),
UnrequestedIndex(u64),
InvalidInclusionProof,
DuplicateData,
}
/// Collection of active requests of a single ReqResp method, i.e. `blocks_by_root`
pub struct ActiveRequests<K: Eq + Hash, T: ActiveRequestItems> {
    // In-flight requests keyed by an opaque request ID `K`.
    requests: FnvHashMap<K, ActiveRequest<T>>,
    // Static label tagging metrics for this method (e.g. the unknown-request counter).
    name: &'static str,
}
/// Stateful container for a single active ReqResp request
struct ActiveRequest<T: ActiveRequestItems> {
    // Accumulation state: still collecting items, completed early, or errored.
    state: T>,
    // Peer the request was issued to; used to look up requests by peer.
    peer_id: PeerId,
    // Error if the request terminates before receiving max expected responses
    expect_max_responses: bool,
}
// Lifecycle of a request's accumulated items.
enum State<T> {
    // Still accumulating response chunks inside `T`.
    Active(T),
    // All expected items were received and returned before the stream terminated.
    CompletedEarly,
    // A chunk failed validation; further chunks for this request are ignored.
    Errored,
}
impl<K: Eq + Hash, T: ActiveRequestItems> ActiveRequests<K, T> {
    /// Create an empty collection. `name` labels the metrics emitted for this method.
    pub fn new(name: &'static str) -> Self {
        Self {
            requests: <_>::default(),
            name,
        }
    }

    /// Track a new active request issued to `peer_id` under the key `id`.
    pub fn insert(&mut self, id: K, peer_id: PeerId, expect_max_responses: bool, items: T) {
        self.requests.insert(
            id,
            ActiveRequest {
                state: State::Active(items),
                peer_id,
                expect_max_responses,
            },
        );
    }

    /// Handle an `RpcEvent` for a specific request index by `id`.
    ///
    /// Lighthouse ReqResp protocol API promises to send 0 or more `RpcEvent::Response` chunks,
    /// and EITHER a single `RpcEvent::RPCError` or `RpcEvent::StreamTermination`.
    ///
    /// Downstream code expects to receive a single `Result` value per request ID. However,
    /// `add_item` may convert ReqResp success chunks into errors. This function handles the
    /// multiple errors / stream termination internally ensuring that a single `Some<Result>` is
    /// returned.
    pub fn on_response(
        &mut self,
        id: K,
        rpc_event: RpcEvent<T::Item>,
    ) -> Option<RpcResponseResult<Vec<T::Item>>> {
        let Entry::Occupied(mut entry) = self.requests.entry(id) else {
            metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &[self.name]);
            return None;
        };

        match rpc_event {
            // Handler of a success ReqResp chunk. Adds the item to the request accumulator.
            // `ActiveRequestItems` validates the item before appending to its internal state.
            RpcEvent::Response(item, seen_timestamp) => {
                // `get_mut` already yields `&mut ActiveRequest<T>`; the previous
                // `&mut entry.get_mut()` produced a redundant `&mut &mut _` borrow.
                let request = entry.get_mut();
                match &mut request.state {
                    State::Active(items) => {
                        match items.add(item) {
                            // Received all items we are expecting for, return early, but keep the request
                            // struct to handle the stream termination gracefully.
                            Ok(true) => {
                                let items = items.consume();
                                request.state = State::CompletedEarly;
                                Some(Ok((items, seen_timestamp)))
                            }
                            // Received item, but we are still expecting more
                            Ok(false) => None,
                            // Received an invalid item
                            Err(e) => {
                                request.state = State::Errored;
                                Some(Err(e.into()))
                            }
                        }
                    }
                    // Should never happen, ReqResp network behaviour enforces a max count of chunks.
                    // When `max_remaining_chunks <= 1` the inbound stream is terminated in
                    // `rpc/handler.rs`. Handling this case adds complexity for no gain. Even if an
                    // attacker could abuse this, there's no gain in sending garbage chunks that
                    // will be ignored anyway.
                    State::CompletedEarly => None,
                    // Ignore items after errors. We may want to penalize repeated invalid chunks
                    // for the same response. But that's an optimization to ban peers sending
                    // invalid data faster, which we choose not to adopt for now.
                    State::Errored => None,
                }
            }
            RpcEvent::StreamTermination => {
                // After stream termination we must forget about this request, there will be no more
                // messages coming from the network
                let request = entry.remove();
                match request.state {
                    // Received a stream termination in a valid sequence, consume items
                    State::Active(mut items) => {
                        if request.expect_max_responses {
                            Some(Err(LookupVerifyError::NotEnoughResponsesReturned {
                                actual: items.consume().len(),
                            }
                            .into()))
                        } else {
                            Some(Ok((items.consume(), timestamp_now())))
                        }
                    }
                    // Items already returned, ignore stream termination
                    State::CompletedEarly => None,
                    // Returned an error earlier, ignore stream termination
                    State::Errored => None,
                }
            }
            RpcEvent::RPCError(e) => {
                // After an Error event from the network we must forget about this request as this
                // may be the last message for this request.
                match entry.remove().state {
                    // Received error while request is still active, propagate error.
                    State::Active(_) => Some(Err(e.into())),
                    // Received error after completing the request, ignore the error. This is okay
                    // because the network has already registered a downscore event if necessary for
                    // this message.
                    State::CompletedEarly => None,
                    // Received a network error after a validity error. Okay to ignore, see above
                    State::Errored => None,
                }
            }
        }
    }

    /// IDs of all active requests issued to `peer_id`.
    pub fn active_requests_of_peer(&self, peer_id: &PeerId) -> Vec<&K> {
        self.requests
            .iter()
            .filter(|(_, request)| &request.peer_id == peer_id)
            .map(|(id, _)| id)
            .collect()
    }

    /// Number of requests currently in flight.
    pub fn len(&self) -> usize {
        self.requests.len()
    }

    /// True when no requests are in flight (companion to `len`, per clippy
    /// `len_without_is_empty`).
    pub fn is_empty(&self) -> bool {
        self.requests.is_empty()
    }
}
/// Accumulator for the response items of a single ReqResp request; implementations
/// validate each chunk against the originating request before storing it.
pub trait ActiveRequestItems {
    // The per-chunk item type, e.g. `Arc<SignedBeaconBlock<E>>`.
    type Item;

    /// Add a new item into the accumulator. Returns true if all expected items have been received.
    fn add(&mut self, item: Self::Item) -> Result<bool, LookupVerifyError>;

    /// Return all accumulated items consuming them.
    fn consume(&mut self) -> Vec<Self::Item>;
}

View File

@@ -1,8 +1,8 @@
use lighthouse_network::{rpc::methods::BlobsByRootRequest, PeerId};
use lighthouse_network::rpc::methods::BlobsByRootRequest;
use std::sync::Arc;
use types::{blob_sidecar::BlobIdentifier, BlobSidecar, ChainSpec, EthSpec, Hash256};
use super::LookupVerifyError;
use super::{ActiveRequestItems, LookupVerifyError};
#[derive(Debug, Clone)]
pub struct BlobsByRootSingleBlockRequest {
@@ -25,34 +25,27 @@ impl BlobsByRootSingleBlockRequest {
}
}
pub struct ActiveBlobsByRootRequest<E: EthSpec> {
pub struct BlobsByRootRequestItems<E: EthSpec> {
request: BlobsByRootSingleBlockRequest,
blobs: Vec<Arc<BlobSidecar<E>>>,
resolved: bool,
pub(crate) peer_id: PeerId,
items: Vec<Arc<BlobSidecar<E>>>,
}
impl<E: EthSpec> ActiveBlobsByRootRequest<E> {
pub fn new(request: BlobsByRootSingleBlockRequest, peer_id: PeerId) -> Self {
impl<E: EthSpec> BlobsByRootRequestItems<E> {
pub fn new(request: BlobsByRootSingleBlockRequest) -> Self {
Self {
request,
blobs: vec![],
resolved: false,
peer_id,
items: vec![],
}
}
}
impl<E: EthSpec> ActiveRequestItems for BlobsByRootRequestItems<E> {
type Item = Arc<BlobSidecar<E>>;
/// Appends a chunk to this multi-item request. If all expected chunks are received, this
/// method returns `Some`, resolving the request before the stream terminator.
/// The active request SHOULD be dropped after `add_response` returns an error
pub fn add_response(
&mut self,
blob: Arc<BlobSidecar<E>>,
) -> Result<Option<Vec<Arc<BlobSidecar<E>>>>, LookupVerifyError> {
if self.resolved {
return Err(LookupVerifyError::TooManyResponses);
}
fn add(&mut self, blob: Self::Item) -> Result<bool, LookupVerifyError> {
let block_root = blob.block_root();
if self.request.block_root != block_root {
return Err(LookupVerifyError::UnrequestedBlockRoot(block_root));
@@ -63,34 +56,16 @@ impl<E: EthSpec> ActiveBlobsByRootRequest<E> {
if !self.request.indices.contains(&blob.index) {
return Err(LookupVerifyError::UnrequestedIndex(blob.index));
}
if self.blobs.iter().any(|b| b.index == blob.index) {
if self.items.iter().any(|b| b.index == blob.index) {
return Err(LookupVerifyError::DuplicateData);
}
self.blobs.push(blob);
if self.blobs.len() >= self.request.indices.len() {
// All expected chunks received, return result early
self.resolved = true;
Ok(Some(std::mem::take(&mut self.blobs)))
} else {
Ok(None)
}
self.items.push(blob);
Ok(self.items.len() >= self.request.indices.len())
}
pub fn terminate(self) -> Result<(), LookupVerifyError> {
if self.resolved {
Ok(())
} else {
Err(LookupVerifyError::NotEnoughResponsesReturned {
expected: self.request.indices.len(),
actual: self.blobs.len(),
})
}
}
/// Mark request as resolved (= has returned something downstream) while marking this status as
/// true for future calls.
pub fn resolve(&mut self) -> bool {
std::mem::replace(&mut self.resolved, true)
fn consume(&mut self) -> Vec<Self::Item> {
std::mem::take(&mut self.items)
}
}

View File

@@ -1,9 +1,9 @@
use beacon_chain::get_block_root;
use lighthouse_network::{rpc::BlocksByRootRequest, PeerId};
use lighthouse_network::rpc::BlocksByRootRequest;
use std::sync::Arc;
use types::{ChainSpec, EthSpec, Hash256, SignedBeaconBlock};
use super::LookupVerifyError;
use super::{ActiveRequestItems, LookupVerifyError};
#[derive(Debug, Copy, Clone)]
pub struct BlocksByRootSingleRequest(pub Hash256);
@@ -14,47 +14,38 @@ impl BlocksByRootSingleRequest {
}
}
pub struct ActiveBlocksByRootRequest {
pub struct BlocksByRootRequestItems<E: EthSpec> {
request: BlocksByRootSingleRequest,
resolved: bool,
pub(crate) peer_id: PeerId,
items: Vec<Arc<SignedBeaconBlock<E>>>,
}
impl ActiveBlocksByRootRequest {
pub fn new(request: BlocksByRootSingleRequest, peer_id: PeerId) -> Self {
impl<E: EthSpec> BlocksByRootRequestItems<E> {
pub fn new(request: BlocksByRootSingleRequest) -> Self {
Self {
request,
resolved: false,
peer_id,
items: vec![],
}
}
}
impl<E: EthSpec> ActiveRequestItems for BlocksByRootRequestItems<E> {
type Item = Arc<SignedBeaconBlock<E>>;
/// Append a response to the single chunk request. If the chunk is valid, the request is
/// resolved immediately.
/// The active request SHOULD be dropped after `add_response` returns an error
pub fn add_response<E: EthSpec>(
&mut self,
block: Arc<SignedBeaconBlock<E>>,
) -> Result<Arc<SignedBeaconBlock<E>>, LookupVerifyError> {
if self.resolved {
return Err(LookupVerifyError::TooManyResponses);
}
fn add(&mut self, block: Self::Item) -> Result<bool, LookupVerifyError> {
let block_root = get_block_root(&block);
if self.request.0 != block_root {
return Err(LookupVerifyError::UnrequestedBlockRoot(block_root));
}
// Valid data, blocks by root expects a single response
self.resolved = true;
Ok(block)
self.items.push(block);
// Always returns true, blocks by root expects a single response
Ok(true)
}
pub fn terminate(self) -> Result<(), LookupVerifyError> {
if self.resolved {
Ok(())
} else {
Err(LookupVerifyError::NoResponseReturned)
}
fn consume(&mut self) -> Vec<Self::Item> {
std::mem::take(&mut self.items)
}
}

View File

@@ -1,9 +1,8 @@
use lighthouse_network::service::api_types::DataColumnsByRootRequester;
use lighthouse_network::{rpc::methods::DataColumnsByRootRequest, PeerId};
use lighthouse_network::rpc::methods::DataColumnsByRootRequest;
use std::sync::Arc;
use types::{ChainSpec, DataColumnIdentifier, DataColumnSidecar, EthSpec, Hash256};
use super::LookupVerifyError;
use super::{ActiveRequestItems, LookupVerifyError};
#[derive(Debug, Clone)]
pub struct DataColumnsByRootSingleBlockRequest {
@@ -26,40 +25,27 @@ impl DataColumnsByRootSingleBlockRequest {
}
}
pub struct ActiveDataColumnsByRootRequest<E: EthSpec> {
pub struct DataColumnsByRootRequestItems<E: EthSpec> {
request: DataColumnsByRootSingleBlockRequest,
items: Vec<Arc<DataColumnSidecar<E>>>,
resolved: bool,
pub(crate) peer_id: PeerId,
pub(crate) requester: DataColumnsByRootRequester,
}
impl<E: EthSpec> ActiveDataColumnsByRootRequest<E> {
pub fn new(
request: DataColumnsByRootSingleBlockRequest,
peer_id: PeerId,
requester: DataColumnsByRootRequester,
) -> Self {
impl<E: EthSpec> DataColumnsByRootRequestItems<E> {
pub fn new(request: DataColumnsByRootSingleBlockRequest) -> Self {
Self {
request,
items: vec![],
resolved: false,
peer_id,
requester,
}
}
}
impl<E: EthSpec> ActiveRequestItems for DataColumnsByRootRequestItems<E> {
type Item = Arc<DataColumnSidecar<E>>;
/// Appends a chunk to this multi-item request. If all expected chunks are received, this
/// method returns `Some`, resolving the request before the stream terminator.
/// The active request SHOULD be dropped after `add_response` returns an error
pub fn add_response(
&mut self,
data_column: Arc<DataColumnSidecar<E>>,
) -> Result<Option<Vec<Arc<DataColumnSidecar<E>>>>, LookupVerifyError> {
if self.resolved {
return Err(LookupVerifyError::TooManyResponses);
}
fn add(&mut self, data_column: Self::Item) -> Result<bool, LookupVerifyError> {
let block_root = data_column.block_root();
if self.request.block_root != block_root {
return Err(LookupVerifyError::UnrequestedBlockRoot(block_root));
@@ -75,29 +61,11 @@ impl<E: EthSpec> ActiveDataColumnsByRootRequest<E> {
}
self.items.push(data_column);
if self.items.len() >= self.request.indices.len() {
// All expected chunks received, return result early
self.resolved = true;
Ok(Some(std::mem::take(&mut self.items)))
} else {
Ok(None)
}
Ok(self.items.len() >= self.request.indices.len())
}
pub fn terminate(self) -> Result<(), LookupVerifyError> {
if self.resolved {
Ok(())
} else {
Err(LookupVerifyError::NotEnoughResponsesReturned {
expected: self.request.indices.len(),
actual: self.items.len(),
})
}
}
/// Mark request as resolved (= has returned something downstream) while marking this status as
/// true for future calls.
pub fn resolve(&mut self) -> bool {
std::mem::replace(&mut self.resolved, true)
fn consume(&mut self) -> Vec<Self::Item> {
std::mem::take(&mut self.items)
}
}