Add range sync tests (#8989)

Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
Lion - dapplion
2026-03-31 00:07:22 -05:00
committed by GitHub
parent 2fb69f84c0
commit bc5d8c9f90
5 changed files with 781 additions and 552 deletions

View File

@@ -1,16 +1,18 @@
use super::*;
use crate::NetworkMessage;
use crate::network_beacon_processor::{InvalidBlockStorage, NetworkBeaconProcessor};
use crate::network_beacon_processor::{
ChainSegmentProcessId, InvalidBlockStorage, NetworkBeaconProcessor,
};
use crate::sync::block_lookups::{BlockLookupSummary, PARENT_DEPTH_TOLERANCE};
use crate::sync::{
SyncMessage,
manager::{BlockProcessType, BlockProcessingResult, SyncManager},
manager::{BatchProcessResult, BlockProcessType, BlockProcessingResult, SyncManager},
};
use beacon_chain::blob_verification::KzgVerifiedBlob;
use beacon_chain::block_verification_types::LookupBlock;
use beacon_chain::custody_context::NodeCustodyType;
use beacon_chain::{
AvailabilityProcessingStatus, BlockError, NotifyExecutionLayer,
AvailabilityProcessingStatus, BlockError, EngineState, NotifyExecutionLayer,
block_verification_types::{AsBlock, AvailableBlockData},
data_availability_checker::Availability,
test_utils::{
@@ -23,7 +25,7 @@ use educe::Educe;
use itertools::Itertools;
use lighthouse_network::discovery::CombinedKey;
use lighthouse_network::{
NetworkConfig, NetworkGlobals, PeerId,
NetworkConfig, NetworkGlobals, PeerAction, PeerId,
rpc::{RPCError, RequestType},
service::api_types::{AppRequestId, SyncRequestId},
types::SyncState,
@@ -64,14 +66,33 @@ pub struct SimulateConfig {
Option<Box<dyn Fn(Hash256) -> Option<BlockProcessingResult> + Send + Sync>>,
// Import a block directly before processing it (for simulating race conditions)
import_block_before_process: HashSet<Hash256>,
/// Number of range batch processing attempts that return FaultyFailure
range_faulty_failures: usize,
/// Number of range batch processing attempts that return NonFaultyFailure
range_non_faulty_failures: usize,
/// Number of BlocksByRange requests that return empty (no blocks)
return_no_range_blocks_n_times: usize,
/// Number of DataColumnsByRange requests that return empty (no columns)
return_no_range_columns_n_times: usize,
/// Number of DataColumnsByRange requests that return columns with unrequested indices
return_wrong_range_column_indices_n_times: usize,
/// Number of DataColumnsByRange requests that return columns with unrequested slots
return_wrong_range_column_slots_n_times: usize,
/// Number of DataColumnsByRange requests that return fewer columns than requested
/// (drops half the columns). Triggers CouplingError::DataColumnPeerFailure → retry_partial_batch
return_partial_range_columns_n_times: usize,
/// Set EE offline at start, bring back online after this many BlocksByRange responses
ee_offline_for_n_range_responses: Option<usize>,
/// Disconnect all peers after this many successful BlocksByRange responses.
successful_range_responses_before_disconnect: Option<usize>,
}
impl SimulateConfig {
fn new() -> Self {
pub(super) fn new() -> Self {
Self::default()
}
fn happy_path() -> Self {
pub(super) fn happy_path() -> Self {
Self::default()
}
@@ -111,7 +132,7 @@ impl SimulateConfig {
self
}
fn return_rpc_error(mut self, error: RPCError) -> Self {
pub(super) fn return_rpc_error(mut self, error: RPCError) -> Self {
self.return_rpc_error = Some(error);
self
}
@@ -133,6 +154,51 @@ impl SimulateConfig {
self.import_block_before_process.insert(block_root);
self
}
/// Make the first `n` range batch processing attempts report
/// `BatchProcessResult::FaultyFailure` (with a `LowToleranceError` penalty)
/// instead of actually processing the segment. Decremented as they fire.
pub(super) fn with_range_faulty_failures(mut self, n: usize) -> Self {
self.range_faulty_failures = n;
self
}
/// Make the first `n` range batch processing attempts report
/// `BatchProcessResult::NonFaultyFailure` instead of processing the segment.
pub(super) fn with_range_non_faulty_failures(mut self, n: usize) -> Self {
self.range_non_faulty_failures = n;
self
}
/// Answer the first `n` BlocksByRange requests with an empty response
/// (peer claims to have no blocks in the range).
pub(super) fn with_no_range_blocks_n_times(mut self, n: usize) -> Self {
self.return_no_range_blocks_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with an empty response.
pub(super) fn with_no_range_columns_n_times(mut self, n: usize) -> Self {
self.return_no_range_columns_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with columns whose
/// indices were NOT requested. Note: against a supernode this yields no
/// columns, since a supernode custodies all indices.
pub(super) fn with_wrong_range_column_indices_n_times(mut self, n: usize) -> Self {
self.return_wrong_range_column_indices_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with columns taken from
/// a slot outside the requested range.
pub(super) fn with_wrong_range_column_slots_n_times(mut self, n: usize) -> Self {
self.return_wrong_range_column_slots_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with only half of the
/// requested columns, triggering the partial-batch retry path
/// (CouplingError::DataColumnPeerFailure).
pub(super) fn with_partial_range_columns_n_times(mut self, n: usize) -> Self {
self.return_partial_range_columns_n_times = n;
self
}
/// Start the test with the execution engine offline and bring it back
/// online after `n` BlocksByRange responses have been served.
pub(super) fn with_ee_offline_for_n_range_responses(mut self, n: usize) -> Self {
self.ee_offline_for_n_range_responses = Some(n);
self
}
/// Disconnect every connected peer after `n` successful BlocksByRange
/// responses, so any remaining in-flight responses arrive "late".
pub(super) fn with_disconnect_after_range_requests(mut self, n: usize) -> Self {
self.successful_range_responses_before_disconnect = Some(n);
self
}
}
fn genesis_fork() -> ForkName {
@@ -256,6 +322,7 @@ impl TestRig {
})
}
#[allow(dead_code)]
pub fn with_custody_type(node_custody_type: NodeCustodyType) -> Self {
Self::new(TestRigConfig {
fulu_test_type: FuluTestType::WeFullnodeThemSupernode,
@@ -267,13 +334,23 @@ impl TestRig {
///
/// Processes events from sync_rx (sink), beacon processor, and network queues in fixed
/// priority order each tick. Handles completed work before pulling new requests.
async fn simulate(&mut self, complete_strategy: SimulateConfig) {
pub(super) async fn simulate(&mut self, complete_strategy: SimulateConfig) {
self.complete_strategy = complete_strategy;
self.log(&format!(
"Running simulate with config {:?}",
self.complete_strategy
));
// Set EE offline at the start if configured
if self
.complete_strategy
.ee_offline_for_n_range_responses
.is_some()
{
self.sync_manager
.update_execution_engine_state(EngineState::Offline);
}
let mut i = 0;
loop {
@@ -352,9 +429,34 @@ impl TestRig {
process_fn.await
}
}
Work::RpcBlobs { process_fn }
| Work::RpcCustodyColumn(process_fn)
| Work::ChainSegment(process_fn) => process_fn.await,
Work::RpcBlobs { process_fn } | Work::RpcCustodyColumn(process_fn) => {
process_fn.await
}
Work::ChainSegment {
process_fn,
process_id: (chain_id, batch_epoch),
} => {
let sync_type =
ChainSegmentProcessId::RangeBatchId(chain_id, batch_epoch.into());
if self.complete_strategy.range_faulty_failures > 0 {
self.complete_strategy.range_faulty_failures -= 1;
self.push_sync_message(SyncMessage::BatchProcessed {
sync_type,
result: BatchProcessResult::FaultyFailure {
imported_blocks: 0,
penalty: PeerAction::LowToleranceError,
},
});
} else if self.complete_strategy.range_non_faulty_failures > 0 {
self.complete_strategy.range_non_faulty_failures -= 1;
self.push_sync_message(SyncMessage::BatchProcessed {
sync_type,
result: BatchProcessResult::NonFaultyFailure,
});
} else {
process_fn.await;
}
}
Work::Reprocess(_) => {} // ignore
other => panic!("Unsupported Work event {}", other.str_id()),
}
@@ -573,15 +675,50 @@ impl TestRig {
if self.complete_strategy.skip_by_range_routes {
return;
}
let blocks = (*req.start_slot()..req.start_slot() + req.count())
.filter_map(|slot| {
self.network_blocks_by_slot
.get(&Slot::new(slot))
.map(|block| block.block_cloned())
})
.collect::<Vec<_>>();
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
// Check if we should disconnect all peers instead of continuing
if let Some(ref mut remaining) = self
.complete_strategy
.successful_range_responses_before_disconnect
{
if *remaining == 0 {
// Disconnect all peers — remaining responses become "late"
for peer in self.get_connected_peers() {
self.peer_disconnected(peer);
}
return;
} else {
*remaining -= 1;
}
}
// Return empty response N times to simulate peer returning no blocks
if self.complete_strategy.return_no_range_blocks_n_times > 0 {
self.complete_strategy.return_no_range_blocks_n_times -= 1;
self.send_rpc_blocks_response(req_id, peer_id, &[]);
} else {
let blocks = (*req.start_slot()..req.start_slot() + req.count())
.filter_map(|slot| {
self.network_blocks_by_slot
.get(&Slot::new(slot))
.map(|block| block.block_cloned())
})
.collect::<Vec<_>>();
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
}
// Bring EE back online after N range responses
if let Some(ref mut remaining) =
self.complete_strategy.ee_offline_for_n_range_responses
{
if *remaining == 0 {
self.sync_manager
.update_execution_engine_state(EngineState::Online);
self.complete_strategy.ee_offline_for_n_range_responses = None;
} else {
*remaining -= 1;
}
}
}
(RequestType::BlobsByRange(req), AppRequestId::Sync(req_id)) => {
@@ -605,10 +742,80 @@ impl TestRig {
if self.complete_strategy.skip_by_range_routes {
return;
}
// Note: This function is permissive, blocks may have zero columns and it won't
// error. Some caveats:
// - The genesis block never has columns
// - Some blocks may not have columns as the blob count is random
// Return empty columns N times
if self.complete_strategy.return_no_range_columns_n_times > 0 {
self.complete_strategy.return_no_range_columns_n_times -= 1;
self.send_rpc_columns_response(req_id, peer_id, &[]);
return;
}
// Return columns with unrequested indices N times.
// Note: for supernodes this returns no columns since they custody all indices.
if self
.complete_strategy
.return_wrong_range_column_indices_n_times
> 0
{
self.complete_strategy
.return_wrong_range_column_indices_n_times -= 1;
let wrong_columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| !req.columns.contains(c.index()))
})
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
return;
}
// Return columns from an out-of-range slot N times
if self
.complete_strategy
.return_wrong_range_column_slots_n_times
> 0
{
self.complete_strategy
.return_wrong_range_column_slots_n_times -= 1;
// Get a column from a slot AFTER the requested range
let wrong_slot = req.start_slot + req.count;
let wrong_columns = self
.network_blocks_by_slot
.get(&Slot::new(wrong_slot))
.and_then(|block| block.block_data().data_columns())
.into_iter()
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| req.columns.contains(c.index()))
})
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
return;
}
// Return only half the requested columns N times — triggers CouplingError
if self.complete_strategy.return_partial_range_columns_n_times > 0 {
self.complete_strategy.return_partial_range_columns_n_times -= 1;
let columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| req.columns.contains(c.index()))
})
.enumerate()
.filter(|(i, _)| i % 2 == 0) // keep every other column
.map(|(_, c)| c)
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &columns);
return;
}
let columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
@@ -726,7 +933,7 @@ impl TestRig {
// Preparation steps
/// Returns the block root of the tip of the built chain
async fn build_chain(&mut self, block_count: usize) -> Hash256 {
pub(super) async fn build_chain(&mut self, block_count: usize) -> Hash256 {
let mut blocks = vec![];
// Initialise a new beacon chain
@@ -947,6 +1154,30 @@ impl TestRig {
self.trigger_with_last_block();
}
/// Import blocks for slots 1..=up_to_slot into the local chain (advance local head)
pub(super) async fn import_blocks_up_to_slot(&mut self, up_to_slot: u64) {
// Inclusive range: genesis (slot 0) is already known; import 1..=up_to_slot.
// Panics if the network block store has a gap in the range.
for slot in 1..=up_to_slot {
let rpc_block = self
.network_blocks_by_slot
.get(&Slot::new(slot))
.unwrap_or_else(|| panic!("No block at slot {slot}"))
.clone();
let block_root = rpc_block.canonical_root();
// Import directly through the beacon chain, bypassing sync entirely,
// so the local head advances without any network interaction.
self.harness
.chain
.process_block(
block_root,
rpc_block,
NotifyExecutionLayer::Yes,
BlockImportSource::Gossip,
|| Ok(()),
)
.await
.unwrap();
}
// Recompute fork choice so the imported blocks become the canonical head.
self.harness.chain.recompute_head_at_current_slot().await;
}
/// Import a block directly into the chain without going through lookup sync
async fn import_block_by_root(&mut self, block_root: Hash256) {
let range_sync_block = self
@@ -1000,23 +1231,32 @@ impl TestRig {
// Post-test assertions
fn head_slot(&self) -> Slot {
pub(super) fn head_slot(&self) -> Slot {
self.harness.chain.head().head_slot()
}
fn assert_head_slot(&self, slot: u64) {
pub(super) fn assert_head_slot(&self, slot: u64) {
assert_eq!(self.head_slot(), Slot::new(slot), "Unexpected head slot");
}
fn max_known_slot(&self) -> Slot {
pub(super) fn max_known_slot(&self) -> Slot {
self.network_blocks_by_slot
.keys()
.max()
.copied()
.expect("no blocks")
.unwrap_or_default()
}
fn assert_penalties(&self, expected_penalties: &[&'static str]) {
/// Returns the finalized checkpoint epoch of the local chain's cached
/// canonical head (post-test assertion helper).
pub(super) fn finalized_epoch(&self) -> types::Epoch {
self.harness
.chain
.canonical_head
.cached_head()
.finalized_checkpoint()
.epoch
}
pub(super) fn assert_penalties(&self, expected_penalties: &[&'static str]) {
let penalties = self
.penalties
.iter()
@@ -1034,7 +1274,7 @@ impl TestRig {
}
}
fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
pub(super) fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
if self.penalties.is_empty() {
panic!("No penalties but expected some of type {expected_penalty}");
}
@@ -1051,7 +1291,7 @@ impl TestRig {
}
}
fn assert_no_penalties(&mut self) {
pub(super) fn assert_no_penalties(&mut self) {
if !self.penalties.is_empty() {
panic!("Some downscore events: {:?}", self.penalties);
}
@@ -1102,7 +1342,7 @@ impl TestRig {
}
/// Assert there is at least one range sync chain created and that all sync chains completed
fn assert_successful_range_sync(&self) {
pub(super) fn assert_successful_range_sync(&self) {
assert!(
self.range_sync_chains_added() > 0,
"No created range sync chains"
@@ -1425,6 +1665,7 @@ impl TestRig {
}
}
#[allow(dead_code)]
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
&mut self,
predicate_transform: F,