Add range sync tests (#8989)

Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
Lion - dapplion
2026-03-31 00:07:22 -05:00
committed by GitHub
parent 2fb69f84c0
commit bc5d8c9f90
5 changed files with 781 additions and 552 deletions

View File

@@ -1,16 +1,18 @@
use super::*;
use crate::NetworkMessage;
use crate::network_beacon_processor::{InvalidBlockStorage, NetworkBeaconProcessor};
use crate::network_beacon_processor::{
ChainSegmentProcessId, InvalidBlockStorage, NetworkBeaconProcessor,
};
use crate::sync::block_lookups::{BlockLookupSummary, PARENT_DEPTH_TOLERANCE};
use crate::sync::{
SyncMessage,
manager::{BlockProcessType, BlockProcessingResult, SyncManager},
manager::{BatchProcessResult, BlockProcessType, BlockProcessingResult, SyncManager},
};
use beacon_chain::blob_verification::KzgVerifiedBlob;
use beacon_chain::block_verification_types::LookupBlock;
use beacon_chain::custody_context::NodeCustodyType;
use beacon_chain::{
AvailabilityProcessingStatus, BlockError, NotifyExecutionLayer,
AvailabilityProcessingStatus, BlockError, EngineState, NotifyExecutionLayer,
block_verification_types::{AsBlock, AvailableBlockData},
data_availability_checker::Availability,
test_utils::{
@@ -23,7 +25,7 @@ use educe::Educe;
use itertools::Itertools;
use lighthouse_network::discovery::CombinedKey;
use lighthouse_network::{
NetworkConfig, NetworkGlobals, PeerId,
NetworkConfig, NetworkGlobals, PeerAction, PeerId,
rpc::{RPCError, RequestType},
service::api_types::{AppRequestId, SyncRequestId},
types::SyncState,
@@ -64,14 +66,33 @@ pub struct SimulateConfig {
Option<Box<dyn Fn(Hash256) -> Option<BlockProcessingResult> + Send + Sync>>,
// Import a block directly before processing it (for simulating race conditions)
import_block_before_process: HashSet<Hash256>,
/// Number of range batch processing attempts that return FaultyFailure
range_faulty_failures: usize,
/// Number of range batch processing attempts that return NonFaultyFailure
range_non_faulty_failures: usize,
/// Number of BlocksByRange requests that return empty (no blocks)
return_no_range_blocks_n_times: usize,
/// Number of DataColumnsByRange requests that return empty (no columns)
return_no_range_columns_n_times: usize,
/// Number of DataColumnsByRange requests that return columns with unrequested indices
return_wrong_range_column_indices_n_times: usize,
/// Number of DataColumnsByRange requests that return columns with unrequested slots
return_wrong_range_column_slots_n_times: usize,
/// Number of DataColumnsByRange requests that return fewer columns than requested
/// (drops half the columns). Triggers CouplingError::DataColumnPeerFailure → retry_partial_batch
return_partial_range_columns_n_times: usize,
/// Set EE offline at start, bring back online after this many BlocksByRange responses
ee_offline_for_n_range_responses: Option<usize>,
/// Disconnect all peers after this many successful BlocksByRange responses.
successful_range_responses_before_disconnect: Option<usize>,
}
impl SimulateConfig {
fn new() -> Self {
pub(super) fn new() -> Self {
Self::default()
}
fn happy_path() -> Self {
pub(super) fn happy_path() -> Self {
Self::default()
}
@@ -111,7 +132,7 @@ impl SimulateConfig {
self
}
fn return_rpc_error(mut self, error: RPCError) -> Self {
pub(super) fn return_rpc_error(mut self, error: RPCError) -> Self {
self.return_rpc_error = Some(error);
self
}
@@ -133,6 +154,51 @@ impl SimulateConfig {
self.import_block_before_process.insert(block_root);
self
}
/// Make the first `n` range batch processing attempts report
/// `BatchProcessResult::FaultyFailure` (with a `LowToleranceError` penalty)
/// instead of actually processing the segment. Decremented as they fire.
pub(super) fn with_range_faulty_failures(mut self, n: usize) -> Self {
self.range_faulty_failures = n;
self
}
/// Make the first `n` range batch processing attempts report
/// `BatchProcessResult::NonFaultyFailure` instead of processing the segment.
pub(super) fn with_range_non_faulty_failures(mut self, n: usize) -> Self {
self.range_non_faulty_failures = n;
self
}
/// Answer the first `n` BlocksByRange requests with an empty response
/// (peer claims to have no blocks in the range).
pub(super) fn with_no_range_blocks_n_times(mut self, n: usize) -> Self {
self.return_no_range_blocks_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with an empty response.
pub(super) fn with_no_range_columns_n_times(mut self, n: usize) -> Self {
self.return_no_range_columns_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with columns whose
/// indices were NOT requested. Note: against a supernode this yields no
/// columns, since a supernode custodies all indices.
pub(super) fn with_wrong_range_column_indices_n_times(mut self, n: usize) -> Self {
self.return_wrong_range_column_indices_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with columns taken from
/// a slot outside the requested range.
pub(super) fn with_wrong_range_column_slots_n_times(mut self, n: usize) -> Self {
self.return_wrong_range_column_slots_n_times = n;
self
}
/// Answer the first `n` DataColumnsByRange requests with only half of the
/// requested columns, triggering the partial-batch retry path
/// (CouplingError::DataColumnPeerFailure).
pub(super) fn with_partial_range_columns_n_times(mut self, n: usize) -> Self {
self.return_partial_range_columns_n_times = n;
self
}
/// Start the test with the execution engine offline and bring it back
/// online after `n` BlocksByRange responses have been served.
pub(super) fn with_ee_offline_for_n_range_responses(mut self, n: usize) -> Self {
self.ee_offline_for_n_range_responses = Some(n);
self
}
/// Disconnect every connected peer after `n` successful BlocksByRange
/// responses, so any remaining in-flight responses arrive "late".
pub(super) fn with_disconnect_after_range_requests(mut self, n: usize) -> Self {
self.successful_range_responses_before_disconnect = Some(n);
self
}
}
fn genesis_fork() -> ForkName {
@@ -256,6 +322,7 @@ impl TestRig {
})
}
#[allow(dead_code)]
pub fn with_custody_type(node_custody_type: NodeCustodyType) -> Self {
Self::new(TestRigConfig {
fulu_test_type: FuluTestType::WeFullnodeThemSupernode,
@@ -267,13 +334,23 @@ impl TestRig {
///
/// Processes events from sync_rx (sink), beacon processor, and network queues in fixed
/// priority order each tick. Handles completed work before pulling new requests.
async fn simulate(&mut self, complete_strategy: SimulateConfig) {
pub(super) async fn simulate(&mut self, complete_strategy: SimulateConfig) {
self.complete_strategy = complete_strategy;
self.log(&format!(
"Running simulate with config {:?}",
self.complete_strategy
));
// Set EE offline at the start if configured
if self
.complete_strategy
.ee_offline_for_n_range_responses
.is_some()
{
self.sync_manager
.update_execution_engine_state(EngineState::Offline);
}
let mut i = 0;
loop {
@@ -352,9 +429,34 @@ impl TestRig {
process_fn.await
}
}
Work::RpcBlobs { process_fn }
| Work::RpcCustodyColumn(process_fn)
| Work::ChainSegment(process_fn) => process_fn.await,
Work::RpcBlobs { process_fn } | Work::RpcCustodyColumn(process_fn) => {
process_fn.await
}
Work::ChainSegment {
process_fn,
process_id: (chain_id, batch_epoch),
} => {
let sync_type =
ChainSegmentProcessId::RangeBatchId(chain_id, batch_epoch.into());
if self.complete_strategy.range_faulty_failures > 0 {
self.complete_strategy.range_faulty_failures -= 1;
self.push_sync_message(SyncMessage::BatchProcessed {
sync_type,
result: BatchProcessResult::FaultyFailure {
imported_blocks: 0,
penalty: PeerAction::LowToleranceError,
},
});
} else if self.complete_strategy.range_non_faulty_failures > 0 {
self.complete_strategy.range_non_faulty_failures -= 1;
self.push_sync_message(SyncMessage::BatchProcessed {
sync_type,
result: BatchProcessResult::NonFaultyFailure,
});
} else {
process_fn.await;
}
}
Work::Reprocess(_) => {} // ignore
other => panic!("Unsupported Work event {}", other.str_id()),
}
@@ -573,15 +675,50 @@ impl TestRig {
if self.complete_strategy.skip_by_range_routes {
return;
}
let blocks = (*req.start_slot()..req.start_slot() + req.count())
.filter_map(|slot| {
self.network_blocks_by_slot
.get(&Slot::new(slot))
.map(|block| block.block_cloned())
})
.collect::<Vec<_>>();
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
// Check if we should disconnect all peers instead of continuing
if let Some(ref mut remaining) = self
.complete_strategy
.successful_range_responses_before_disconnect
{
if *remaining == 0 {
// Disconnect all peers — remaining responses become "late"
for peer in self.get_connected_peers() {
self.peer_disconnected(peer);
}
return;
} else {
*remaining -= 1;
}
}
// Return empty response N times to simulate peer returning no blocks
if self.complete_strategy.return_no_range_blocks_n_times > 0 {
self.complete_strategy.return_no_range_blocks_n_times -= 1;
self.send_rpc_blocks_response(req_id, peer_id, &[]);
} else {
let blocks = (*req.start_slot()..req.start_slot() + req.count())
.filter_map(|slot| {
self.network_blocks_by_slot
.get(&Slot::new(slot))
.map(|block| block.block_cloned())
})
.collect::<Vec<_>>();
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
}
// Bring EE back online after N range responses
if let Some(ref mut remaining) =
self.complete_strategy.ee_offline_for_n_range_responses
{
if *remaining == 0 {
self.sync_manager
.update_execution_engine_state(EngineState::Online);
self.complete_strategy.ee_offline_for_n_range_responses = None;
} else {
*remaining -= 1;
}
}
}
(RequestType::BlobsByRange(req), AppRequestId::Sync(req_id)) => {
@@ -605,10 +742,80 @@ impl TestRig {
if self.complete_strategy.skip_by_range_routes {
return;
}
// Note: This function is permissive, blocks may have zero columns and it won't
// error. Some caveats:
// - The genesis block never has columns
// - Some blocks may not have columns as the blob count is random
// Return empty columns N times
if self.complete_strategy.return_no_range_columns_n_times > 0 {
self.complete_strategy.return_no_range_columns_n_times -= 1;
self.send_rpc_columns_response(req_id, peer_id, &[]);
return;
}
// Return columns with unrequested indices N times.
// Note: for supernodes this returns no columns since they custody all indices.
if self
.complete_strategy
.return_wrong_range_column_indices_n_times
> 0
{
self.complete_strategy
.return_wrong_range_column_indices_n_times -= 1;
let wrong_columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| !req.columns.contains(c.index()))
})
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
return;
}
// Return columns from an out-of-range slot N times
if self
.complete_strategy
.return_wrong_range_column_slots_n_times
> 0
{
self.complete_strategy
.return_wrong_range_column_slots_n_times -= 1;
// Get a column from a slot AFTER the requested range
let wrong_slot = req.start_slot + req.count;
let wrong_columns = self
.network_blocks_by_slot
.get(&Slot::new(wrong_slot))
.and_then(|block| block.block_data().data_columns())
.into_iter()
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| req.columns.contains(c.index()))
})
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
return;
}
// Return only half the requested columns N times — triggers CouplingError
if self.complete_strategy.return_partial_range_columns_n_times > 0 {
self.complete_strategy.return_partial_range_columns_n_times -= 1;
let columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
.flat_map(|columns| {
columns
.into_iter()
.filter(|c| req.columns.contains(c.index()))
})
.enumerate()
.filter(|(i, _)| i % 2 == 0) // keep every other column
.map(|(_, c)| c)
.collect::<Vec<_>>();
self.send_rpc_columns_response(req_id, peer_id, &columns);
return;
}
let columns = (req.start_slot..req.start_slot + req.count)
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
.filter_map(|block| block.block_data().data_columns())
@@ -726,7 +933,7 @@ impl TestRig {
// Preparation steps
/// Returns the block root of the tip of the built chain
async fn build_chain(&mut self, block_count: usize) -> Hash256 {
pub(super) async fn build_chain(&mut self, block_count: usize) -> Hash256 {
let mut blocks = vec![];
// Initialise a new beacon chain
@@ -947,6 +1154,30 @@ impl TestRig {
self.trigger_with_last_block();
}
/// Import blocks for slots 1..=up_to_slot into the local chain (advance local head)
pub(super) async fn import_blocks_up_to_slot(&mut self, up_to_slot: u64) {
// Inclusive range: genesis (slot 0) is already known; import 1..=up_to_slot.
// Panics if the network block store has a gap in the range.
for slot in 1..=up_to_slot {
let rpc_block = self
.network_blocks_by_slot
.get(&Slot::new(slot))
.unwrap_or_else(|| panic!("No block at slot {slot}"))
.clone();
let block_root = rpc_block.canonical_root();
// Import directly through the beacon chain, bypassing sync entirely,
// so the local head advances without any network interaction.
self.harness
.chain
.process_block(
block_root,
rpc_block,
NotifyExecutionLayer::Yes,
BlockImportSource::Gossip,
|| Ok(()),
)
.await
.unwrap();
}
// Recompute fork choice so the imported blocks become the canonical head.
self.harness.chain.recompute_head_at_current_slot().await;
}
/// Import a block directly into the chain without going through lookup sync
async fn import_block_by_root(&mut self, block_root: Hash256) {
let range_sync_block = self
@@ -1000,23 +1231,32 @@ impl TestRig {
// Post-test assertions
fn head_slot(&self) -> Slot {
pub(super) fn head_slot(&self) -> Slot {
self.harness.chain.head().head_slot()
}
fn assert_head_slot(&self, slot: u64) {
pub(super) fn assert_head_slot(&self, slot: u64) {
assert_eq!(self.head_slot(), Slot::new(slot), "Unexpected head slot");
}
fn max_known_slot(&self) -> Slot {
pub(super) fn max_known_slot(&self) -> Slot {
self.network_blocks_by_slot
.keys()
.max()
.copied()
.expect("no blocks")
.unwrap_or_default()
}
fn assert_penalties(&self, expected_penalties: &[&'static str]) {
/// Returns the finalized checkpoint epoch of the local chain's cached
/// canonical head (post-test assertion helper).
pub(super) fn finalized_epoch(&self) -> types::Epoch {
self.harness
.chain
.canonical_head
.cached_head()
.finalized_checkpoint()
.epoch
}
pub(super) fn assert_penalties(&self, expected_penalties: &[&'static str]) {
let penalties = self
.penalties
.iter()
@@ -1034,7 +1274,7 @@ impl TestRig {
}
}
fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
pub(super) fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
if self.penalties.is_empty() {
panic!("No penalties but expected some of type {expected_penalty}");
}
@@ -1051,7 +1291,7 @@ impl TestRig {
}
}
fn assert_no_penalties(&mut self) {
pub(super) fn assert_no_penalties(&mut self) {
if !self.penalties.is_empty() {
panic!("Some downscore events: {:?}", self.penalties);
}
@@ -1102,7 +1342,7 @@ impl TestRig {
}
/// Assert there is at least one range sync chain created and that all sync chains completed
fn assert_successful_range_sync(&self) {
pub(super) fn assert_successful_range_sync(&self) {
assert!(
self.range_sync_chains_added() > 0,
"No created range sync chains"
@@ -1425,6 +1665,7 @@ impl TestRig {
}
}
#[allow(dead_code)]
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
&mut self,
predicate_transform: F,