mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-20 14:28:37 +00:00
Add range sync tests (#8989)
Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
@@ -1,16 +1,18 @@
|
||||
use super::*;
|
||||
use crate::NetworkMessage;
|
||||
use crate::network_beacon_processor::{InvalidBlockStorage, NetworkBeaconProcessor};
|
||||
use crate::network_beacon_processor::{
|
||||
ChainSegmentProcessId, InvalidBlockStorage, NetworkBeaconProcessor,
|
||||
};
|
||||
use crate::sync::block_lookups::{BlockLookupSummary, PARENT_DEPTH_TOLERANCE};
|
||||
use crate::sync::{
|
||||
SyncMessage,
|
||||
manager::{BlockProcessType, BlockProcessingResult, SyncManager},
|
||||
manager::{BatchProcessResult, BlockProcessType, BlockProcessingResult, SyncManager},
|
||||
};
|
||||
use beacon_chain::blob_verification::KzgVerifiedBlob;
|
||||
use beacon_chain::block_verification_types::LookupBlock;
|
||||
use beacon_chain::custody_context::NodeCustodyType;
|
||||
use beacon_chain::{
|
||||
AvailabilityProcessingStatus, BlockError, NotifyExecutionLayer,
|
||||
AvailabilityProcessingStatus, BlockError, EngineState, NotifyExecutionLayer,
|
||||
block_verification_types::{AsBlock, AvailableBlockData},
|
||||
data_availability_checker::Availability,
|
||||
test_utils::{
|
||||
@@ -23,7 +25,7 @@ use educe::Educe;
|
||||
use itertools::Itertools;
|
||||
use lighthouse_network::discovery::CombinedKey;
|
||||
use lighthouse_network::{
|
||||
NetworkConfig, NetworkGlobals, PeerId,
|
||||
NetworkConfig, NetworkGlobals, PeerAction, PeerId,
|
||||
rpc::{RPCError, RequestType},
|
||||
service::api_types::{AppRequestId, SyncRequestId},
|
||||
types::SyncState,
|
||||
@@ -64,14 +66,33 @@ pub struct SimulateConfig {
|
||||
Option<Box<dyn Fn(Hash256) -> Option<BlockProcessingResult> + Send + Sync>>,
|
||||
// Import a block directly before processing it (for simulating race conditions)
|
||||
import_block_before_process: HashSet<Hash256>,
|
||||
/// Number of range batch processing attempts that return FaultyFailure
|
||||
range_faulty_failures: usize,
|
||||
/// Number of range batch processing attempts that return NonFaultyFailure
|
||||
range_non_faulty_failures: usize,
|
||||
/// Number of BlocksByRange requests that return empty (no blocks)
|
||||
return_no_range_blocks_n_times: usize,
|
||||
/// Number of DataColumnsByRange requests that return empty (no columns)
|
||||
return_no_range_columns_n_times: usize,
|
||||
/// Number of DataColumnsByRange requests that return columns with unrequested indices
|
||||
return_wrong_range_column_indices_n_times: usize,
|
||||
/// Number of DataColumnsByRange requests that return columns with unrequested slots
|
||||
return_wrong_range_column_slots_n_times: usize,
|
||||
/// Number of DataColumnsByRange requests that return fewer columns than requested
|
||||
/// (drops half the columns). Triggers CouplingError::DataColumnPeerFailure → retry_partial_batch
|
||||
return_partial_range_columns_n_times: usize,
|
||||
/// Set EE offline at start, bring back online after this many BlocksByRange responses
|
||||
ee_offline_for_n_range_responses: Option<usize>,
|
||||
/// Disconnect all peers after this many successful BlocksByRange responses.
|
||||
successful_range_responses_before_disconnect: Option<usize>,
|
||||
}
|
||||
|
||||
impl SimulateConfig {
|
||||
fn new() -> Self {
|
||||
pub(super) fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
fn happy_path() -> Self {
|
||||
pub(super) fn happy_path() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
@@ -111,7 +132,7 @@ impl SimulateConfig {
|
||||
self
|
||||
}
|
||||
|
||||
fn return_rpc_error(mut self, error: RPCError) -> Self {
|
||||
pub(super) fn return_rpc_error(mut self, error: RPCError) -> Self {
|
||||
self.return_rpc_error = Some(error);
|
||||
self
|
||||
}
|
||||
@@ -133,6 +154,51 @@ impl SimulateConfig {
|
||||
self.import_block_before_process.insert(block_root);
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_range_faulty_failures(mut self, n: usize) -> Self {
|
||||
self.range_faulty_failures = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_range_non_faulty_failures(mut self, n: usize) -> Self {
|
||||
self.range_non_faulty_failures = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_no_range_blocks_n_times(mut self, n: usize) -> Self {
|
||||
self.return_no_range_blocks_n_times = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_no_range_columns_n_times(mut self, n: usize) -> Self {
|
||||
self.return_no_range_columns_n_times = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_wrong_range_column_indices_n_times(mut self, n: usize) -> Self {
|
||||
self.return_wrong_range_column_indices_n_times = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_wrong_range_column_slots_n_times(mut self, n: usize) -> Self {
|
||||
self.return_wrong_range_column_slots_n_times = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_partial_range_columns_n_times(mut self, n: usize) -> Self {
|
||||
self.return_partial_range_columns_n_times = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_ee_offline_for_n_range_responses(mut self, n: usize) -> Self {
|
||||
self.ee_offline_for_n_range_responses = Some(n);
|
||||
self
|
||||
}
|
||||
|
||||
pub(super) fn with_disconnect_after_range_requests(mut self, n: usize) -> Self {
|
||||
self.successful_range_responses_before_disconnect = Some(n);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
fn genesis_fork() -> ForkName {
|
||||
@@ -256,6 +322,7 @@ impl TestRig {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn with_custody_type(node_custody_type: NodeCustodyType) -> Self {
|
||||
Self::new(TestRigConfig {
|
||||
fulu_test_type: FuluTestType::WeFullnodeThemSupernode,
|
||||
@@ -267,13 +334,23 @@ impl TestRig {
|
||||
///
|
||||
/// Processes events from sync_rx (sink), beacon processor, and network queues in fixed
|
||||
/// priority order each tick. Handles completed work before pulling new requests.
|
||||
async fn simulate(&mut self, complete_strategy: SimulateConfig) {
|
||||
pub(super) async fn simulate(&mut self, complete_strategy: SimulateConfig) {
|
||||
self.complete_strategy = complete_strategy;
|
||||
self.log(&format!(
|
||||
"Running simulate with config {:?}",
|
||||
self.complete_strategy
|
||||
));
|
||||
|
||||
// Set EE offline at the start if configured
|
||||
if self
|
||||
.complete_strategy
|
||||
.ee_offline_for_n_range_responses
|
||||
.is_some()
|
||||
{
|
||||
self.sync_manager
|
||||
.update_execution_engine_state(EngineState::Offline);
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
|
||||
loop {
|
||||
@@ -352,9 +429,34 @@ impl TestRig {
|
||||
process_fn.await
|
||||
}
|
||||
}
|
||||
Work::RpcBlobs { process_fn }
|
||||
| Work::RpcCustodyColumn(process_fn)
|
||||
| Work::ChainSegment(process_fn) => process_fn.await,
|
||||
Work::RpcBlobs { process_fn } | Work::RpcCustodyColumn(process_fn) => {
|
||||
process_fn.await
|
||||
}
|
||||
Work::ChainSegment {
|
||||
process_fn,
|
||||
process_id: (chain_id, batch_epoch),
|
||||
} => {
|
||||
let sync_type =
|
||||
ChainSegmentProcessId::RangeBatchId(chain_id, batch_epoch.into());
|
||||
if self.complete_strategy.range_faulty_failures > 0 {
|
||||
self.complete_strategy.range_faulty_failures -= 1;
|
||||
self.push_sync_message(SyncMessage::BatchProcessed {
|
||||
sync_type,
|
||||
result: BatchProcessResult::FaultyFailure {
|
||||
imported_blocks: 0,
|
||||
penalty: PeerAction::LowToleranceError,
|
||||
},
|
||||
});
|
||||
} else if self.complete_strategy.range_non_faulty_failures > 0 {
|
||||
self.complete_strategy.range_non_faulty_failures -= 1;
|
||||
self.push_sync_message(SyncMessage::BatchProcessed {
|
||||
sync_type,
|
||||
result: BatchProcessResult::NonFaultyFailure,
|
||||
});
|
||||
} else {
|
||||
process_fn.await;
|
||||
}
|
||||
}
|
||||
Work::Reprocess(_) => {} // ignore
|
||||
other => panic!("Unsupported Work event {}", other.str_id()),
|
||||
}
|
||||
@@ -573,15 +675,50 @@ impl TestRig {
|
||||
if self.complete_strategy.skip_by_range_routes {
|
||||
return;
|
||||
}
|
||||
let blocks = (*req.start_slot()..req.start_slot() + req.count())
|
||||
.filter_map(|slot| {
|
||||
self.network_blocks_by_slot
|
||||
.get(&Slot::new(slot))
|
||||
.map(|block| block.block_cloned())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
|
||||
// Check if we should disconnect all peers instead of continuing
|
||||
if let Some(ref mut remaining) = self
|
||||
.complete_strategy
|
||||
.successful_range_responses_before_disconnect
|
||||
{
|
||||
if *remaining == 0 {
|
||||
// Disconnect all peers — remaining responses become "late"
|
||||
for peer in self.get_connected_peers() {
|
||||
self.peer_disconnected(peer);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
*remaining -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Return empty response N times to simulate peer returning no blocks
|
||||
if self.complete_strategy.return_no_range_blocks_n_times > 0 {
|
||||
self.complete_strategy.return_no_range_blocks_n_times -= 1;
|
||||
self.send_rpc_blocks_response(req_id, peer_id, &[]);
|
||||
} else {
|
||||
let blocks = (*req.start_slot()..req.start_slot() + req.count())
|
||||
.filter_map(|slot| {
|
||||
self.network_blocks_by_slot
|
||||
.get(&Slot::new(slot))
|
||||
.map(|block| block.block_cloned())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.send_rpc_blocks_response(req_id, peer_id, &blocks);
|
||||
}
|
||||
|
||||
// Bring EE back online after N range responses
|
||||
if let Some(ref mut remaining) =
|
||||
self.complete_strategy.ee_offline_for_n_range_responses
|
||||
{
|
||||
if *remaining == 0 {
|
||||
self.sync_manager
|
||||
.update_execution_engine_state(EngineState::Online);
|
||||
self.complete_strategy.ee_offline_for_n_range_responses = None;
|
||||
} else {
|
||||
*remaining -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(RequestType::BlobsByRange(req), AppRequestId::Sync(req_id)) => {
|
||||
@@ -605,10 +742,80 @@ impl TestRig {
|
||||
if self.complete_strategy.skip_by_range_routes {
|
||||
return;
|
||||
}
|
||||
// Note: This function is permissive, blocks may have zero columns and it won't
|
||||
// error. Some caveats:
|
||||
// - The genesis block never has columns
|
||||
// - Some blocks may not have columns as the blob count is random
|
||||
|
||||
// Return empty columns N times
|
||||
if self.complete_strategy.return_no_range_columns_n_times > 0 {
|
||||
self.complete_strategy.return_no_range_columns_n_times -= 1;
|
||||
self.send_rpc_columns_response(req_id, peer_id, &[]);
|
||||
return;
|
||||
}
|
||||
|
||||
// Return columns with unrequested indices N times.
|
||||
// Note: for supernodes this returns no columns since they custody all indices.
|
||||
if self
|
||||
.complete_strategy
|
||||
.return_wrong_range_column_indices_n_times
|
||||
> 0
|
||||
{
|
||||
self.complete_strategy
|
||||
.return_wrong_range_column_indices_n_times -= 1;
|
||||
let wrong_columns = (req.start_slot..req.start_slot + req.count)
|
||||
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
|
||||
.filter_map(|block| block.block_data().data_columns())
|
||||
.flat_map(|columns| {
|
||||
columns
|
||||
.into_iter()
|
||||
.filter(|c| !req.columns.contains(c.index()))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
|
||||
return;
|
||||
}
|
||||
|
||||
// Return columns from an out-of-range slot N times
|
||||
if self
|
||||
.complete_strategy
|
||||
.return_wrong_range_column_slots_n_times
|
||||
> 0
|
||||
{
|
||||
self.complete_strategy
|
||||
.return_wrong_range_column_slots_n_times -= 1;
|
||||
// Get a column from a slot AFTER the requested range
|
||||
let wrong_slot = req.start_slot + req.count;
|
||||
let wrong_columns = self
|
||||
.network_blocks_by_slot
|
||||
.get(&Slot::new(wrong_slot))
|
||||
.and_then(|block| block.block_data().data_columns())
|
||||
.into_iter()
|
||||
.flat_map(|columns| {
|
||||
columns
|
||||
.into_iter()
|
||||
.filter(|c| req.columns.contains(c.index()))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.send_rpc_columns_response(req_id, peer_id, &wrong_columns);
|
||||
return;
|
||||
}
|
||||
|
||||
// Return only half the requested columns N times — triggers CouplingError
|
||||
if self.complete_strategy.return_partial_range_columns_n_times > 0 {
|
||||
self.complete_strategy.return_partial_range_columns_n_times -= 1;
|
||||
let columns = (req.start_slot..req.start_slot + req.count)
|
||||
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
|
||||
.filter_map(|block| block.block_data().data_columns())
|
||||
.flat_map(|columns| {
|
||||
columns
|
||||
.into_iter()
|
||||
.filter(|c| req.columns.contains(c.index()))
|
||||
})
|
||||
.enumerate()
|
||||
.filter(|(i, _)| i % 2 == 0) // keep every other column
|
||||
.map(|(_, c)| c)
|
||||
.collect::<Vec<_>>();
|
||||
self.send_rpc_columns_response(req_id, peer_id, &columns);
|
||||
return;
|
||||
}
|
||||
|
||||
let columns = (req.start_slot..req.start_slot + req.count)
|
||||
.filter_map(|slot| self.network_blocks_by_slot.get(&Slot::new(slot)))
|
||||
.filter_map(|block| block.block_data().data_columns())
|
||||
@@ -726,7 +933,7 @@ impl TestRig {
|
||||
// Preparation steps
|
||||
|
||||
/// Returns the block root of the tip of the built chain
|
||||
async fn build_chain(&mut self, block_count: usize) -> Hash256 {
|
||||
pub(super) async fn build_chain(&mut self, block_count: usize) -> Hash256 {
|
||||
let mut blocks = vec![];
|
||||
|
||||
// Initialise a new beacon chain
|
||||
@@ -947,6 +1154,30 @@ impl TestRig {
|
||||
self.trigger_with_last_block();
|
||||
}
|
||||
|
||||
/// Import blocks for slots 1..=up_to_slot into the local chain (advance local head)
|
||||
pub(super) async fn import_blocks_up_to_slot(&mut self, up_to_slot: u64) {
|
||||
for slot in 1..=up_to_slot {
|
||||
let rpc_block = self
|
||||
.network_blocks_by_slot
|
||||
.get(&Slot::new(slot))
|
||||
.unwrap_or_else(|| panic!("No block at slot {slot}"))
|
||||
.clone();
|
||||
let block_root = rpc_block.canonical_root();
|
||||
self.harness
|
||||
.chain
|
||||
.process_block(
|
||||
block_root,
|
||||
rpc_block,
|
||||
NotifyExecutionLayer::Yes,
|
||||
BlockImportSource::Gossip,
|
||||
|| Ok(()),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
self.harness.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
|
||||
/// Import a block directly into the chain without going through lookup sync
|
||||
async fn import_block_by_root(&mut self, block_root: Hash256) {
|
||||
let range_sync_block = self
|
||||
@@ -1000,23 +1231,32 @@ impl TestRig {
|
||||
|
||||
// Post-test assertions
|
||||
|
||||
fn head_slot(&self) -> Slot {
|
||||
pub(super) fn head_slot(&self) -> Slot {
|
||||
self.harness.chain.head().head_slot()
|
||||
}
|
||||
|
||||
fn assert_head_slot(&self, slot: u64) {
|
||||
pub(super) fn assert_head_slot(&self, slot: u64) {
|
||||
assert_eq!(self.head_slot(), Slot::new(slot), "Unexpected head slot");
|
||||
}
|
||||
|
||||
fn max_known_slot(&self) -> Slot {
|
||||
pub(super) fn max_known_slot(&self) -> Slot {
|
||||
self.network_blocks_by_slot
|
||||
.keys()
|
||||
.max()
|
||||
.copied()
|
||||
.expect("no blocks")
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn assert_penalties(&self, expected_penalties: &[&'static str]) {
|
||||
pub(super) fn finalized_epoch(&self) -> types::Epoch {
|
||||
self.harness
|
||||
.chain
|
||||
.canonical_head
|
||||
.cached_head()
|
||||
.finalized_checkpoint()
|
||||
.epoch
|
||||
}
|
||||
|
||||
pub(super) fn assert_penalties(&self, expected_penalties: &[&'static str]) {
|
||||
let penalties = self
|
||||
.penalties
|
||||
.iter()
|
||||
@@ -1034,7 +1274,7 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
|
||||
pub(super) fn assert_penalties_of_type(&self, expected_penalty: &'static str) {
|
||||
if self.penalties.is_empty() {
|
||||
panic!("No penalties but expected some of type {expected_penalty}");
|
||||
}
|
||||
@@ -1051,7 +1291,7 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_no_penalties(&mut self) {
|
||||
pub(super) fn assert_no_penalties(&mut self) {
|
||||
if !self.penalties.is_empty() {
|
||||
panic!("Some downscore events: {:?}", self.penalties);
|
||||
}
|
||||
@@ -1102,7 +1342,7 @@ impl TestRig {
|
||||
}
|
||||
|
||||
/// Assert there is at least one range sync chain created and that all sync chains completed
|
||||
fn assert_successful_range_sync(&self) {
|
||||
pub(super) fn assert_successful_range_sync(&self) {
|
||||
assert!(
|
||||
self.range_sync_chains_added() > 0,
|
||||
"No created range sync chains"
|
||||
@@ -1425,6 +1665,7 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
|
||||
&mut self,
|
||||
predicate_transform: F,
|
||||
|
||||
Reference in New Issue
Block a user