Tree-sync friendly lookup sync tests (#8592)

- Step 0 of the tree-sync roadmap https://github.com/sigp/lighthouse/issues/7678

Current lookup sync tests are written in an explicit way that assumes how the internals of lookup sync work. For example, a test would do:

- Emit unknown block parent message
- Expect block request for X
- Respond with successful block request
- Expect block processing request for X
- Respond with successful processing request
- etc..

This is unnecessarily verbose, and it requires a complete rewrite whenever something changes in the internals of lookup sync (this has happened a few times, mostly for deneb and fulu).

What we really want to assert is:

- WHEN: we receive an unknown block parent message
- THEN: Lookup sync can sync that block
- ASSERT: Without penalizing peers, without unnecessary retries


  Keep all existing tests and add new cases written in the new style described above. The logic to serve and respond to requests is in the function `fn simulate` 2288a3aeb1/beacon_node/network/src/sync/tests/lookups.rs (L301)
- It controls peer behavior based on a `CompleteStrategy` where you can set for example "respond to BlocksByRoot requests with empty"
- It actually runs beacon processor messages by executing their closures. Now sync tests actually import blocks, increasing the test coverage of the interaction between sync and the da_checker.
- To achieve the above the tests create real blocks with the test harness. To make the tests as fast as before, I disabled crypto with `TestConfig`

Along the way I found a couple bugs, which I documented on the diff.


Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
Lion - dapplion
2026-02-12 21:24:51 -07:00
committed by GitHub
parent c59e4a0cee
commit f4a6b8d9b9
27 changed files with 2298 additions and 2381 deletions

1
Cargo.lock generated
View File

@@ -6081,6 +6081,7 @@ dependencies = [
"metrics",
"operation_pool",
"parking_lot",
"paste",
"rand 0.8.5",
"rand 0.9.2",
"rand_chacha 0.3.1",

View File

@@ -36,8 +36,12 @@ PROFILE ?= release
RECENT_FORKS_BEFORE_GLOAS=electra fulu
# List of all recent hard forks. This list is used to set env variables for http_api tests
# Include phase0 to test the code paths in sync that are pre blobs
RECENT_FORKS=electra fulu gloas
# For network tests include phase0 to cover genesis syncing (blocks without blobs or columns)
TEST_NETWORK_FORKS=phase0 $(RECENT_FORKS_BEFORE_GLOAS)
# Extra flags for Cargo
CARGO_INSTALL_EXTRA_FLAGS?=
@@ -226,12 +230,15 @@ test-op-pool-%:
# Run the tests in the `network` crate for all known forks.
# TODO(EIP-7732) Extend to support gloas by using RECENT_FORKS instead
test-network: $(patsubst %,test-network-%,$(RECENT_FORKS_BEFORE_GLOAS))
test-network: $(patsubst %,test-network-%,$(TEST_NETWORK_FORKS))
test-network-%:
env FORK_NAME=$* cargo nextest run --release \
--features "fork_from_env,$(TEST_FEATURES)" \
env FORK_NAME=$* cargo nextest run --no-fail-fast --release \
--features "fork_from_env,fake_crypto,$(TEST_FEATURES)" \
-p network
env FORK_NAME=$* cargo nextest run --no-fail-fast --release \
--features "fork_from_env,$(TEST_FEATURES)" \
-p network crypto_on
# Run the tests in the `slasher` crate for all supported database backends.
test-slasher:

View File

@@ -287,21 +287,6 @@ pub struct BlockImportData<E: EthSpec> {
pub consensus_context: ConsensusContext<E>,
}
impl<E: EthSpec> BlockImportData<E> {
pub fn __new_for_test(
block_root: Hash256,
state: BeaconState<E>,
parent_block: SignedBeaconBlock<E, BlindedPayload<E>>,
) -> Self {
Self {
block_root,
state,
parent_block,
consensus_context: ConsensusContext::new(Slot::new(0)),
}
}
}
/// Trait for common block operations.
pub trait AsBlock<E: EthSpec> {
fn slot(&self) -> Slot;

View File

@@ -698,6 +698,8 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
pub fn remove_pre_execution_block(&self, block_root: &Hash256) {
    // Only entries whose cached status is still `NotValidated` are evicted; anything else is
    // left untouched.
    // The read lock is immediately dropped so we can safely remove the block from the cache.
    let is_not_validated = matches!(
        self.get_cached_block(block_root),
        Some(BlockProcessStatus::NotValidated(_, _))
    );
    if !is_not_validated {
        return;
    }
    // If the block is execution invalid, this status is permanent and idempotent to this
    // block_root. We drop its components (e.g. columns) because they will never be useful.
    self.critical.write().pop(block_root);
}

View File

@@ -818,7 +818,11 @@ where
}
pub fn get_full_block(&self, block_root: &Hash256) -> RpcBlock<E> {
let block = self.chain.get_blinded_block(block_root).unwrap().unwrap();
let block = self
.chain
.get_blinded_block(block_root)
.unwrap()
.unwrap_or_else(|| panic!("block root does not exist in harness {block_root:?}"));
let full_block = self.chain.store.make_full_block(block_root, block).unwrap();
self.build_rpc_block_from_store_blobs(Some(*block_root), Arc::new(full_block))
}

View File

@@ -243,12 +243,15 @@ impl<E: EthSpec> From<ReadyWork> for WorkEvent<E> {
},
},
ReadyWork::RpcBlock(QueuedRpcBlock {
beacon_block_root: _,
beacon_block_root,
process_fn,
ignore_fn: _,
}) => Self {
drop_during_sync: false,
work: Work::RpcBlock { process_fn },
work: Work::RpcBlock {
process_fn,
beacon_block_root,
},
},
ReadyWork::IgnoredRpcBlock(IgnoredRpcBlock { process_fn }) => Self {
drop_during_sync: false,
@@ -389,6 +392,7 @@ pub enum Work<E: EthSpec> {
GossipLightClientFinalityUpdate(BlockingFn),
GossipLightClientOptimisticUpdate(BlockingFn),
RpcBlock {
beacon_block_root: Hash256,
process_fn: AsyncFn,
},
RpcBlobs {
@@ -479,7 +483,7 @@ pub enum WorkType {
}
impl<E: EthSpec> Work<E> {
fn str_id(&self) -> &'static str {
pub fn str_id(&self) -> &'static str {
self.to_type().into()
}
@@ -1432,7 +1436,10 @@ impl<E: EthSpec> BeaconProcessor<E> {
beacon_block_root: _,
process_fn,
} => task_spawner.spawn_async(process_fn),
Work::RpcBlock { process_fn }
Work::RpcBlock {
process_fn,
beacon_block_root: _,
}
| Work::RpcBlobs { process_fn }
| Work::RpcCustodyColumn(process_fn)
| Work::ColumnReconstruction(process_fn) => task_spawner.spawn_async(process_fn),

View File

@@ -18,6 +18,7 @@ use ssz_types::VariableList;
use std::cmp::max;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::warn;
use tree_hash::TreeHash;
use tree_hash_derive::TreeHash;
use types::{
@@ -537,6 +538,21 @@ impl<E: EthSpec> ExecutionBlockGenerator<E> {
.contains_key(&forkchoice_state.finalized_block_hash);
if unknown_head_block_hash || unknown_safe_block_hash || unknown_finalized_block_hash {
if unknown_head_block_hash {
warn!(?head_block_hash, "Received unknown head block hash");
}
if unknown_safe_block_hash {
warn!(
safe_block_hash = ?forkchoice_state.safe_block_hash,
"Received unknown safe block hash"
);
}
if unknown_finalized_block_hash {
warn!(
finalized_block_hash = ?forkchoice_state.finalized_block_hash,
"Received unknown finalized block hash"
)
}
return Ok(JsonForkchoiceUpdatedV1Response {
payload_status: JsonPayloadStatusV1 {
status: JsonPayloadStatusV1Status::Syncing,

View File

@@ -5,6 +5,7 @@ use crate::test_utils::{DEFAULT_CLIENT_VERSION, DEFAULT_MOCK_EL_PAYLOAD_VALUE_WE
use serde::{Deserialize, de::DeserializeOwned};
use serde_json::Value as JsonValue;
use std::sync::Arc;
use tracing::debug;
pub const GENERIC_ERROR_CODE: i64 = -1234;
pub const BAD_PARAMS_ERROR_CODE: i64 = -32602;
@@ -28,6 +29,8 @@ pub async fn handle_rpc<E: EthSpec>(
.ok_or_else(|| "missing/invalid params field".to_string())
.map_err(|s| (s, GENERIC_ERROR_CODE))?;
debug!(method, "Mock execution engine");
match method {
ETH_SYNCING => ctx
.syncing_response
@@ -517,6 +520,12 @@ pub async fn handle_rpc<E: EthSpec>(
_ => unreachable!(),
};
debug!(
?payload_attributes,
?forkchoice_state,
"ENGINE_FORKCHOICE_UPDATED"
);
// validate method called correctly according to fork time
if let Some(pa) = payload_attributes.as_ref() {
match ctx

View File

@@ -731,7 +731,7 @@ where
}
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, IntoStaticStr)]
pub enum RequestType<E: EthSpec> {
Status(StatusMessage),
Goodbye(GoodbyeReason),

View File

@@ -135,7 +135,7 @@ pub struct CustodyId {
pub struct CustodyRequester(pub SingleLookupReqId);
/// Application level requests sent to the network.
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AppRequestId {
Sync(SyncRequestId),
Router,

View File

@@ -8,6 +8,7 @@ edition = { workspace = true }
# NOTE: This can be run via cargo build --bin lighthouse --features network/disable-backfill
disable-backfill = []
fork_from_env = ["beacon_chain/fork_from_env"]
fake_crypto = ["bls/fake_crypto", "kzg/fake_crypto"]
portable = ["beacon_chain/portable"]
test_logger = []
@@ -57,6 +58,7 @@ k256 = "0.13.4"
kzg = { workspace = true }
libp2p = { workspace = true }
matches = "0.1.8"
paste = { workspace = true }
rand_08 = { package = "rand", version = "0.8.5" }
rand_chacha = "0.9.0"
rand_chacha_03 = { package = "rand_chacha", version = "0.3.1" }

View File

@@ -526,7 +526,10 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
);
self.try_send(BeaconWorkEvent {
drop_during_sync: false,
work: Work::RpcBlock { process_fn },
work: Work::RpcBlock {
process_fn,
beacon_block_root: block_root,
},
})
}

View File

@@ -219,7 +219,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
// to be sent from the peers if we already have them.
let publish_blobs = false;
self.fetch_engine_blobs_and_publish(signed_beacon_block, block_root, publish_blobs)
.await
.await;
}
_ => {}
}

View File

@@ -940,20 +940,20 @@ async fn data_column_reconstruction_at_deadline() {
.set_current_time(slot_start + Duration::from_millis(reconstruction_deadline_millis));
let min_columns_for_reconstruction = E::number_of_columns() / 2;
// Enqueue all columns first - at deadline, reconstruction races with gossip drain
for i in 0..min_columns_for_reconstruction {
rig.enqueue_gossip_data_columns(i);
rig.assert_event_journal_completes(&[WorkType::GossipDataColumnSidecar])
.await;
}
// Since we're at the reconstruction deadline, reconstruction should be triggered immediately
rig.assert_event_journal_with_timeout(
&[WorkType::ColumnReconstruction.into()],
Duration::from_millis(50),
false,
false,
)
.await;
// Expect all gossip events + reconstruction
let mut expected_events: Vec<WorkType> = (0..min_columns_for_reconstruction)
.map(|_| WorkType::GossipDataColumnSidecar)
.collect();
expected_events.push(WorkType::ColumnReconstruction);
rig.assert_event_journal_contains_ordered(&expected_events)
.await;
}
// Test the column reconstruction is delayed for columns that arrive for a previous slot.

View File

@@ -121,15 +121,24 @@ pub struct BlockLookups<T: BeaconChainTypes> {
// TODO: Why not index lookups by block_root?
single_block_lookups: FnvHashMap<SingleLookupId, SingleBlockLookup<T>>,
/// Used for testing assertions
metrics: BlockLookupsMetrics,
}
#[cfg(test)]
use lighthouse_network::service::api_types::Id;
#[cfg(test)]
/// Tuple of `SingleLookupId`, requested block root, awaiting parent block root (if any),
/// and list of peers that claim to have imported this set of block components.
pub(crate) type BlockLookupSummary = (Id, Hash256, Option<Hash256>, Vec<PeerId>);
#[derive(Debug)]
pub(crate) struct BlockLookupSummary {
/// Lookup ID
pub id: Id,
/// Requested block root
pub block_root: Hash256,
/// List of peers that claim to have imported this set of block components.
pub peers: Vec<PeerId>,
}
impl<T: BeaconChainTypes> BlockLookups<T> {
pub fn new() -> Self {
@@ -138,9 +147,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
IGNORED_CHAINS_CACHE_EXPIRY_SECONDS,
)),
single_block_lookups: Default::default(),
metrics: <_>::default(),
}
}
/// Returns a reference to the lookup counters held by this `BlockLookups`. Test builds only.
#[cfg(test)]
pub(crate) fn metrics(&self) -> &BlockLookupsMetrics {
&self.metrics
}
#[cfg(test)]
pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) {
self.ignored_chains.insert(block_root);
@@ -155,7 +170,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
pub(crate) fn active_single_lookups(&self) -> Vec<BlockLookupSummary> {
self.single_block_lookups
.iter()
.map(|(id, l)| (*id, l.block_root(), l.awaiting_parent(), l.all_peers()))
.map(|(id, l)| BlockLookupSummary {
id: *id,
block_root: l.block_root(),
peers: l.all_peers(),
})
.collect()
}
@@ -306,7 +325,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
// attributability. A peer can send us garbage blocks over blocks_by_root, and
// then correct blocks via blocks_by_range.
self.drop_lookup_and_children(*lookup_id);
self.drop_lookup_and_children(*lookup_id, "chain_too_long");
} else {
// Should never happen
error!(
@@ -414,6 +433,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
"Created block lookup"
);
metrics::inc_counter(&metrics::SYNC_LOOKUP_CREATED);
self.metrics.created_lookups += 1;
let result = lookup.continue_requests(cx);
if self.on_lookup_result(id, result, "new_current_lookup", cx) {
@@ -513,8 +533,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
/* Error responses */
pub fn peer_disconnected(&mut self, peer_id: &PeerId) {
for (_, lookup) in self.single_block_lookups.iter_mut() {
for (id, lookup) in self.single_block_lookups.iter_mut() {
lookup.remove_peer(peer_id);
if lookup.has_no_peers() {
debug!(%id, "Lookup has no peers");
}
}
}
@@ -566,7 +589,8 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
let action = match result {
BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_))
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..)) => {
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..))
| BlockProcessingResult::Err(BlockError::GenesisBlock) => {
// Successfully imported
request_state.on_processing_success()?;
Action::Continue
@@ -747,6 +771,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
let lookup_result = if imported {
Ok(LookupResult::Completed)
} else {
// A lookup may be in the following state:
// - Block awaiting processing from a different source
// - Blobs downloaded, processed, and inserted into the da_checker
//
// At this point the block fails processing (e.g. execution engine offline) and it is
// removed from the da_checker. Note that ALL components are removed from the da_checker
// so when we re-download and process the block we get the error
// MissingComponentsAfterAllProcessed and get stuck.
lookup.reset_requests();
lookup.continue_requests(cx)
};
let id = *id;
@@ -779,14 +812,17 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
/// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need
/// the parent to make progress to resolve, therefore we must drop them if the parent is
/// dropped.
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) {
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId, reason: &'static str) {
if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) {
debug!(
id = ?dropped_id,
block_root = ?dropped_lookup.block_root(),
awaiting_parent = ?dropped_lookup.awaiting_parent(),
reason,
"Dropping lookup"
);
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[reason]);
self.metrics.dropped_lookups += 1;
let child_lookups = self
.single_block_lookups
@@ -796,7 +832,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
.collect::<Vec<_>>();
for id in child_lookups {
self.drop_lookup_and_children(id);
self.drop_lookup_and_children(id, reason);
}
}
}
@@ -814,8 +850,13 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
Ok(LookupResult::Pending) => true, // no action
Ok(LookupResult::Completed) => {
if let Some(lookup) = self.single_block_lookups.remove(&id) {
debug!(block = ?lookup.block_root(), id, "Dropping completed lookup");
debug!(
block = ?lookup.block_root(),
id,
"Dropping completed lookup"
);
metrics::inc_counter(&metrics::SYNC_LOOKUP_COMPLETED);
self.metrics.completed_lookups += 1;
// Block imported, continue the requests of pending child blocks
self.continue_child_lookups(lookup.block_root(), cx);
self.update_metrics();
@@ -829,8 +870,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
Err(LookupRequestError::UnknownLookup) => false,
Err(error) => {
debug!(id, source, ?error, "Dropping lookup on request error");
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[error.into()]);
self.drop_lookup_and_children(id);
self.drop_lookup_and_children(id, error.into());
self.update_metrics();
false
}
@@ -897,7 +937,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
%block_root,
"Dropping lookup with no peers"
);
self.drop_lookup_and_children(lookup_id);
self.drop_lookup_and_children(lookup_id, "no_peers");
}
}
@@ -946,7 +986,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
}
metrics::inc_counter(&metrics::SYNC_LOOKUPS_STUCK);
self.drop_lookup_and_children(ancestor_stuck_lookup.id);
self.drop_lookup_and_children(ancestor_stuck_lookup.id, "lookup_stuck");
}
}
@@ -1022,3 +1062,10 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
}
}
}
/// Plain counters of lookup lifecycle events, updated next to the corresponding `metrics::`
/// counters and used for testing assertions.
#[derive(Clone, Debug, Default)]
pub(crate) struct BlockLookupsMetrics {
    /// Incremented each time a new block lookup is created.
    pub created_lookups: usize,
    /// Incremented each time `drop_lookup_and_children` removes a lookup.
    pub dropped_lookups: usize,
    /// Incremented each time a completed lookup is removed.
    pub completed_lookups: usize,
}

View File

@@ -109,6 +109,12 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
}
}
/// Throws away all request progress for this lookup: the block request state is rebuilt from
/// scratch for `self.block_root`, and the component requests return to `WaitingForBlock`.
pub fn reset_requests(&mut self) {
    let fresh_block_request = BlockRequestState::new(self.block_root);
    self.block_request_state = fresh_block_request;
    self.component_requests = ComponentRequests::WaitingForBlock;
}
/// Return the slot of this lookup's block if it's currently cached as `AwaitingProcessing`
pub fn peek_downloaded_block_slot(&self) -> Option<Slot> {
self.block_request_state

View File

@@ -70,6 +70,7 @@ use slot_clock::SlotClock;
use std::ops::Sub;
use std::sync::Arc;
use std::time::Duration;
use strum::IntoStaticStr;
use tokio::sync::mpsc;
use tracing::{debug, error, info, trace};
use types::{
@@ -90,7 +91,7 @@ pub const SLOT_IMPORT_TOLERANCE: usize = 32;
/// arbitrary number that covers a full slot, but allows recovery if sync get stuck for a few slots.
const NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS: u64 = 30;
#[derive(Debug)]
#[derive(Debug, IntoStaticStr)]
/// A message that can be sent to the sync manager thread.
pub enum SyncMessage<E: EthSpec> {
/// A useful peer has been discovered.
@@ -323,17 +324,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
#[cfg(test)]
pub(crate) fn active_single_lookups(&self) -> Vec<super::block_lookups::BlockLookupSummary> {
self.block_lookups.active_single_lookups()
pub(crate) fn send_sync_message(&mut self, sync_message: SyncMessage<<T>::EthSpec>) {
self.network.send_sync_message(sync_message);
}
#[cfg(test)]
pub(crate) fn active_parent_lookups(&self) -> Vec<Vec<Hash256>> {
self.block_lookups
.active_parent_lookups()
.iter()
.map(|c| c.chain.clone())
.collect()
pub(crate) fn block_lookups(&self) -> &BlockLookups<T> {
&self.block_lookups
}
#[cfg(test)]
pub(crate) fn range_sync(&self) -> &RangeSync<T> {
&self.range_sync
}
#[cfg(test)]
@@ -512,17 +514,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// there is no way to guarantee that libp2p always emits a error along with
/// the disconnect.
fn peer_disconnect(&mut self, peer_id: &PeerId) {
// Inject a Disconnected error on all requests associated with the disconnected peer
// to retry all batches/lookups
for sync_request_id in self.network.peer_disconnected(peer_id) {
self.inject_error(*peer_id, sync_request_id, RPCError::Disconnected);
}
// Remove peer from all data structures
self.range_sync.peer_disconnect(&mut self.network, peer_id);
let _ = self.backfill_sync.peer_disconnected(peer_id);
self.block_lookups.peer_disconnected(peer_id);
// Inject a Disconnected error on all requests associated with the disconnected peer
// to retry all batches/lookups. Only after removing the peer from the data structures to
// avoid sending retry requests to the disconnecting peer.
for sync_request_id in self.network.peer_disconnected(peer_id) {
self.inject_error(*peer_id, sync_request_id, RPCError::Disconnected);
}
// Regardless of the outcome, we update the sync status.
self.update_sync_state();
}

View File

@@ -17,7 +17,7 @@ use crate::sync::block_lookups::SingleLookupId;
use crate::sync::block_sidecar_coupling::CouplingError;
use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest;
use crate::sync::range_data_column_batch_request::RangeDataColumnBatchRequest;
use beacon_chain::block_verification_types::RpcBlock;
use beacon_chain::block_verification_types::{AsBlock, RpcBlock};
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState};
use custody::CustodyRequestResult;
use fnv::FnvHashMap;
@@ -1095,13 +1095,14 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
})?;
// Include only the blob indexes not yet imported (received through gossip)
let custody_indexes_to_fetch = self
let mut custody_indexes_to_fetch = self
.chain
.sampling_columns_for_epoch(current_epoch)
.iter()
.copied()
.filter(|index| !custody_indexes_imported.contains(index))
.collect::<Vec<_>>();
custody_indexes_to_fetch.sort_unstable();
if custody_indexes_to_fetch.is_empty() {
// No indexes required, do not issue any request
@@ -1595,7 +1596,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
)
.map_err(|_| SendErrorProcessor::SendError)?;
debug!(block = ?block_root, id, "Sending block for processing");
debug!(block = ?block_root, block_slot = %block.slot(), id, "Sending block for processing");
// Lookup sync event safety: If `beacon_processor.send_rpc_beacon_block` returns Ok() sync
// must receive a single `SyncMessage::BlockComponentProcessed` with this process type
beacon_processor

View File

@@ -198,7 +198,14 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
cx: &mut SyncNetworkContext<T>,
) -> CustodyRequestResult<T::EthSpec> {
let _guard = self.span.clone().entered();
if self.column_requests.values().all(|r| r.is_downloaded()) {
let total_requests = self.column_requests.len();
let completed_requests = self
.column_requests
.values()
.filter(|r| r.is_downloaded())
.count();
if completed_requests >= total_requests {
// All requests have completed successfully.
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
let mut seen_timestamps = vec![];
@@ -222,6 +229,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
let active_request_count_by_peer = cx.active_request_count_by_peer();
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
let mut columns_without_peers = vec![];
let lookup_peers = self.lookup_peers.read();
// Create deterministic hasher per request to ensure consistent peer ordering within
// this request (avoiding fragmentation) while varying selection across different requests
@@ -256,6 +264,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
return Err(Error::NoPeer(*column_index));
} else {
// Do not issue requests if there is no custody peer on this column
columns_without_peers.push(*column_index);
}
}
}
@@ -270,10 +279,13 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
lookup_peers = lookup_peers.len(),
"Requesting {} columns from {} peers", columns_requested_count, peer_requests,
);
} else {
} else if !columns_without_peers.is_empty() {
debug!(
lookup_peers = lookup_peers.len(),
"No column peers found for look up",
total_requests,
completed_requests,
?columns_without_peers,
"No column peers found for lookup",
);
}
@@ -288,7 +300,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
},
// If peer is in the lookup peer set, it claims to have imported the block and
// must have its columns in custody. In that case, set `true = enforce max_requests`
// and downscore if data_columns_by_root does not returned the expected custody
// and downscore if data_columns_by_root does not return the expected custody
// columns. For the rest of peers, don't downscore if columns are missing.
lookup_peers.contains(&peer_id),
)

View File

@@ -41,6 +41,13 @@ pub enum RangeSyncState {
pub type SyncChainStatus =
Result<Option<(RangeSyncType, Slot /* from */, Slot /* to */)>, &'static str>;
/// Counters of chains added to and removed from a `ChainCollection`. Test builds only.
#[cfg(test)]
#[derive(Debug, Default)]
pub struct ChainCollectionMetrics {
    /// Incremented after a new chain is inserted into the collection.
    pub chains_added: usize,
    /// Incremented by `on_chain_removed`.
    pub chains_removed: usize,
}
/// A collection of finalized and head chains currently being processed.
pub struct ChainCollection<T: BeaconChainTypes> {
/// The beacon chain for processing.
@@ -51,6 +58,9 @@ pub struct ChainCollection<T: BeaconChainTypes> {
head_chains: FnvHashMap<ChainId, SyncingChain<T>>,
/// The current sync state of the process.
state: RangeSyncState,
#[cfg(test)]
/// Used for testing assertions
metrics: ChainCollectionMetrics,
}
impl<T: BeaconChainTypes> ChainCollection<T> {
@@ -60,12 +70,23 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
finalized_chains: FnvHashMap::default(),
head_chains: FnvHashMap::default(),
state: RangeSyncState::Idle,
#[cfg(test)]
metrics: <_>::default(),
}
}
/// Returns a reference to this collection's added/removed chain counters. Test builds only.
#[cfg(test)]
pub(crate) fn metrics(&self) -> &ChainCollectionMetrics {
&self.metrics
}
/// Updates the Syncing state of the collection after a chain is removed.
fn on_chain_removed(&mut self, id: &ChainId, was_syncing: bool, sync_type: RangeSyncType) {
metrics::inc_counter_vec(&metrics::SYNCING_CHAINS_REMOVED, &[sync_type.as_str()]);
#[cfg(test)]
{
self.metrics.chains_removed += 1;
}
self.update_metrics();
match self.state {
@@ -510,6 +531,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
);
collection.insert(id, new_chain);
metrics::inc_counter_vec(&metrics::SYNCING_CHAINS_ADDED, &[sync_type.as_str()]);
#[cfg(test)]
{
self.metrics.chains_added += 1;
}
self.update_metrics();
}
}

View File

@@ -98,6 +98,11 @@ where
self.failed_chains.keys().copied().collect()
}
/// Forwards to `self.chains.metrics()`, exposing the chain collection counters. Test builds only.
#[cfg(test)]
pub(crate) fn metrics(&self) -> &super::chain_collection::ChainCollectionMetrics {
self.chains.metrics()
}
pub fn state(&self) -> SyncChainStatus {
self.chains.state()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,19 @@
use crate::NetworkMessage;
use crate::sync::SyncMessage;
use crate::sync::block_lookups::BlockLookupsMetrics;
use crate::sync::manager::SyncManager;
use crate::sync::range_sync::RangeSyncType;
use crate::sync::tests::lookups::SimulateConfig;
use beacon_chain::block_verification_types::RpcBlock;
use beacon_chain::builder::Witness;
use beacon_chain::custody_context::NodeCustodyType;
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
use beacon_processor::WorkEvent;
use lighthouse_network::NetworkGlobals;
use lighthouse_network::rpc::RequestType;
use lighthouse_network::service::api_types::{AppRequestId, Id};
use lighthouse_network::{NetworkGlobals, PeerId};
use rand_chacha::ChaCha20Rng;
use slot_clock::ManualSlotClock;
use std::collections::{HashMap, HashSet};
use std::fs::OpenOptions;
use std::io::Write;
use std::sync::{Arc, Once};
@@ -16,7 +22,7 @@ use tokio::sync::mpsc;
use tracing_subscriber::fmt::MakeWriter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use types::{ForkName, MinimalEthSpec as E};
use types::{ForkName, Hash256, MinimalEthSpec as E, Slot};
mod lookups;
mod range;
@@ -58,6 +64,8 @@ struct TestRig {
network_rx_queue: Vec<NetworkMessage<E>>,
/// Receiver for `SyncMessage` from the network
sync_rx: mpsc::UnboundedReceiver<SyncMessage<E>>,
/// Stores all `SyncMessage`s received from `sync_rx`
sync_rx_queue: Vec<SyncMessage<E>>,
/// To send `SyncMessage`. For sending RPC responses or block processing results to sync.
sync_manager: SyncManager<T>,
/// To manipulate sync state and peer connection status
@@ -68,6 +76,65 @@ struct TestRig {
rng_08: rand_chacha_03::ChaCha20Rng,
rng: ChaCha20Rng,
fork_name: ForkName,
/// Blocks that will be used in the test but may not be known to `harness` yet.
network_blocks_by_root: HashMap<Hash256, RpcBlock<E>>,
network_blocks_by_slot: HashMap<Slot, RpcBlock<E>>,
penalties: Vec<ReportedPenalty>,
/// All seen lookups through the test run
seen_lookups: HashMap<Id, SeenLookup>,
/// Registry of all requests done by the test
requests: Vec<(RequestType<E>, AppRequestId)>,
/// Persistent config on how to complete request
complete_strategy: SimulateConfig,
/// Metrics values to allow a reset
initial_block_lookups_metrics: BlockLookupsMetrics,
/// Fulu test type
fulu_test_type: FuluTestType,
}
// NOTE(review): "We" presumably refers to the node under test and "Them" to its peers — confirm.
/// Pairs a custody type for the "we" side with one for the "them" side of a Fulu test. The
/// mapping to `NodeCustodyType` is given by `we_node_custody_type` and
/// `them_node_custody_type`.
enum FuluTestType {
WeSupernodeThemSupernode,
WeSupernodeThemFullnodes,
WeFullnodeThemSupernode,
WeFullnodeThemFullnodes,
}
impl FuluTestType {
    /// Custody type of the "we" side: `Supernode` for the `WeSupernode*` variants, `Fullnode`
    /// for the `WeFullnode*` variants.
    fn we_node_custody_type(&self) -> NodeCustodyType {
        match self {
            Self::WeSupernodeThemSupernode => NodeCustodyType::Supernode,
            Self::WeSupernodeThemFullnodes => NodeCustodyType::Supernode,
            Self::WeFullnodeThemSupernode => NodeCustodyType::Fullnode,
            Self::WeFullnodeThemFullnodes => NodeCustodyType::Fullnode,
        }
    }

    /// Custody type of the "them" side: `Supernode` for the `*ThemSupernode` variants,
    /// `Fullnode` for the `*ThemFullnodes` variants.
    fn them_node_custody_type(&self) -> NodeCustodyType {
        match self {
            Self::WeSupernodeThemSupernode => NodeCustodyType::Supernode,
            Self::WeFullnodeThemSupernode => NodeCustodyType::Supernode,
            Self::WeSupernodeThemFullnodes => NodeCustodyType::Fullnode,
            Self::WeFullnodeThemFullnodes => NodeCustodyType::Fullnode,
        }
    }
}
/// Record of one lookup observed during a test run; `TestRig::seen_lookups` stores these keyed
/// by `Id`.
#[derive(Debug)]
struct SeenLookup {
/// Lookup's Id
id: Id,
/// Block root associated with this lookup (presumably the root it requested — TODO confirm)
block_root: Hash256,
/// Peers recorded for this lookup (presumably every peer it was observed with — TODO confirm)
seen_peers: HashSet<PeerId>,
}
/// One penalty entry collected in `TestRig::penalties`.
#[derive(Debug)]
struct ReportedPenalty {
/// Peer the penalty refers to (presumably the penalized peer — TODO confirm)
pub peer_id: PeerId,
/// Static message attached to the penalty (presumably the report reason — TODO confirm)
pub msg: &'static str,
}
// Environment variable to read if `fork_from_env` feature is enabled.

View File

@@ -185,7 +185,7 @@ impl TestRig {
}
#[track_caller]
fn expect_chain_segments(&mut self, count: usize) {
fn assert_chain_segments(&mut self, count: usize) {
for i in 0..count {
self.pop_received_processor_event(|ev| {
(ev.work_type() == beacon_processor::WorkType::ChainSegment).then_some(())
@@ -235,7 +235,7 @@ impl TestRig {
panic!("Should have a BlocksByRange request, filter {request_filter:?}: {e:?}")
});
let by_range_data_requests = if self.after_fulu() {
let by_range_data_requests = if self.is_after_fulu() {
let mut data_columns_requests = vec![];
while let Ok(data_columns_request) = self.pop_received_network_event(|ev| match ev {
NetworkMessage::SendRequest {
@@ -254,7 +254,7 @@ impl TestRig {
panic!("Found zero DataColumnsByRange requests, filter {request_filter:?}");
}
ByRangeDataRequestIds::PostPeerDAS(data_columns_requests)
} else if self.after_deneb() {
} else if self.is_after_deneb() {
let (id, peer) = self
.pop_received_network_event(|ev| match ev {
NetworkMessage::SendRequest {
@@ -489,7 +489,7 @@ fn build_rpc_block(
fn head_chain_removed_while_finalized_syncing() {
// NOTE: this is a regression test.
// Added in PR https://github.com/sigp/lighthouse/pull/2821
let mut rig = TestRig::test_setup();
let mut rig = TestRig::default();
// Get a peer with an advanced head
let head_peer = rig.add_head_peer();
@@ -514,11 +514,11 @@ fn head_chain_removed_while_finalized_syncing() {
async fn state_update_while_purging() {
// NOTE: this is a regression test.
// Added in PR https://github.com/sigp/lighthouse/pull/2827
let mut rig = TestRig::test_setup_with_custody_type(NodeCustodyType::SemiSupernode);
let mut rig = TestRig::with_custody_type(NodeCustodyType::SemiSupernode);
// Create blocks on a separate harness
// SemiSupernode ensures enough columns are stored for sampling + custody RPC block validation
let mut rig_2 = TestRig::test_setup_with_custody_type(NodeCustodyType::SemiSupernode);
let mut rig_2 = TestRig::with_custody_type(NodeCustodyType::SemiSupernode);
// Need to create blocks that can be inserted into the fork-choice and fit the "known
// conditions" below.
let head_peer_block = rig_2.create_canonical_block().await;
@@ -550,7 +550,7 @@ async fn state_update_while_purging() {
#[test]
fn pause_and_resume_on_ee_offline() {
let mut rig = TestRig::test_setup();
let mut rig = TestRig::default();
// add some peers
let peer1 = rig.add_head_peer();
@@ -559,7 +559,7 @@ fn pause_and_resume_on_ee_offline() {
// send the response to the request
rig.find_and_complete_blocks_by_range_request(filter().peer(peer1).epoch(0));
// the beacon processor shouldn't have received any work
rig.expect_empty_processor();
rig.assert_empty_processor();
// while the ee is offline, more peers might arrive. Add a new finalized peer.
let _peer2 = rig.add_finalized_peer();
@@ -570,14 +570,14 @@ fn pause_and_resume_on_ee_offline() {
// epoch for the other batch. So we can either filter by epoch or by sync type.
rig.find_and_complete_blocks_by_range_request(filter().epoch(0));
// the beacon processor shouldn't have received any work
rig.expect_empty_processor();
rig.assert_empty_processor();
// make the beacon processor available again.
// update_execution_engine_state implicitly calls resume
// now resume range, we should have two processing requests in the beacon processor.
rig.update_execution_engine_state(EngineState::Online);
// The head chain and finalized chain (2) should be in the processing queue
rig.expect_chain_segments(2);
rig.assert_chain_segments(2);
}
/// To attempt to finalize the peer's status finalized checkpoint we synced to its finalized epoch +
@@ -587,7 +587,7 @@ const EXTRA_SYNCED_EPOCHS: u64 = 2 + 1;
#[test]
fn finalized_sync_enough_global_custody_peers_few_chain_peers() {
// Run for all forks
let mut r = TestRig::test_setup();
let mut r = TestRig::default();
let advanced_epochs: u64 = 2;
let remote_info = r.finalized_remote_info_advanced_by(advanced_epochs.into());
@@ -604,7 +604,7 @@ fn finalized_sync_enough_global_custody_peers_few_chain_peers() {
#[test]
fn finalized_sync_not_enough_custody_peers_on_start() {
let mut r = TestRig::test_setup();
let mut r = TestRig::default();
// Only run post-PeerDAS
if !r.fork_name.fulu_enabled() {
return;
@@ -621,7 +621,7 @@ fn finalized_sync_not_enough_custody_peers_on_start() {
// Because we don't have enough peers on all columns we haven't sent any request.
// NOTE: There's a small chance that this single peer happens to custody exactly the set we
// expect, in which case the test will fail. TODO: find a way to make the test deterministic.
r.expect_empty_network();
r.assert_empty_network();
// Generate enough peers and supernodes to cover all custody columns
let peer_count = 100;

View File

@@ -49,7 +49,9 @@ impl TPublicKey for PublicKey {
}
fn serialize_uncompressed(&self) -> [u8; PUBLIC_KEY_UNCOMPRESSED_BYTES_LEN] {
panic!("fake_crypto does not support uncompressed keys")
let mut bytes = [0; PUBLIC_KEY_UNCOMPRESSED_BYTES_LEN];
bytes[0..PUBLIC_KEY_BYTES_LEN].copy_from_slice(&self.0);
bytes
}
fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
@@ -58,8 +60,17 @@ impl TPublicKey for PublicKey {
Ok(pubkey)
}
fn deserialize_uncompressed(_: &[u8]) -> Result<Self, Error> {
panic!("fake_crypto does not support uncompressed keys")
fn deserialize_uncompressed(bytes: &[u8]) -> Result<Self, Error> {
if bytes.len() == PUBLIC_KEY_UNCOMPRESSED_BYTES_LEN {
let mut pubkey = Self([0; PUBLIC_KEY_BYTES_LEN]);
pubkey.0.copy_from_slice(&bytes[0..PUBLIC_KEY_BYTES_LEN]);
Ok(pubkey)
} else {
Err(Error::InvalidByteLength {
got: bytes.len(),
expected: PUBLIC_KEY_UNCOMPRESSED_BYTES_LEN,
})
}
}
}
@@ -97,7 +108,7 @@ pub struct Signature([u8; SIGNATURE_BYTES_LEN]);
impl Signature {
fn infinity() -> Self {
Self([0; SIGNATURE_BYTES_LEN])
Self(INFINITY_SIGNATURE)
}
}
@@ -213,7 +224,11 @@ impl TSecretKey<Signature, PublicKey> for SecretKey {
}
fn public_key(&self) -> PublicKey {
PublicKey::infinity()
let mut bytes = [0; PUBLIC_KEY_BYTES_LEN];
bytes[0] = 0x01;
let to_copy = std::cmp::min(self.0.len(), bytes.len() - 1);
bytes[1..1 + to_copy].copy_from_slice(&self.0[..to_copy]);
PublicKey(bytes)
}
fn sign(&self, _msg: Hash256) -> Signature {

View File

@@ -5,6 +5,10 @@ authors = ["Pawan Dhananjay <pawandhananjay@gmail.com>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = []
fake_crypto = []
[dependencies]
arbitrary = { workspace = true }
c-kzg = { workspace = true }

View File

@@ -134,6 +134,9 @@ impl Kzg {
kzg_commitment: KzgCommitment,
kzg_proof: KzgProof,
) -> Result<(), Error> {
if cfg!(feature = "fake_crypto") {
return Ok(());
}
if !self.trusted_setup.verify_blob_kzg_proof(
blob,
&kzg_commitment.into(),
@@ -155,6 +158,9 @@ impl Kzg {
kzg_commitments: &[KzgCommitment],
kzg_proofs: &[KzgProof],
) -> Result<(), Error> {
if cfg!(feature = "fake_crypto") {
return Ok(());
}
let commitments_bytes = kzg_commitments
.iter()
.map(|comm| Bytes48::from(*comm))
@@ -204,6 +210,9 @@ impl Kzg {
y: &Bytes32,
kzg_proof: KzgProof,
) -> Result<bool, Error> {
if cfg!(feature = "fake_crypto") {
return Ok(true);
}
self.trusted_setup
.verify_kzg_proof(&kzg_commitment.into(), z, y, &kzg_proof.into())
.map_err(Into::into)
@@ -240,6 +249,9 @@ impl Kzg {
indices: Vec<CellIndex>,
kzg_commitments: &[Bytes48],
) -> Result<(), (Option<u64>, Error)> {
if cfg!(feature = "fake_crypto") {
return Ok(());
}
let mut column_groups: HashMap<u64, Vec<(CellRef, Bytes48, Bytes48)>> = HashMap::new();
let expected_len = cells.len();