mirror of
https://github.com/sigp/lighthouse.git
synced 2026-06-17 18:58:23 +00:00
Tree-sync friendly lookup sync tests (#8592)
- Step 0 of the tree-sync roadmap https://github.com/sigp/lighthouse/issues/7678
Current lookup sync tests are written in an explicit way that assumes how the internals of lookup sync work. For example, a test would do:
- Emit unknown block parent message
- Expect block request for X
- Respond with successful block request
- Expect block processing request for X
- Respond with successful processing request
- etc..
This is unnecessarily verbose, and it requires a complete rewrite whenever something changes in the internals of lookup sync (this has happened a few times, mostly for Deneb and Fulu).
What we really want to assert is:
- WHEN: we receive an unknown block parent message
- THEN: Lookup sync can sync that block
- ASSERT: Without penalizing peers, without unnecessary retries
Keep all existing tests and add new cases written in the new style described above. The logic to serve and respond to requests is in the function `fn simulate` (2288a3aeb1/beacon_node/network/src/sync/tests/lookups.rs, L301)
- It controls peer behavior based on a `CompleteStrategy` where you can set for example "respond to BlocksByRoot requests with empty"
- It actually runs beacon processor messages by executing their closures. Sync tests now actually import blocks, extending test coverage to the interaction between sync and the da_checker.
- To achieve the above, the tests create real blocks with the test harness. To keep the tests as fast as before, I disabled crypto with `TestConfig`
Along the way I found a couple of bugs, which I documented in the diff.
Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
@@ -121,15 +121,24 @@ pub struct BlockLookups<T: BeaconChainTypes> {
|
||||
|
||||
// TODO: Why not index lookups by block_root?
|
||||
single_block_lookups: FnvHashMap<SingleLookupId, SingleBlockLookup<T>>,
|
||||
|
||||
/// Used for testing assertions
|
||||
metrics: BlockLookupsMetrics,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
|
||||
#[cfg(test)]
|
||||
/// Tuple of `SingleLookupId`, requested block root, awaiting parent block root (if any),
|
||||
/// and list of peers that claim to have imported this set of block components.
|
||||
pub(crate) type BlockLookupSummary = (Id, Hash256, Option<Hash256>, Vec<PeerId>);
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct BlockLookupSummary {
|
||||
/// Lookup ID
|
||||
pub id: Id,
|
||||
/// Requested block root
|
||||
pub block_root: Hash256,
|
||||
/// List of peers that claim to have imported this set of block components.
|
||||
pub peers: Vec<PeerId>,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
pub fn new() -> Self {
|
||||
@@ -138,9 +147,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
IGNORED_CHAINS_CACHE_EXPIRY_SECONDS,
|
||||
)),
|
||||
single_block_lookups: Default::default(),
|
||||
metrics: <_>::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn metrics(&self) -> &BlockLookupsMetrics {
|
||||
&self.metrics
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) {
|
||||
self.ignored_chains.insert(block_root);
|
||||
@@ -155,7 +170,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
pub(crate) fn active_single_lookups(&self) -> Vec<BlockLookupSummary> {
|
||||
self.single_block_lookups
|
||||
.iter()
|
||||
.map(|(id, l)| (*id, l.block_root(), l.awaiting_parent(), l.all_peers()))
|
||||
.map(|(id, l)| BlockLookupSummary {
|
||||
id: *id,
|
||||
block_root: l.block_root(),
|
||||
peers: l.all_peers(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -306,7 +325,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
// attributability. A peer can send us garbage blocks over blocks_by_root, and
|
||||
// then correct blocks via blocks_by_range.
|
||||
|
||||
self.drop_lookup_and_children(*lookup_id);
|
||||
self.drop_lookup_and_children(*lookup_id, "chain_too_long");
|
||||
} else {
|
||||
// Should never happen
|
||||
error!(
|
||||
@@ -414,6 +433,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
"Created block lookup"
|
||||
);
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUP_CREATED);
|
||||
self.metrics.created_lookups += 1;
|
||||
|
||||
let result = lookup.continue_requests(cx);
|
||||
if self.on_lookup_result(id, result, "new_current_lookup", cx) {
|
||||
@@ -513,8 +533,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
/* Error responses */
|
||||
|
||||
pub fn peer_disconnected(&mut self, peer_id: &PeerId) {
|
||||
for (_, lookup) in self.single_block_lookups.iter_mut() {
|
||||
for (id, lookup) in self.single_block_lookups.iter_mut() {
|
||||
lookup.remove_peer(peer_id);
|
||||
if lookup.has_no_peers() {
|
||||
debug!(%id, "Lookup has no peers");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -566,7 +589,8 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
|
||||
let action = match result {
|
||||
BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_))
|
||||
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..)) => {
|
||||
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..))
|
||||
| BlockProcessingResult::Err(BlockError::GenesisBlock) => {
|
||||
// Successfully imported
|
||||
request_state.on_processing_success()?;
|
||||
Action::Continue
|
||||
@@ -747,6 +771,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
let lookup_result = if imported {
|
||||
Ok(LookupResult::Completed)
|
||||
} else {
|
||||
// A lookup may be in the following state:
|
||||
// - Block awaiting processing from a different source
|
||||
// - Blobs downloaded processed, and inserted into the da_checker
|
||||
//
|
||||
// At this point the block fails processing (e.g. execution engine offline) and it is
|
||||
// removed from the da_checker. Note that ALL components are removed from the da_checker
|
||||
// so when we re-download and process the block we get the error
|
||||
// MissingComponentsAfterAllProcessed and get stuck.
|
||||
lookup.reset_requests();
|
||||
lookup.continue_requests(cx)
|
||||
};
|
||||
let id = *id;
|
||||
@@ -779,14 +812,17 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
/// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need
|
||||
/// the parent to make progress to resolve, therefore we must drop them if the parent is
|
||||
/// dropped.
|
||||
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) {
|
||||
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId, reason: &'static str) {
|
||||
if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) {
|
||||
debug!(
|
||||
id = ?dropped_id,
|
||||
block_root = ?dropped_lookup.block_root(),
|
||||
awaiting_parent = ?dropped_lookup.awaiting_parent(),
|
||||
reason,
|
||||
"Dropping lookup"
|
||||
);
|
||||
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[reason]);
|
||||
self.metrics.dropped_lookups += 1;
|
||||
|
||||
let child_lookups = self
|
||||
.single_block_lookups
|
||||
@@ -796,7 +832,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for id in child_lookups {
|
||||
self.drop_lookup_and_children(id);
|
||||
self.drop_lookup_and_children(id, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -814,8 +850,13 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
Ok(LookupResult::Pending) => true, // no action
|
||||
Ok(LookupResult::Completed) => {
|
||||
if let Some(lookup) = self.single_block_lookups.remove(&id) {
|
||||
debug!(block = ?lookup.block_root(), id, "Dropping completed lookup");
|
||||
debug!(
|
||||
block = ?lookup.block_root(),
|
||||
id,
|
||||
"Dropping completed lookup"
|
||||
);
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUP_COMPLETED);
|
||||
self.metrics.completed_lookups += 1;
|
||||
// Block imported, continue the requests of pending child blocks
|
||||
self.continue_child_lookups(lookup.block_root(), cx);
|
||||
self.update_metrics();
|
||||
@@ -829,8 +870,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
Err(LookupRequestError::UnknownLookup) => false,
|
||||
Err(error) => {
|
||||
debug!(id, source, ?error, "Dropping lookup on request error");
|
||||
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[error.into()]);
|
||||
self.drop_lookup_and_children(id);
|
||||
self.drop_lookup_and_children(id, error.into());
|
||||
self.update_metrics();
|
||||
false
|
||||
}
|
||||
@@ -897,7 +937,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
%block_root,
|
||||
"Dropping lookup with no peers"
|
||||
);
|
||||
self.drop_lookup_and_children(lookup_id);
|
||||
self.drop_lookup_and_children(lookup_id, "no_peers");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -946,7 +986,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
}
|
||||
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUPS_STUCK);
|
||||
self.drop_lookup_and_children(ancestor_stuck_lookup.id);
|
||||
self.drop_lookup_and_children(ancestor_stuck_lookup.id, "lookup_stuck");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1022,3 +1062,10 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub(crate) struct BlockLookupsMetrics {
|
||||
pub created_lookups: usize,
|
||||
pub dropped_lookups: usize,
|
||||
pub completed_lookups: usize,
|
||||
}
|
||||
|
||||
@@ -109,6 +109,12 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the status of all internal requests
|
||||
pub fn reset_requests(&mut self) {
|
||||
self.block_request_state = BlockRequestState::new(self.block_root);
|
||||
self.component_requests = ComponentRequests::WaitingForBlock;
|
||||
}
|
||||
|
||||
/// Return the slot of this lookup's block if it's currently cached as `AwaitingProcessing`
|
||||
pub fn peek_downloaded_block_slot(&self) -> Option<Slot> {
|
||||
self.block_request_state
|
||||
|
||||
@@ -70,6 +70,7 @@ use slot_clock::SlotClock;
|
||||
use std::ops::Sub;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use strum::IntoStaticStr;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{debug, error, info, trace};
|
||||
use types::{
|
||||
@@ -90,7 +91,7 @@ pub const SLOT_IMPORT_TOLERANCE: usize = 32;
|
||||
/// arbitrary number that covers a full slot, but allows recovery if sync get stuck for a few slots.
|
||||
const NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS: u64 = 30;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, IntoStaticStr)]
|
||||
/// A message that can be sent to the sync manager thread.
|
||||
pub enum SyncMessage<E: EthSpec> {
|
||||
/// A useful peer has been discovered.
|
||||
@@ -323,17 +324,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn active_single_lookups(&self) -> Vec<super::block_lookups::BlockLookupSummary> {
|
||||
self.block_lookups.active_single_lookups()
|
||||
pub(crate) fn send_sync_message(&mut self, sync_message: SyncMessage<<T>::EthSpec>) {
|
||||
self.network.send_sync_message(sync_message);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn active_parent_lookups(&self) -> Vec<Vec<Hash256>> {
|
||||
self.block_lookups
|
||||
.active_parent_lookups()
|
||||
.iter()
|
||||
.map(|c| c.chain.clone())
|
||||
.collect()
|
||||
pub(crate) fn block_lookups(&self) -> &BlockLookups<T> {
|
||||
&self.block_lookups
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn range_sync(&self) -> &RangeSync<T> {
|
||||
&self.range_sync
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -512,17 +514,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
/// there is no way to guarantee that libp2p always emits a error along with
|
||||
/// the disconnect.
|
||||
fn peer_disconnect(&mut self, peer_id: &PeerId) {
|
||||
// Inject a Disconnected error on all requests associated with the disconnected peer
|
||||
// to retry all batches/lookups
|
||||
for sync_request_id in self.network.peer_disconnected(peer_id) {
|
||||
self.inject_error(*peer_id, sync_request_id, RPCError::Disconnected);
|
||||
}
|
||||
|
||||
// Remove peer from all data structures
|
||||
self.range_sync.peer_disconnect(&mut self.network, peer_id);
|
||||
let _ = self.backfill_sync.peer_disconnected(peer_id);
|
||||
self.block_lookups.peer_disconnected(peer_id);
|
||||
|
||||
// Inject a Disconnected error on all requests associated with the disconnected peer
|
||||
// to retry all batches/lookups. Only after removing the peer from the data structures to
|
||||
// avoid sending retry requests to the disconnecting peer.
|
||||
for sync_request_id in self.network.peer_disconnected(peer_id) {
|
||||
self.inject_error(*peer_id, sync_request_id, RPCError::Disconnected);
|
||||
}
|
||||
|
||||
// Regardless of the outcome, we update the sync status.
|
||||
self.update_sync_state();
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::sync::block_lookups::SingleLookupId;
|
||||
use crate::sync::block_sidecar_coupling::CouplingError;
|
||||
use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest;
|
||||
use crate::sync::range_data_column_batch_request::RangeDataColumnBatchRequest;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::block_verification_types::{AsBlock, RpcBlock};
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState};
|
||||
use custody::CustodyRequestResult;
|
||||
use fnv::FnvHashMap;
|
||||
@@ -1095,13 +1095,14 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
})?;
|
||||
|
||||
// Include only the blob indexes not yet imported (received through gossip)
|
||||
let custody_indexes_to_fetch = self
|
||||
let mut custody_indexes_to_fetch = self
|
||||
.chain
|
||||
.sampling_columns_for_epoch(current_epoch)
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|index| !custody_indexes_imported.contains(index))
|
||||
.collect::<Vec<_>>();
|
||||
custody_indexes_to_fetch.sort_unstable();
|
||||
|
||||
if custody_indexes_to_fetch.is_empty() {
|
||||
// No indexes required, do not issue any request
|
||||
@@ -1595,7 +1596,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
)
|
||||
.map_err(|_| SendErrorProcessor::SendError)?;
|
||||
|
||||
debug!(block = ?block_root, id, "Sending block for processing");
|
||||
debug!(block = ?block_root, block_slot = %block.slot(), id, "Sending block for processing");
|
||||
// Lookup sync event safety: If `beacon_processor.send_rpc_beacon_block` returns Ok() sync
|
||||
// must receive a single `SyncMessage::BlockComponentProcessed` with this process type
|
||||
beacon_processor
|
||||
|
||||
@@ -198,7 +198,14 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> CustodyRequestResult<T::EthSpec> {
|
||||
let _guard = self.span.clone().entered();
|
||||
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
||||
let total_requests = self.column_requests.len();
|
||||
let completed_requests = self
|
||||
.column_requests
|
||||
.values()
|
||||
.filter(|r| r.is_downloaded())
|
||||
.count();
|
||||
|
||||
if completed_requests >= total_requests {
|
||||
// All requests have completed successfully.
|
||||
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
||||
let mut seen_timestamps = vec![];
|
||||
@@ -222,6 +229,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
|
||||
let active_request_count_by_peer = cx.active_request_count_by_peer();
|
||||
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
||||
let mut columns_without_peers = vec![];
|
||||
let lookup_peers = self.lookup_peers.read();
|
||||
// Create deterministic hasher per request to ensure consistent peer ordering within
|
||||
// this request (avoiding fragmentation) while varying selection across different requests
|
||||
@@ -256,6 +264,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
return Err(Error::NoPeer(*column_index));
|
||||
} else {
|
||||
// Do not issue requests if there is no custody peer on this column
|
||||
columns_without_peers.push(*column_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -270,10 +279,13 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
lookup_peers = lookup_peers.len(),
|
||||
"Requesting {} columns from {} peers", columns_requested_count, peer_requests,
|
||||
);
|
||||
} else {
|
||||
} else if !columns_without_peers.is_empty() {
|
||||
debug!(
|
||||
lookup_peers = lookup_peers.len(),
|
||||
"No column peers found for look up",
|
||||
total_requests,
|
||||
completed_requests,
|
||||
?columns_without_peers,
|
||||
"No column peers found for lookup",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -288,7 +300,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
},
|
||||
// If peer is in the lookup peer set, it claims to have imported the block and
|
||||
// must have its columns in custody. In that case, set `true = enforce max_requests`
|
||||
// and downscore if data_columns_by_root does not returned the expected custody
|
||||
// and downscore if data_columns_by_root does not return the expected custody
|
||||
// columns. For the rest of peers, don't downscore if columns are missing.
|
||||
lookup_peers.contains(&peer_id),
|
||||
)
|
||||
|
||||
@@ -41,6 +41,13 @@ pub enum RangeSyncState {
|
||||
pub type SyncChainStatus =
|
||||
Result<Option<(RangeSyncType, Slot /* from */, Slot /* to */)>, &'static str>;
|
||||
|
||||
#[cfg(test)]
|
||||
#[derive(Default, Debug)]
|
||||
pub struct ChainCollectionMetrics {
|
||||
pub chains_added: usize,
|
||||
pub chains_removed: usize,
|
||||
}
|
||||
|
||||
/// A collection of finalized and head chains currently being processed.
|
||||
pub struct ChainCollection<T: BeaconChainTypes> {
|
||||
/// The beacon chain for processing.
|
||||
@@ -51,6 +58,9 @@ pub struct ChainCollection<T: BeaconChainTypes> {
|
||||
head_chains: FnvHashMap<ChainId, SyncingChain<T>>,
|
||||
/// The current sync state of the process.
|
||||
state: RangeSyncState,
|
||||
#[cfg(test)]
|
||||
/// Used for testing assertions
|
||||
metrics: ChainCollectionMetrics,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
@@ -60,12 +70,23 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
finalized_chains: FnvHashMap::default(),
|
||||
head_chains: FnvHashMap::default(),
|
||||
state: RangeSyncState::Idle,
|
||||
#[cfg(test)]
|
||||
metrics: <_>::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn metrics(&self) -> &ChainCollectionMetrics {
|
||||
&self.metrics
|
||||
}
|
||||
|
||||
/// Updates the Syncing state of the collection after a chain is removed.
|
||||
fn on_chain_removed(&mut self, id: &ChainId, was_syncing: bool, sync_type: RangeSyncType) {
|
||||
metrics::inc_counter_vec(&metrics::SYNCING_CHAINS_REMOVED, &[sync_type.as_str()]);
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.metrics.chains_removed += 1;
|
||||
}
|
||||
self.update_metrics();
|
||||
|
||||
match self.state {
|
||||
@@ -510,6 +531,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
);
|
||||
collection.insert(id, new_chain);
|
||||
metrics::inc_counter_vec(&metrics::SYNCING_CHAINS_ADDED, &[sync_type.as_str()]);
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.metrics.chains_added += 1;
|
||||
}
|
||||
self.update_metrics();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,6 +98,11 @@ where
|
||||
self.failed_chains.keys().copied().collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn metrics(&self) -> &super::chain_collection::ChainCollectionMetrics {
|
||||
self.chains.metrics()
|
||||
}
|
||||
|
||||
pub fn state(&self) -> SyncChainStatus {
|
||||
self.chains.state()
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,13 +1,19 @@
|
||||
use crate::NetworkMessage;
|
||||
use crate::sync::SyncMessage;
|
||||
use crate::sync::block_lookups::BlockLookupsMetrics;
|
||||
use crate::sync::manager::SyncManager;
|
||||
use crate::sync::range_sync::RangeSyncType;
|
||||
use crate::sync::tests::lookups::SimulateConfig;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::builder::Witness;
|
||||
use beacon_chain::custody_context::NodeCustodyType;
|
||||
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
|
||||
use beacon_processor::WorkEvent;
|
||||
use lighthouse_network::NetworkGlobals;
|
||||
use lighthouse_network::rpc::RequestType;
|
||||
use lighthouse_network::service::api_types::{AppRequestId, Id};
|
||||
use lighthouse_network::{NetworkGlobals, PeerId};
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
use slot_clock::ManualSlotClock;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::Write;
|
||||
use std::sync::{Arc, Once};
|
||||
@@ -16,7 +22,7 @@ use tokio::sync::mpsc;
|
||||
use tracing_subscriber::fmt::MakeWriter;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::util::SubscriberInitExt;
|
||||
use types::{ForkName, MinimalEthSpec as E};
|
||||
use types::{ForkName, Hash256, MinimalEthSpec as E, Slot};
|
||||
|
||||
mod lookups;
|
||||
mod range;
|
||||
@@ -58,6 +64,8 @@ struct TestRig {
|
||||
network_rx_queue: Vec<NetworkMessage<E>>,
|
||||
/// Receiver for `SyncMessage` from the network
|
||||
sync_rx: mpsc::UnboundedReceiver<SyncMessage<E>>,
|
||||
/// Stores all `SyncMessage`s received from `sync_rx`
|
||||
sync_rx_queue: Vec<SyncMessage<E>>,
|
||||
/// To send `SyncMessage`. For sending RPC responses or block processing results to sync.
|
||||
sync_manager: SyncManager<T>,
|
||||
/// To manipulate sync state and peer connection status
|
||||
@@ -68,6 +76,65 @@ struct TestRig {
|
||||
rng_08: rand_chacha_03::ChaCha20Rng,
|
||||
rng: ChaCha20Rng,
|
||||
fork_name: ForkName,
|
||||
/// Blocks that will be used in the test but may not be known to `harness` yet.
|
||||
network_blocks_by_root: HashMap<Hash256, RpcBlock<E>>,
|
||||
network_blocks_by_slot: HashMap<Slot, RpcBlock<E>>,
|
||||
penalties: Vec<ReportedPenalty>,
|
||||
/// All seen lookups through the test run
|
||||
seen_lookups: HashMap<Id, SeenLookup>,
|
||||
/// Registry of all requests done by the test
|
||||
requests: Vec<(RequestType<E>, AppRequestId)>,
|
||||
/// Persistent config on how to complete request
|
||||
complete_strategy: SimulateConfig,
|
||||
/// Metrics values to allow a reset
|
||||
initial_block_lookups_metrics: BlockLookupsMetrics,
|
||||
/// Fulu test type
|
||||
fulu_test_type: FuluTestType,
|
||||
}
|
||||
|
||||
enum FuluTestType {
|
||||
WeSupernodeThemSupernode,
|
||||
WeSupernodeThemFullnodes,
|
||||
WeFullnodeThemSupernode,
|
||||
WeFullnodeThemFullnodes,
|
||||
}
|
||||
|
||||
impl FuluTestType {
|
||||
fn we_node_custody_type(&self) -> NodeCustodyType {
|
||||
match self {
|
||||
Self::WeSupernodeThemSupernode | Self::WeSupernodeThemFullnodes => {
|
||||
NodeCustodyType::Supernode
|
||||
}
|
||||
Self::WeFullnodeThemSupernode | Self::WeFullnodeThemFullnodes => {
|
||||
NodeCustodyType::Fullnode
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn them_node_custody_type(&self) -> NodeCustodyType {
|
||||
match self {
|
||||
Self::WeSupernodeThemSupernode | Self::WeFullnodeThemSupernode => {
|
||||
NodeCustodyType::Supernode
|
||||
}
|
||||
Self::WeSupernodeThemFullnodes | Self::WeFullnodeThemFullnodes => {
|
||||
NodeCustodyType::Fullnode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SeenLookup {
|
||||
/// Lookup's Id
|
||||
id: Id,
|
||||
block_root: Hash256,
|
||||
seen_peers: HashSet<PeerId>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ReportedPenalty {
|
||||
pub peer_id: PeerId,
|
||||
pub msg: &'static str,
|
||||
}
|
||||
|
||||
// Environment variable to read if `fork_from_env` feature is enabled.
|
||||
|
||||
@@ -185,7 +185,7 @@ impl TestRig {
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn expect_chain_segments(&mut self, count: usize) {
|
||||
fn assert_chain_segments(&mut self, count: usize) {
|
||||
for i in 0..count {
|
||||
self.pop_received_processor_event(|ev| {
|
||||
(ev.work_type() == beacon_processor::WorkType::ChainSegment).then_some(())
|
||||
@@ -235,7 +235,7 @@ impl TestRig {
|
||||
panic!("Should have a BlocksByRange request, filter {request_filter:?}: {e:?}")
|
||||
});
|
||||
|
||||
let by_range_data_requests = if self.after_fulu() {
|
||||
let by_range_data_requests = if self.is_after_fulu() {
|
||||
let mut data_columns_requests = vec![];
|
||||
while let Ok(data_columns_request) = self.pop_received_network_event(|ev| match ev {
|
||||
NetworkMessage::SendRequest {
|
||||
@@ -254,7 +254,7 @@ impl TestRig {
|
||||
panic!("Found zero DataColumnsByRange requests, filter {request_filter:?}");
|
||||
}
|
||||
ByRangeDataRequestIds::PostPeerDAS(data_columns_requests)
|
||||
} else if self.after_deneb() {
|
||||
} else if self.is_after_deneb() {
|
||||
let (id, peer) = self
|
||||
.pop_received_network_event(|ev| match ev {
|
||||
NetworkMessage::SendRequest {
|
||||
@@ -489,7 +489,7 @@ fn build_rpc_block(
|
||||
fn head_chain_removed_while_finalized_syncing() {
|
||||
// NOTE: this is a regression test.
|
||||
// Added in PR https://github.com/sigp/lighthouse/pull/2821
|
||||
let mut rig = TestRig::test_setup();
|
||||
let mut rig = TestRig::default();
|
||||
|
||||
// Get a peer with an advanced head
|
||||
let head_peer = rig.add_head_peer();
|
||||
@@ -514,11 +514,11 @@ fn head_chain_removed_while_finalized_syncing() {
|
||||
async fn state_update_while_purging() {
|
||||
// NOTE: this is a regression test.
|
||||
// Added in PR https://github.com/sigp/lighthouse/pull/2827
|
||||
let mut rig = TestRig::test_setup_with_custody_type(NodeCustodyType::SemiSupernode);
|
||||
let mut rig = TestRig::with_custody_type(NodeCustodyType::SemiSupernode);
|
||||
|
||||
// Create blocks on a separate harness
|
||||
// SemiSupernode ensures enough columns are stored for sampling + custody RPC block validation
|
||||
let mut rig_2 = TestRig::test_setup_with_custody_type(NodeCustodyType::SemiSupernode);
|
||||
let mut rig_2 = TestRig::with_custody_type(NodeCustodyType::SemiSupernode);
|
||||
// Need to create blocks that can be inserted into the fork-choice and fit the "known
|
||||
// conditions" below.
|
||||
let head_peer_block = rig_2.create_canonical_block().await;
|
||||
@@ -550,7 +550,7 @@ async fn state_update_while_purging() {
|
||||
|
||||
#[test]
|
||||
fn pause_and_resume_on_ee_offline() {
|
||||
let mut rig = TestRig::test_setup();
|
||||
let mut rig = TestRig::default();
|
||||
|
||||
// add some peers
|
||||
let peer1 = rig.add_head_peer();
|
||||
@@ -559,7 +559,7 @@ fn pause_and_resume_on_ee_offline() {
|
||||
// send the response to the request
|
||||
rig.find_and_complete_blocks_by_range_request(filter().peer(peer1).epoch(0));
|
||||
// the beacon processor shouldn't have received any work
|
||||
rig.expect_empty_processor();
|
||||
rig.assert_empty_processor();
|
||||
|
||||
// while the ee is offline, more peers might arrive. Add a new finalized peer.
|
||||
let _peer2 = rig.add_finalized_peer();
|
||||
@@ -570,14 +570,14 @@ fn pause_and_resume_on_ee_offline() {
|
||||
// epoch for the other batch. So we can either filter by epoch of by sync type.
|
||||
rig.find_and_complete_blocks_by_range_request(filter().epoch(0));
|
||||
// the beacon processor shouldn't have received any work
|
||||
rig.expect_empty_processor();
|
||||
rig.assert_empty_processor();
|
||||
// make the beacon processor available again.
|
||||
// update_execution_engine_state implicitly calls resume
|
||||
// now resume range, we should have two processing requests in the beacon processor.
|
||||
rig.update_execution_engine_state(EngineState::Online);
|
||||
|
||||
// The head chain and finalized chain (2) should be in the processing queue
|
||||
rig.expect_chain_segments(2);
|
||||
rig.assert_chain_segments(2);
|
||||
}
|
||||
|
||||
/// To attempt to finalize the peer's status finalized checkpoint we synced to its finalized epoch +
|
||||
@@ -587,7 +587,7 @@ const EXTRA_SYNCED_EPOCHS: u64 = 2 + 1;
|
||||
#[test]
|
||||
fn finalized_sync_enough_global_custody_peers_few_chain_peers() {
|
||||
// Run for all forks
|
||||
let mut r = TestRig::test_setup();
|
||||
let mut r = TestRig::default();
|
||||
|
||||
let advanced_epochs: u64 = 2;
|
||||
let remote_info = r.finalized_remote_info_advanced_by(advanced_epochs.into());
|
||||
@@ -604,7 +604,7 @@ fn finalized_sync_enough_global_custody_peers_few_chain_peers() {
|
||||
|
||||
#[test]
|
||||
fn finalized_sync_not_enough_custody_peers_on_start() {
|
||||
let mut r = TestRig::test_setup();
|
||||
let mut r = TestRig::default();
|
||||
// Only run post-PeerDAS
|
||||
if !r.fork_name.fulu_enabled() {
|
||||
return;
|
||||
@@ -621,7 +621,7 @@ fn finalized_sync_not_enough_custody_peers_on_start() {
|
||||
// Because we don't have enough peers on all columns we haven't sent any request.
|
||||
// NOTE: There's a small chance that this single peer happens to custody exactly the set we
|
||||
// expect, in that case the test will fail. Find a way to make the test deterministic.
|
||||
r.expect_empty_network();
|
||||
r.assert_empty_network();
|
||||
|
||||
// Generate enough peers and supernodes to cover all custody columns
|
||||
let peer_count = 100;
|
||||
|
||||
Reference in New Issue
Block a user