mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-31 21:27:12 +00:00
Tree-sync friendly lookup sync tests (#8592)
- Step 0 of the tree-sync roadmap https://github.com/sigp/lighthouse/issues/7678
Current lookup sync tests are written in an explicit way that assume how the internals of lookup sync work. For example the test would do:
- Emit unknown block parent message
- Expect block request for X
- Respond with successful block request
- Expect block processing request for X
- Response with successful processing request
- etc..
This is unnecessarily verbose. And it will requires a complete re-write when something changes in the internals of lookup sync (has happened a few times, mostly for deneb and fulu).
What we really want to assert is:
- WHEN: we receive an unknown block parent message
- THEN: Lookup sync can sync that block
- ASSERT: Without penalizing peers, without unnecessary retries
Keep all existing tests and add new cases but written in the new style described above. The logic to serve and respond to request is in this function `fn simulate` 2288a3aeb1/beacon_node/network/src/sync/tests/lookups.rs (L301)
- It controls peer behavior based on a `CompleteStrategy` where you can set for example "respond to BlocksByRoot requests with empty"
- It actually runs beacon processor messages running their clousures. Now sync tests actually import blocks, increasing the test coverage to the interaction of sync and the da_checker.
- To achieve the above the tests create real blocks with the test harness. To make the tests as fast as before, I disabled crypto with `TestConfig`
Along the way I found a couple bugs, which I documented on the diff.
Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
@@ -121,15 +121,24 @@ pub struct BlockLookups<T: BeaconChainTypes> {
|
||||
|
||||
// TODO: Why not index lookups by block_root?
|
||||
single_block_lookups: FnvHashMap<SingleLookupId, SingleBlockLookup<T>>,
|
||||
|
||||
/// Used for testing assertions
|
||||
metrics: BlockLookupsMetrics,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
|
||||
#[cfg(test)]
|
||||
/// Tuple of `SingleLookupId`, requested block root, awaiting parent block root (if any),
|
||||
/// and list of peers that claim to have imported this set of block components.
|
||||
pub(crate) type BlockLookupSummary = (Id, Hash256, Option<Hash256>, Vec<PeerId>);
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct BlockLookupSummary {
|
||||
/// Lookup ID
|
||||
pub id: Id,
|
||||
/// Requested block root
|
||||
pub block_root: Hash256,
|
||||
/// List of peers that claim to have imported this set of block components.
|
||||
pub peers: Vec<PeerId>,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
pub fn new() -> Self {
|
||||
@@ -138,9 +147,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
IGNORED_CHAINS_CACHE_EXPIRY_SECONDS,
|
||||
)),
|
||||
single_block_lookups: Default::default(),
|
||||
metrics: <_>::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn metrics(&self) -> &BlockLookupsMetrics {
|
||||
&self.metrics
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) {
|
||||
self.ignored_chains.insert(block_root);
|
||||
@@ -155,7 +170,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
pub(crate) fn active_single_lookups(&self) -> Vec<BlockLookupSummary> {
|
||||
self.single_block_lookups
|
||||
.iter()
|
||||
.map(|(id, l)| (*id, l.block_root(), l.awaiting_parent(), l.all_peers()))
|
||||
.map(|(id, l)| BlockLookupSummary {
|
||||
id: *id,
|
||||
block_root: l.block_root(),
|
||||
peers: l.all_peers(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -306,7 +325,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
// attributability. A peer can send us garbage blocks over blocks_by_root, and
|
||||
// then correct blocks via blocks_by_range.
|
||||
|
||||
self.drop_lookup_and_children(*lookup_id);
|
||||
self.drop_lookup_and_children(*lookup_id, "chain_too_long");
|
||||
} else {
|
||||
// Should never happen
|
||||
error!(
|
||||
@@ -414,6 +433,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
"Created block lookup"
|
||||
);
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUP_CREATED);
|
||||
self.metrics.created_lookups += 1;
|
||||
|
||||
let result = lookup.continue_requests(cx);
|
||||
if self.on_lookup_result(id, result, "new_current_lookup", cx) {
|
||||
@@ -513,8 +533,11 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
/* Error responses */
|
||||
|
||||
pub fn peer_disconnected(&mut self, peer_id: &PeerId) {
|
||||
for (_, lookup) in self.single_block_lookups.iter_mut() {
|
||||
for (id, lookup) in self.single_block_lookups.iter_mut() {
|
||||
lookup.remove_peer(peer_id);
|
||||
if lookup.has_no_peers() {
|
||||
debug!(%id, "Lookup has no peers");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -566,7 +589,8 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
|
||||
let action = match result {
|
||||
BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_))
|
||||
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..)) => {
|
||||
| BlockProcessingResult::Err(BlockError::DuplicateFullyImported(..))
|
||||
| BlockProcessingResult::Err(BlockError::GenesisBlock) => {
|
||||
// Successfully imported
|
||||
request_state.on_processing_success()?;
|
||||
Action::Continue
|
||||
@@ -747,6 +771,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
let lookup_result = if imported {
|
||||
Ok(LookupResult::Completed)
|
||||
} else {
|
||||
// A lookup may be in the following state:
|
||||
// - Block awaiting processing from a different source
|
||||
// - Blobs downloaded processed, and inserted into the da_checker
|
||||
//
|
||||
// At this point the block fails processing (e.g. execution engine offline) and it is
|
||||
// removed from the da_checker. Note that ALL components are removed from the da_checker
|
||||
// so when we re-download and process the block we get the error
|
||||
// MissingComponentsAfterAllProcessed and get stuck.
|
||||
lookup.reset_requests();
|
||||
lookup.continue_requests(cx)
|
||||
};
|
||||
let id = *id;
|
||||
@@ -779,14 +812,17 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
/// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need
|
||||
/// the parent to make progress to resolve, therefore we must drop them if the parent is
|
||||
/// dropped.
|
||||
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) {
|
||||
pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId, reason: &'static str) {
|
||||
if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) {
|
||||
debug!(
|
||||
id = ?dropped_id,
|
||||
block_root = ?dropped_lookup.block_root(),
|
||||
awaiting_parent = ?dropped_lookup.awaiting_parent(),
|
||||
reason,
|
||||
"Dropping lookup"
|
||||
);
|
||||
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[reason]);
|
||||
self.metrics.dropped_lookups += 1;
|
||||
|
||||
let child_lookups = self
|
||||
.single_block_lookups
|
||||
@@ -796,7 +832,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for id in child_lookups {
|
||||
self.drop_lookup_and_children(id);
|
||||
self.drop_lookup_and_children(id, reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -814,8 +850,13 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
Ok(LookupResult::Pending) => true, // no action
|
||||
Ok(LookupResult::Completed) => {
|
||||
if let Some(lookup) = self.single_block_lookups.remove(&id) {
|
||||
debug!(block = ?lookup.block_root(), id, "Dropping completed lookup");
|
||||
debug!(
|
||||
block = ?lookup.block_root(),
|
||||
id,
|
||||
"Dropping completed lookup"
|
||||
);
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUP_COMPLETED);
|
||||
self.metrics.completed_lookups += 1;
|
||||
// Block imported, continue the requests of pending child blocks
|
||||
self.continue_child_lookups(lookup.block_root(), cx);
|
||||
self.update_metrics();
|
||||
@@ -829,8 +870,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
Err(LookupRequestError::UnknownLookup) => false,
|
||||
Err(error) => {
|
||||
debug!(id, source, ?error, "Dropping lookup on request error");
|
||||
metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[error.into()]);
|
||||
self.drop_lookup_and_children(id);
|
||||
self.drop_lookup_and_children(id, error.into());
|
||||
self.update_metrics();
|
||||
false
|
||||
}
|
||||
@@ -897,7 +937,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
%block_root,
|
||||
"Dropping lookup with no peers"
|
||||
);
|
||||
self.drop_lookup_and_children(lookup_id);
|
||||
self.drop_lookup_and_children(lookup_id, "no_peers");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -946,7 +986,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
}
|
||||
|
||||
metrics::inc_counter(&metrics::SYNC_LOOKUPS_STUCK);
|
||||
self.drop_lookup_and_children(ancestor_stuck_lookup.id);
|
||||
self.drop_lookup_and_children(ancestor_stuck_lookup.id, "lookup_stuck");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1022,3 +1062,10 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub(crate) struct BlockLookupsMetrics {
|
||||
pub created_lookups: usize,
|
||||
pub dropped_lookups: usize,
|
||||
pub completed_lookups: usize,
|
||||
}
|
||||
|
||||
@@ -109,6 +109,12 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the status of all internal requests
|
||||
pub fn reset_requests(&mut self) {
|
||||
self.block_request_state = BlockRequestState::new(self.block_root);
|
||||
self.component_requests = ComponentRequests::WaitingForBlock;
|
||||
}
|
||||
|
||||
/// Return the slot of this lookup's block if it's currently cached as `AwaitingProcessing`
|
||||
pub fn peek_downloaded_block_slot(&self) -> Option<Slot> {
|
||||
self.block_request_state
|
||||
|
||||
Reference in New Issue
Block a user