Don't penalize peers for extending ignored chains (#8042)

Lookup sync has a cache of block roots "failed_chains". If a peer triggers a lookup for a block or descendant of a root in failed_chains, the lookup is dropped and the peer is penalized. However, blocks are inserted into failed_chains for a single reason:
- If a chain is longer than 32 blocks the lookup is dropped to prevent OOM risks. However the peer is not at fault, since discovering an unknown chain longer than 32 blocks is not malicious. We just drop the lookup to sync the blocks from range forward sync.

This discrepancy is probably an oversight when changing old code. Previously, we added blocks that failed to process too many times to that cache, but we no longer do that. Adding a block that repeatedly fails to process is an optimization to save resources in rare cases where peers keep sending us invalid blocks. In case that happens, today we keep trying to process the block, downscoring the peers and eventually disconnecting them. _IF_ we find that optimization to be necessary, we should merge this PR (_Stricter match of BlockError in lookup sync_) first. IMO we are fine without the failed_chains cache, and the ignored_chains cache will be obsolete with [tree sync](https://github.com/sigp/lighthouse/issues/7678), as the OOM risk of long lookup chains no longer exists.

Closes https://github.com/sigp/lighthouse/issues/7577


  Rename `failed_chains` for `ignored_chains` and don't penalize peers that trigger lookups for those blocks


Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
This commit is contained in:
Lion - dapplion
2025-09-17 03:02:29 +02:00
committed by GitHub
parent 191570e4a1
commit b7d78a91e0
3 changed files with 50 additions and 55 deletions

View File

@@ -59,7 +59,7 @@ mod single_block_lookup;
/// reaches the maximum depth it will force trigger range sync.
pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE;
const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60;
const IGNORED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60;
pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 4;
/// Maximum time we allow a lookup to exist before assuming it is stuck and will never make
@@ -110,8 +110,10 @@ enum Action {
}
pub struct BlockLookups<T: BeaconChainTypes> {
/// A cache of failed chain lookups to prevent duplicate searches.
failed_chains: LRUTimeCache<Hash256>,
/// A cache of block roots that must be ignored for some time to prevent useless searches. For
/// example if a chain is too long, its lookup chain is dropped, and range sync is expected to
/// eventually sync those blocks
ignored_chains: LRUTimeCache<Hash256>,
// TODO: Why not index lookups by block_root?
single_block_lookups: FnvHashMap<SingleLookupId, SingleBlockLookup<T>>,
@@ -128,21 +130,21 @@ pub(crate) type BlockLookupSummary = (Id, Hash256, Option<Hash256>, Vec<PeerId>)
impl<T: BeaconChainTypes> BlockLookups<T> {
pub fn new() -> Self {
Self {
failed_chains: LRUTimeCache::new(Duration::from_secs(
FAILED_CHAINS_CACHE_EXPIRY_SECONDS,
ignored_chains: LRUTimeCache::new(Duration::from_secs(
IGNORED_CHAINS_CACHE_EXPIRY_SECONDS,
)),
single_block_lookups: Default::default(),
}
}
#[cfg(test)]
pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) {
self.failed_chains.insert(block_root);
pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) {
self.ignored_chains.insert(block_root);
}
#[cfg(test)]
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
self.failed_chains.keys().cloned().collect()
pub(crate) fn get_ignored_chains(&mut self) -> Vec<Hash256> {
self.ignored_chains.keys().cloned().collect()
}
#[cfg(test)]
@@ -184,7 +186,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
self.search_parent_of_child(parent_root, block_root, &[peer_id], cx);
// Only create the child lookup if the parent exists
if parent_lookup_exists {
// `search_parent_of_child` ensures that parent root is not a failed chain
// `search_parent_of_child` ensures that the parent lookup exists so we can safely wait for it
self.new_current_lookup(
block_root,
Some(block_component),
@@ -244,8 +246,8 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
debug!(block_root = ?block_root_to_search, "Parent lookup chain too long");
// Searching for this parent would extend a parent chain over the max
// Insert the tip only to failed chains
self.failed_chains.insert(parent_chain.tip);
// Insert the tip only to chains to ignore
self.ignored_chains.insert(parent_chain.tip);
// Note: Drop only the chain that's too long until it merges with another chain
// that's not too long. Consider this attack: there's a chain of valid unknown
@@ -330,12 +332,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
peers: &[PeerId],
cx: &mut SyncNetworkContext<T>,
) -> bool {
// If this block or it's parent is part of a known failed chain, ignore it.
if self.failed_chains.contains(&block_root) {
debug!(?block_root, "Block is from a past failed chain. Dropping");
for peer_id in peers {
cx.report_peer(*peer_id, PeerAction::MidToleranceError, "failed_chain");
}
// If this block or its parent is part of a known ignored chain, ignore it.
if self.ignored_chains.contains(&block_root) {
debug!(?block_root, "Dropping lookup for block marked ignored");
return false;
}

View File

@@ -328,13 +328,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
#[cfg(test)]
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
self.block_lookups.get_failed_chains()
pub(crate) fn get_ignored_chains(&mut self) -> Vec<Hash256> {
self.block_lookups.get_ignored_chains()
}
#[cfg(test)]
pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) {
self.block_lookups.insert_failed_chain(block_root);
pub(crate) fn insert_ignored_chain(&mut self, block_root: Hash256) {
self.block_lookups.insert_ignored_chain(block_root);
}
#[cfg(test)]

View File

@@ -285,21 +285,21 @@ impl TestRig {
);
}
fn insert_failed_chain(&mut self, block_root: Hash256) {
self.sync_manager.insert_failed_chain(block_root);
fn insert_ignored_chain(&mut self, block_root: Hash256) {
self.sync_manager.insert_ignored_chain(block_root);
}
fn assert_not_failed_chain(&mut self, chain_hash: Hash256) {
let failed_chains = self.sync_manager.get_failed_chains();
if failed_chains.contains(&chain_hash) {
panic!("failed chains contain {chain_hash:?}: {failed_chains:?}");
fn assert_not_ignored_chain(&mut self, chain_hash: Hash256) {
let chains = self.sync_manager.get_ignored_chains();
if chains.contains(&chain_hash) {
panic!("ignored chains contain {chain_hash:?}: {chains:?}");
}
}
fn assert_failed_chain(&mut self, chain_hash: Hash256) {
let failed_chains = self.sync_manager.get_failed_chains();
if !failed_chains.contains(&chain_hash) {
panic!("expected failed chains to contain {chain_hash:?}: {failed_chains:?}");
fn assert_ignored_chain(&mut self, chain_hash: Hash256) {
let chains = self.sync_manager.get_ignored_chains();
if !chains.contains(&chain_hash) {
panic!("expected ignored chains to contain {chain_hash:?}: {chains:?}");
}
}
@@ -1021,11 +1021,6 @@ impl TestRig {
self.log(&format!("Found expected penalty {penalty_msg}"));
}
pub fn expect_single_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) {
self.expect_penalty(peer_id, expect_penalty_msg);
self.expect_no_penalty_for(peer_id);
}
pub fn block_with_parent_and_blobs(
&mut self,
parent_root: Hash256,
@@ -1461,7 +1456,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() {
// Trigger the request
rig.trigger_unknown_parent_block(peer_id, block.into());
for i in 1..=PARENT_FAIL_TOLERANCE {
rig.assert_not_failed_chain(block_root);
rig.assert_not_ignored_chain(block_root);
let id = rig.expect_block_parent_request(parent_root);
if i % 2 != 0 {
// The request fails. It should be tried again.
@@ -1474,8 +1469,8 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() {
}
}
rig.assert_not_failed_chain(block_root);
rig.assert_not_failed_chain(parent.canonical_root());
rig.assert_not_ignored_chain(block_root);
rig.assert_not_ignored_chain(parent.canonical_root());
rig.expect_no_active_lookups_empty_network();
}
@@ -1500,7 +1495,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() {
for _ in 0..PROCESSING_FAILURES {
let id = rig.expect_block_parent_request(parent_root);
// Blobs are only requested in the previous first iteration as this test only retries blocks
rig.assert_not_failed_chain(block_root);
rig.assert_not_ignored_chain(block_root);
// send the right parent but fail processing
rig.parent_lookup_block_response(id, peer_id, Some(parent.clone().into()));
rig.parent_block_processed(block_root, BlockError::BlockSlotLimitReached.into());
@@ -1508,7 +1503,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() {
rig.expect_penalty(peer_id, "lookup_block_processing_failure");
}
rig.assert_not_failed_chain(block_root);
rig.assert_not_ignored_chain(block_root);
rig.expect_no_active_lookups_empty_network();
}
@@ -1551,12 +1546,14 @@ fn test_parent_lookup_too_deep_grow_ancestor() {
);
// Should not penalize peer, but network is not clear because of the blocks_by_range requests
rig.expect_no_penalty_for(peer_id);
rig.assert_failed_chain(chain_hash);
rig.assert_ignored_chain(chain_hash);
}
// Regression test for https://github.com/sigp/lighthouse/pull/7118
// 8042 UPDATE: block was previously added to the failed_chains cache, now it's inserted into the
// ignored chains cache. The regression test still applies as the child lookup is not created
#[test]
fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() {
fn test_child_lookup_not_created_for_ignored_chain_parent_after_processing() {
// GIVEN: A parent chain longer than PARENT_DEPTH_TOLERANCE.
let mut rig = TestRig::test_setup();
let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE + 1);
@@ -1586,8 +1583,8 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() {
}
// At this point, the chain should have been deemed too deep and pruned.
// The tip root should have been inserted into failed chains.
rig.assert_failed_chain(tip_root);
// The tip root should have been inserted into ignored chains.
rig.assert_ignored_chain(tip_root);
rig.expect_no_penalty_for(peer_id);
// WHEN: Trigger the extending block that points to the tip.
@@ -1604,10 +1601,10 @@ fn test_child_lookup_not_created_for_failed_chain_parent_after_processing() {
}),
);
// THEN: The extending block should not create a lookup because the tip was inserted into failed chains.
// THEN: The extending block should not create a lookup because the tip was inserted into
// ignored chains.
rig.expect_no_active_lookups();
// AND: The peer should be penalized for extending a failed chain.
rig.expect_single_penalty(peer_id, "failed_chain");
rig.expect_no_penalty_for(peer_id);
rig.expect_empty_network();
}
@@ -1646,7 +1643,7 @@ fn test_parent_lookup_too_deep_grow_tip() {
);
// Should not penalize peer, but network is not clear because of the blocks_by_range requests
rig.expect_no_penalty_for(peer_id);
rig.assert_failed_chain(tip.canonical_root());
rig.assert_ignored_chain(tip.canonical_root());
}
#[test]
@@ -1699,15 +1696,14 @@ fn test_lookup_add_peers_to_parent() {
}
#[test]
fn test_skip_creating_failed_parent_lookup() {
fn test_skip_creating_ignored_parent_lookup() {
let mut rig = TestRig::test_setup();
let (_, block, parent_root, _) = rig.rand_block_and_parent();
let peer_id = rig.new_connected_peer();
rig.insert_failed_chain(parent_root);
rig.insert_ignored_chain(parent_root);
rig.trigger_unknown_parent_block(peer_id, block.into());
// Expect single penalty for peer, despite dropping two lookups
rig.expect_single_penalty(peer_id, "failed_chain");
// Both current and parent lookup should be rejected
rig.expect_no_penalty_for(peer_id);
// Both current and parent lookup should not be created
rig.expect_no_active_lookups();
}