keep failed finalized chains to avoid retries (#3142)

## Issue Addressed

On very rare occasions we've seen most, if not all, of our peers on a chain with which we don't agree. Purging these peers can take a very long time: as long as the number of retries of the chain. Meanwhile, sync is caught in a loop trying the chain again and again. This change fast-tracks purging those peers by registering the failed chain, which prevents retrying it for some time (30 seconds). Longer times could be dangerous, since a chain can also fail if, for example, a batch fails to download. In this case, I think it's still acceptable to fast-track purging peers, since they are not providing the required info anyway.

Co-authored-by: Divma <26765164+divagant-martian@users.noreply.github.com>
2026-03-19 12:56:12 +00:00 · 2022-04-13 01:10:55 +00:00
parent aa72088f8f
commit 7366266bd1
6 changed files with 114 additions and 148 deletions
--- a/beacon_node/network/src/sync/block_lookups/mod.rs
+++ b/beacon_node/network/src/sync/block_lookups/mod.rs
@@ -4,7 +4,7 @@ use std::time::Duration;
 use beacon_chain::{BeaconChainTypes, BlockError};
 use fnv::FnvHashMap;
 use lighthouse_network::{PeerAction, PeerId};
-use lru_cache::LRUCache;
+use lru_cache::LRUTimeCache;
 use slog::{crit, debug, error, trace, warn, Logger};
 use smallvec::SmallVec;
 use store::{Hash256, SignedBeaconBlock};
@@ -29,7 +29,7 @@ mod single_block_lookup;
 #[cfg(test)]
 mod tests;

-const FAILED_CHAINS_CACHE_SIZE: usize = 500;
+const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60;
 const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3;

 pub(crate) struct BlockLookups<T: BeaconChainTypes> {
@@ -37,7 +37,7 @@ pub(crate) struct BlockLookups<T: BeaconChainTypes> {
    parent_queue: SmallVec<[ParentLookup<T::EthSpec>; 3]>,

    /// A cache of failed chain lookups to prevent duplicate searches.
-    failed_chains: LRUCache<Hash256>,
+    failed_chains: LRUTimeCache<Hash256>,

    /// A collection of block hashes being searched for and a flag indicating if a result has been
    /// received or not.
@@ -56,7 +56,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
    pub fn new(beacon_processor_send: mpsc::Sender<WorkEvent<T>>, log: Logger) -> Self {
        Self {
            parent_queue: Default::default(),
-            failed_chains: LRUCache::new(FAILED_CHAINS_CACHE_SIZE),
+            failed_chains: LRUTimeCache::new(Duration::from_secs(
+                FAILED_CHAINS_CACHE_EXPIRY_SECONDS,
+            )),
            single_block_lookups: Default::default(),
            beacon_processor_send,
            log,
@@ -218,7 +220,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
            return;
        };

-        match parent_lookup.verify_block(block, &self.failed_chains) {
+        match parent_lookup.verify_block(block, &mut self.failed_chains) {
            Ok(Some(block)) => {
                // Block is correct, send to the beacon processor.
                let chain_hash = parent_lookup.chain_hash();
--- a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs
+++ b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs
@@ -117,7 +117,7 @@ impl<T: EthSpec> ParentLookup<T> {
    pub fn verify_block(
        &mut self,
        block: Option<Box<SignedBeaconBlock<T>>>,
-        failed_chains: &lru_cache::LRUCache<Hash256>,
+        failed_chains: &mut lru_cache::LRUTimeCache<Hash256>,
    ) -> Result<Option<Box<SignedBeaconBlock<T>>>, VerifyError> {
        let block = self.current_parent_request.verify_block(block)?;