Add AttesterCache for attestation production (#2478)

## Issue Addressed - Resolves #2169 ## Proposed Changes Adds the `AttesterCache` to allow validators to produce attestations for older slots. Presently, some arbitrary restrictions can force validators to receive an error when attesting to a slot earlier than the present one. This can cause attestation misses when there is excessive load on the validator client or time sync issues between the VC and BN. ## Additional Info NA
2026-04-18 05:18:30 +00:00 · 2021-07-29 04:38:26 +00:00
parent 1d4f90e2eb
commit 8efd9fc324
9 changed files with 663 additions and 58 deletions
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -2,6 +2,7 @@ use crate::attestation_verification::{
    Error as AttestationError, SignatureVerifiedAttestation, VerifiedAggregatedAttestation,
    VerifiedUnaggregatedAttestation,
 };
+use crate::attester_cache::{AttesterCache, AttesterCacheKey};
 use crate::beacon_proposer_cache::BeaconProposerCache;
 use crate::block_verification::{
    check_block_is_finalized_descendant, check_block_relevancy, get_block_root,
@@ -289,6 +290,8 @@ pub struct BeaconChain<T: BeaconChainTypes> {
    pub beacon_proposer_cache: Mutex<BeaconProposerCache>,
    /// Caches a map of `validator_index -> validator_pubkey`.
    pub(crate) validator_pubkey_cache: TimeoutRwLock<ValidatorPubkeyCache<T>>,
+    /// A cache used when producing attestations.
+    pub(crate) attester_cache: Arc<AttesterCache>,
    /// A list of any hard-coded forks that have been disabled.
    pub disabled_forks: Vec<String>,
    /// Sender given to tasks, so that if they encounter a state in which execution cannot
@@ -1217,44 +1220,174 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    /// validator that is in the committee for `slot` and `index` in the canonical chain.
    ///
    /// Always attests to the canonical chain.
+    ///
+    /// ## Errors
+    ///
+    /// May return an error if the `request_slot` is too far behind the head state.
    pub fn produce_unaggregated_attestation(
        &self,
-        slot: Slot,
-        index: CommitteeIndex,
+        request_slot: Slot,
+        request_index: CommitteeIndex,
    ) -> Result<Attestation<T::EthSpec>, Error> {
-        // Note: we're taking a lock on the head. The work involved here should be trivial enough
-        // that the lock should not be held for long.
-        let head = self
-            .canonical_head
-            .try_read_for(HEAD_LOCK_TIMEOUT)
-            .ok_or(Error::CanonicalHeadLockTimeout)?;
+        let _total_timer = metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_SECONDS);

-        if slot >= head.beacon_block.slot() {
-            self.produce_unaggregated_attestation_for_block(
-                slot,
-                index,
-                head.beacon_block_root,
-                Cow::Borrowed(&head.beacon_state),
-                head.beacon_state_root(),
-            )
+        let slots_per_epoch = T::EthSpec::slots_per_epoch();
+        let request_epoch = request_slot.epoch(slots_per_epoch);
+
+        /*
+         * Phase 1/2:
+         *
+         * Take a short-lived read-lock on the head and copy the necessary information from it.
+         *
+         * It is important that this first phase is as quick as possible; creating contention for
+         * the head-lock is not desirable.
+         */
+
+        let head_state_slot;
+        let beacon_block_root;
+        let beacon_state_root;
+        let target;
+        let current_epoch_attesting_info: Option<(Checkpoint, usize)>;
+        let attester_cache_key;
+        let head_timer = metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_HEAD_SCRAPE_SECONDS);
+        if let Some(head) = self.canonical_head.try_read_for(HEAD_LOCK_TIMEOUT) {
+            let head_state = &head.beacon_state;
+            head_state_slot = head_state.slot();
+
+            // There is no value in producing an attestation to a block that is pre-finalization and
+            // it is likely to cause expensive and pointless reads to the freezer database. Exit
+            // early if this is the case.
+            let finalized_slot = head_state
+                .finalized_checkpoint()
+                .epoch
+                .start_slot(slots_per_epoch);
+            if request_slot < finalized_slot {
+                return Err(Error::AttestingToFinalizedSlot {
+                    finalized_slot,
+                    request_slot,
+                });
+            }
+
+            // This function will eventually fail when trying to access a slot which is
+            // out-of-bounds of `state.block_roots`. This explicit error is intended to provide a
+            // clearer message to the user than an ambiguous `SlotOutOfBounds` error.
+            let slots_per_historical_root = T::EthSpec::slots_per_historical_root() as u64;
+            let lowest_permissible_slot =
+                head_state.slot().saturating_sub(slots_per_historical_root);
+            if request_slot < lowest_permissible_slot {
+                return Err(Error::AttestingToAncientSlot {
+                    lowest_permissible_slot,
+                    request_slot,
+                });
+            }
+
+            if request_slot >= head_state.slot() {
+                // When attesting to the head slot or later, always use the head of the chain.
+                beacon_block_root = head.beacon_block_root;
+                beacon_state_root = head.beacon_state_root();
+            } else {
+                // Permit attesting to slots *prior* to the current head. This is desirable when
+                // the VC and BN are out-of-sync due to time issues or overloading.
+                beacon_block_root = *head_state.get_block_root(request_slot)?;
+                beacon_state_root = *head_state.get_state_root(request_slot)?;
+            };
+
+            let target_slot = request_epoch.start_slot(T::EthSpec::slots_per_epoch());
+            let target_root = if head_state.slot() <= target_slot {
+                // If the state is earlier than the target slot then the target *must* be the head
+                // block root.
+                beacon_block_root
+            } else {
+                *head_state.get_block_root(target_slot)?
+            };
+            target = Checkpoint {
+                epoch: request_epoch,
+                root: target_root,
+            };
+
+            current_epoch_attesting_info = if head_state.current_epoch() == request_epoch {
+                // When the head state is in the same epoch as the request, all the information
+                // required to attest is available on the head state.
+                Some((
+                    head_state.current_justified_checkpoint(),
+                    head_state
+                        .get_beacon_committee(request_slot, request_index)?
+                        .committee
+                        .len(),
+                ))
+            } else {
+                // If the head state is in a *different* epoch to the request, more work is required
+                // to determine the justified checkpoint and committee length.
+                None
+            };
+
+            // Determine the key for `self.attester_cache`, in case it is required later in this
+            // routine.
+            attester_cache_key =
+                AttesterCacheKey::new(request_epoch, head_state, beacon_block_root)?;
        } else {
-            // We disallow producing attestations *prior* to the current head since such an
-            // attestation would require loading a `BeaconState` from disk. Loading `BeaconState`
-            // from disk is very resource intensive and proposes a DoS risk from validator clients.
-            //
-            // Although we generally allow validator clients to do things that might harm us (i.e.,
-            // we trust them), sometimes we need to protect the BN from accidental errors which
-            // could cause it significant harm.
-            //
-            // This case is particularity harmful since the HTTP API can effectively call this
-            // function an unlimited amount of times. If `n` validators all happen to call it at
-            // the same time, we're going to load `n` states (and tree hash caches) into memory all
-            // at once. With `n >= 10` we're looking at hundreds of MB or GBs of RAM.
-            Err(Error::AttestingPriorToHead {
-                head_slot: head.beacon_block.slot(),
-                request_slot: slot,
-            })
+            return Err(Error::CanonicalHeadLockTimeout);
        }
+        drop(head_timer);
+
+        /*
+         *  Phase 2/2:
+         *
+         *  If the justified checkpoint and committee length from the head are suitable for this
+         *  attestation, use them. If not, try the attester cache. If the cache misses, load a state
+         *  from disk and prime the cache with it.
+         */
+
+        let cache_timer =
+            metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_CACHE_INTERACTION_SECONDS);
+        let (justified_checkpoint, committee_len) =
+            if let Some((justified_checkpoint, committee_len)) = current_epoch_attesting_info {
+                // The head state is in the same epoch as the attestation, so there is no more
+                // required information.
+                (justified_checkpoint, committee_len)
+            } else if let Some(cached_values) = self.attester_cache.get::<T::EthSpec>(
+                &attester_cache_key,
+                request_slot,
+                request_index,
+                &self.spec,
+            )? {
+                // The suitable values were already cached. Return them.
+                cached_values
+            } else {
+                debug!(
+                    self.log,
+                    "Attester cache miss";
+                    "beacon_block_root" => ?beacon_block_root,
+                    "head_state_slot" => %head_state_slot,
+                    "request_slot" => %request_slot,
+                );
+
+                // Neither the head state, nor the attester cache was able to produce the required
+                // information to attest in this epoch. So, load a `BeaconState` from disk and use
+                // it to fulfil the request (and prime the cache to avoid this next time).
+                let _cache_build_timer =
+                    metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_CACHE_PRIME_SECONDS);
+                self.attester_cache.load_and_cache_state(
+                    beacon_state_root,
+                    attester_cache_key,
+                    request_slot,
+                    request_index,
+                    &self,
+                )?
+            };
+        drop(cache_timer);
+
+        Ok(Attestation {
+            aggregation_bits: BitList::with_capacity(committee_len)?,
+            data: AttestationData {
+                slot: request_slot,
+                index: request_index,
+                beacon_block_root,
+                source: justified_checkpoint,
+                target,
+            },
+            signature: AggregateSignature::empty(),
+        })
    }

    /// Produces an "unaggregated" attestation for the given `slot` and `index` that attests to
@@ -2023,6 +2156,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
        let block_root = fully_verified_block.block_root;
        let mut state = fully_verified_block.state;
        let current_slot = self.slot()?;
+        let current_epoch = current_slot.epoch(T::EthSpec::slots_per_epoch());
        let mut ops = fully_verified_block.confirmation_db_batch;

        let attestation_observation_timer =
@@ -2086,6 +2220,17 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            }
        }

+        // Apply the state to the attester cache, only if it is from the previous epoch or later.
+        //
+        // In a perfect scenario there should be no need to add previous-epoch states to the cache.
+        // However, latency between the VC and the BN might cause the VC to produce attestations at
+        // a previous slot.
+        if state.current_epoch().saturating_add(1_u64) >= current_epoch {
+            self.attester_cache
+                .maybe_cache_state(&state, block_root, &self.spec)
+                .map_err(BeaconChainError::from)?;
+        }
+
        let mut fork_choice = self.fork_choice.write();

        // Do not import a block that doesn't descend from the finalized root.
@@ -2936,6 +3081,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            self.head_tracker.clone(),
        )?;

+        self.attester_cache
+            .prune_below(new_finalized_checkpoint.epoch);
+
        if let Some(event_handler) = self.event_handler.as_ref() {
            if event_handler.has_finalized_subscribers() {
                event_handler.register(EventKind::FinalizedCheckpoint(SseFinalizedCheckpoint {