Implement tree states & hierarchical state DB

2026-03-23 14:54:45 +00:00 · 2023-06-19 10:14:47 +10:00
parent 2bb62b7f7d
commit 23db089a7a
193 changed files with 6093 additions and 5925 deletions
--- a/beacon_node/beacon_chain/src/state_advance_timer.rs
+++ b/beacon_node/beacon_chain/src/state_advance_timer.rs
@@ -15,9 +15,7 @@
 //! 2. There's a possibility that the head block is never built upon, causing wasted CPU cycles.
 use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS;
 use crate::{
-    beacon_chain::{ATTESTATION_CACHE_LOCK_TIMEOUT, BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT},
-    chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR,
-    snapshot_cache::StateAdvance,
+    beacon_chain::ATTESTATION_CACHE_LOCK_TIMEOUT, chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR,
    BeaconChain, BeaconChainError, BeaconChainTypes,
 };
 use slog::{debug, error, warn, Logger};
@@ -29,7 +27,7 @@ use std::sync::{
 };
 use task_executor::TaskExecutor;
 use tokio::time::{sleep, sleep_until, Instant};
-use types::{AttestationShufflingId, EthSpec, Hash256, RelativeEpoch, Slot};
+use types::{AttestationShufflingId, BeaconStateError, EthSpec, Hash256, RelativeEpoch, Slot};

 /// If the head slot is more than `MAX_ADVANCE_DISTANCE` from the current slot, then don't perform
 /// the state advancement.
@@ -48,6 +46,8 @@ const MAX_FORK_CHOICE_DISTANCE: u64 = 256;
 #[derive(Debug)]
 enum Error {
    BeaconChain(BeaconChainError),
+    BeaconState(BeaconStateError),
+    Store(store::Error),
    HeadMissingFromSnapshotCache(Hash256),
    MaxDistanceExceeded {
        current_slot: Slot,
@@ -58,7 +58,7 @@ enum Error {
    },
    BadStateSlot {
        _state_slot: Slot,
-        _block_slot: Slot,
+        _current_slot: Slot,
    },
 }

@@ -68,6 +68,18 @@ impl From<BeaconChainError> for Error {
    }
 }

+impl From<BeaconStateError> for Error {
+    fn from(e: BeaconStateError) -> Self {
+        Self::BeaconState(e)
+    }
+}
+
+impl From<store::Error> for Error {
+    fn from(e: store::Error) -> Self {
+        Self::Store(e)
+    }
+}
+
 /// Provides a simple thread-safe lock to be used for task co-ordination. Practically equivalent to
 /// `Mutex<()>`.
 #[derive(Clone)]
@@ -263,11 +275,6 @@ async fn state_advance_timer<T: BeaconChainTypes>(
    }
 }

-/// Reads the `snapshot_cache` from the `beacon_chain` and attempts to take a clone of the
-/// `BeaconState` of the head block. If it obtains this clone, the state will be advanced a single
-/// slot then placed back in the `snapshot_cache` to be used for block verification.
-///
-/// See the module-level documentation for rationale.
 fn advance_head<T: BeaconChainTypes>(
    beacon_chain: &Arc<BeaconChain<T>>,
    log: &Logger,
@@ -290,46 +297,38 @@ fn advance_head<T: BeaconChainTypes>(
        }
    }

-    let head_root = beacon_chain.head_beacon_block_root();
-
-    let (head_slot, head_state_root, mut state) = match beacon_chain
-        .snapshot_cache
-        .try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
-        .ok_or(BeaconChainError::SnapshotCacheLockTimeout)?
-        .get_for_state_advance(head_root)
-    {
-        StateAdvance::AlreadyAdvanced => {
-            return Err(Error::StateAlreadyAdvanced {
-                block_root: head_root,
-            })
-        }
-        StateAdvance::BlockNotFound => return Err(Error::HeadMissingFromSnapshotCache(head_root)),
-        StateAdvance::State {
-            state,
-            state_root,
-            block_slot,
-        } => (block_slot, state_root, *state),
+    let (head_block_root, head_block_state_root) = {
+        let snapshot = beacon_chain.head_snapshot();
+        (snapshot.beacon_block_root, snapshot.beacon_state_root())
    };

-    let initial_slot = state.slot();
-    let initial_epoch = state.current_epoch();
+    let (head_state_root, mut state) = beacon_chain
+        .store
+        .get_advanced_state(head_block_root, current_slot, head_block_state_root)?
+        .ok_or(Error::HeadMissingFromSnapshotCache(head_block_root))?;

-    let state_root = if state.slot() == head_slot {
-        Some(head_state_root)
-    } else {
+    if state.slot() == current_slot + 1 {
+        return Err(Error::StateAlreadyAdvanced {
+            block_root: head_block_root,
+        });
+    } else if state.slot() != current_slot {
        // Protect against advancing a state more than a single slot.
        //
        // Advancing more than one slot without storing the intermediate state would corrupt the
        // database. Future works might store temporary, intermediate states inside this function.
        return Err(Error::BadStateSlot {
-            _block_slot: head_slot,
            _state_slot: state.slot(),
+            _current_slot: current_slot,
        });
-    };
+    }
+
+    let initial_slot = state.slot();
+    let initial_epoch = state.current_epoch();

    // Advance the state a single slot.
-    if let Some(summary) = per_slot_processing(&mut state, state_root, &beacon_chain.spec)
-        .map_err(BeaconChainError::from)?
+    if let Some(summary) =
+        per_slot_processing(&mut state, Some(head_state_root), &beacon_chain.spec)
+            .map_err(BeaconChainError::from)?
    {
        // Expose Prometheus metrics.
        if let Err(e) = summary.observe_metrics() {
@@ -363,7 +362,7 @@ fn advance_head<T: BeaconChainTypes>(
    debug!(
        log,
        "Advanced head state one slot";
-        "head_root" => ?head_root,
+        "head_block_root" => ?head_block_root,
        "state_slot" => state.slot(),
        "current_slot" => current_slot,
    );
@@ -382,14 +381,14 @@ fn advance_head<T: BeaconChainTypes>(
    if initial_epoch < state.current_epoch() {
        // Update the proposer cache.
        //
-        // We supply the `head_root` as the decision block since the prior `if` statement guarantees
+        // We supply the `head_block_root` as the decision block since the prior `if` statement guarantees
        // the head root is the latest block from the prior epoch.
        beacon_chain
            .beacon_proposer_cache
            .lock()
            .insert(
                state.current_epoch(),
-                head_root,
+                head_block_root,
                state
                    .get_beacon_proposer_indices(&beacon_chain.spec)
                    .map_err(BeaconChainError::from)?,
@@ -398,8 +397,9 @@ fn advance_head<T: BeaconChainTypes>(
            .map_err(BeaconChainError::from)?;

        // Update the attester cache.
-        let shuffling_id = AttestationShufflingId::new(head_root, &state, RelativeEpoch::Next)
-            .map_err(BeaconChainError::from)?;
+        let shuffling_id =
+            AttestationShufflingId::new(head_block_root, &state, RelativeEpoch::Next)
+                .map_err(BeaconChainError::from)?;
        let committee_cache = state
            .committee_cache(RelativeEpoch::Next)
            .map_err(BeaconChainError::from)?;
@@ -412,7 +412,7 @@ fn advance_head<T: BeaconChainTypes>(
        debug!(
            log,
            "Primed proposer and attester caches";
-            "head_root" => ?head_root,
+            "head_block_root" => ?head_block_root,
            "next_epoch_shuffling_root" => ?shuffling_id.shuffling_decision_block,
            "state_epoch" => state.current_epoch(),
            "current_epoch" => current_slot.epoch(T::EthSpec::slots_per_epoch()),
@@ -422,44 +422,19 @@ fn advance_head<T: BeaconChainTypes>(
    // Apply the state to the attester cache, if the cache deems it interesting.
    beacon_chain
        .attester_cache
-        .maybe_cache_state(&state, head_root, &beacon_chain.spec)
+        .maybe_cache_state(&state, head_block_root, &beacon_chain.spec)
        .map_err(BeaconChainError::from)?;

    let final_slot = state.slot();

-    // Insert the advanced state back into the snapshot cache.
-    beacon_chain
-        .snapshot_cache
-        .try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
-        .ok_or(BeaconChainError::SnapshotCacheLockTimeout)?
-        .update_pre_state(head_root, state)
-        .ok_or(Error::HeadMissingFromSnapshotCache(head_root))?;
-
-    // If we have moved into the next slot whilst processing the state then this function is going
-    // to become ineffective and likely become a hindrance as we're stealing the tree hash cache
-    // from the snapshot cache (which may force the next block to rebuild a new one).
-    //
-    // If this warning occurs very frequently on well-resourced machines then we should consider
-    // starting it earlier in the slot. Otherwise, it's a good indication that the machine is too
-    // slow/overloaded and will be useful information for the user.
-    let starting_slot = current_slot;
-    let current_slot = beacon_chain.slot()?;
-    if starting_slot < current_slot {
-        warn!(
-            log,
-            "State advance too slow";
-            "head_root" => %head_root,
-            "advanced_slot" => final_slot,
-            "current_slot" => current_slot,
-            "starting_slot" => starting_slot,
-            "msg" => "system resources may be overloaded",
-        );
-    }
+    // Write the advanced state to the database.
+    let advanced_state_root = state.update_tree_hash_cache()?;
+    beacon_chain.store.put_state(&advanced_state_root, &state)?;

    debug!(
        log,
        "Completed state advance";
-        "head_root" => ?head_root,
+        "head_block_root" => ?head_block_root,
        "advanced_slot" => final_slot,
        "initial_slot" => initial_slot,
    );