Hierarchical state diffs in hot DB (#6750)

This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states), but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during periods of non-finality, and reduces overall I/O in all cases.
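For intuition, here is a minimal sketch of the hierarchical-diff idea (an illustration with made-up exponents, not this PR's actual code or config): full states are kept only at the coarsest layer, intermediate layers store diffs against a state in the next-coarser layer, and slots between layer points are reconstructed by replaying blocks.

```rust
/// Illustrative hierarchy: full snapshots every 2^21 slots, diff layers at
/// 2^13 and 2^5, block replay below that. These exponents are made up for
/// the example.
const EXPONENTS: &[u64] = &[5, 13, 21];

#[derive(Debug, PartialEq)]
enum Storage {
    /// Store the full state.
    Snapshot,
    /// Store a binary diff against the state at this earlier slot.
    DiffFrom(u64),
    /// Store nothing; replay blocks from this earlier slot on demand.
    ReplayFrom(u64),
}

fn storage_for(slot: u64) -> Storage {
    // Coarsest layer: keep a full snapshot.
    if slot % (1u64 << EXPONENTS[EXPONENTS.len() - 1]) == 0 {
        return Storage::Snapshot;
    }
    // Walk layers from coarse to fine: the first layer the slot is aligned to
    // stores a diff against the previous point of the next-coarser layer.
    for i in (0..EXPONENTS.len() - 1).rev() {
        if slot % (1u64 << EXPONENTS[i]) == 0 {
            let coarser = 1u64 << EXPONENTS[i + 1];
            return Storage::DiffFrom(slot - slot % coarser);
        }
    }
    // Not aligned to any layer: replay blocks from the previous point of the
    // finest layer.
    Storage::ReplayFrom(slot - slot % (1u64 << EXPONENTS[0]))
}

fn main() {
    assert_eq!(storage_for(2 * (1 << 21)), Storage::Snapshot);
    // Slot 3 * 2^13 stores a diff against the snapshot at slot 0.
    assert_eq!(storage_for(3 * (1 << 13)), Storage::DiffFrom(0));
    // Slot 2^13 + 2^5 stores a diff against the 2^13-aligned state.
    assert_eq!(storage_for((1 << 13) + (1 << 5)), Storage::DiffFrom(1 << 13));
    // Unaligned slots are reconstructed by replaying blocks.
    assert_eq!(storage_for(100), Storage::ReplayFrom(96));
}
```

During non-finality the hot DB accumulates many unfinalized states, so storing most of them as small diffs rather than full states is what drives the disk savings.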

Closes https://github.com/sigp/lighthouse/issues/6580

Conga into https://github.com/sigp/lighthouse/pull/6744

### TODOs

- [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176
- [x] Optimise `store_hot_state` to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner); see the sketch after this list
- [x] Fix misspelled `get_ancenstor_state_root`
- [x] `get_ancestor_state_root` should use state summaries
- [x] Prevent the split from changing during the ancestor calculation
- [x] Use the same hierarchy for the hot and cold DBs
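
For reference, a rough sketch of the duplicate-avoidance idea from the `store_hot_state` item above (hypothetical types and method names, not this PR's code):

```rust
use std::collections::HashMap;

type Hash256 = [u8; 32];

/// Hypothetical stand-in for the hot DB: summaries and states keyed by state root.
#[derive(Default)]
struct HotDb {
    summaries: HashMap<Hash256, String>, // state_root -> serialized summary
    states: HashMap<Hash256, Vec<u8>>,   // state_root -> serialized state/diff
}

impl HotDb {
    fn store_hot_state(&mut self, state_root: Hash256, state_bytes: Vec<u8>) {
        if self.summaries.contains_key(&state_root) {
            // A summary for this state root is already on disk, so the state
            // (or its diff) was written by a previous call: skip the duplicate write.
            return;
        }
        self.states.insert(state_root, state_bytes);
        self.summaries.insert(state_root, "summary".to_string());
    }
}

fn main() {
    let mut db = HotDb::default();
    db.store_hot_state([0; 32], vec![1, 2, 3]);
    db.store_hot_state([0; 32], vec![1, 2, 3]); // second call is a no-op
    assert_eq!(db.states.len(), 1);
}
```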

### TODO Good optimization for future PRs

- [ ] During the migration, if the latest hot snapshot is aligned with the cold snapshot, migrate the diffs instead of the full states (see the sketch after the table below).
```
aligned slot  date
10485760    Nov-26-2024
12582912    Sep-14-2025
14680064    Jul-02-2026
```
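
As a worked check on the table above, assuming "aligned" means a multiple of the coarsest hierarchy layer of 2^21 slots (the listed slots are 5, 6 and 7 times 2^21) and mainnet's 12-second slots for the dates:

```rust
/// Assumed coarsest hierarchy layer: 2^21 slots.
const COARSEST_LAYER: u64 = 1 << 21;

/// Migrating diffs instead of full states is only possible when the latest
/// hot snapshot sits exactly on a cold-snapshot boundary.
fn is_aligned(snapshot_slot: u64) -> bool {
    snapshot_slot % COARSEST_LAYER == 0
}

fn main() {
    assert!(is_aligned(10_485_760)); // 5 * 2^21, ~Nov 2024
    assert!(is_aligned(12_582_912)); // 6 * 2^21, ~Sep 2025
    assert!(is_aligned(14_680_064)); // 7 * 2^21, ~Jul 2026
    assert!(!is_aligned(10_485_761));
}
```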

### TODO Maybe good to have

- [ ] Rename `anchor_slot`: https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1
- [ ] Make the anchor fields private so they can only be mutated through a method, to prevent unwanted changes to `anchor_slot` (see the sketch after this list)
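
A minimal sketch of the encapsulation idea from the item above (struct and method names are illustrative, not this PR's actual types): a private field forces every writer of `anchor_slot` through one audited code path.

```rust
pub struct AnchorInfo {
    anchor_slot: u64, // private: cannot be assigned to from outside this module
}

impl AnchorInfo {
    pub fn new(anchor_slot: u64) -> Self {
        Self { anchor_slot }
    }

    pub fn anchor_slot(&self) -> u64 {
        self.anchor_slot
    }

    /// The single place where the anchor slot may change, so any invariant on
    /// how it is allowed to move can be enforced here (check omitted; this is
    /// only a sketch).
    pub fn set_anchor_slot(&mut self, new_slot: u64) {
        self.anchor_slot = new_slot;
    }
}

fn main() {
    let mut anchor = AnchorInfo::new(0);
    anchor.set_anchor_slot(32);
    assert_eq!(anchor.anchor_slot(), 32);
}
```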

### NOTTODO

- [ ] Use fork choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend from the finalized checkpoint
Commit dd98534158 (parent 6786b9d12a) by Lion - dapplion, 2025-06-19 04:43:25 +02:00, committed by GitHub.
33 changed files with 2695 additions and 812 deletions.


```diff
@@ -124,7 +124,7 @@ use std::time::Duration;
 use store::iter::{BlockRootsIterator, ParentRootBlockIterator, StateRootsIterator};
 use store::{
     BlobSidecarListFromRoot, DatabaseBlock, Error as DBError, HotColdDB, HotStateSummary,
-    KeyValueStore, KeyValueStoreOp, StoreItem, StoreOp,
+    KeyValueStoreOp, StoreItem, StoreOp,
 };
 use task_executor::{ShutdownReason, TaskExecutor};
 use tokio_stream::Stream;
@@ -4043,8 +4043,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         ops.push(StoreOp::PutBlock(block_root, signed_block.clone()));
         ops.push(StoreOp::PutState(block.state_root(), &state));
-        let txn_lock = self.store.hot_db.begin_rw_transaction();
         if let Err(e) = self.store.do_atomically_with_block_and_blobs_cache(ops) {
             error!(
                 msg = "Restoring fork choice from disk",
@@ -4056,7 +4054,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                 .err()
                 .unwrap_or(e.into()));
         }
-        drop(txn_lock);
         // The fork choice write-lock is dropped *after* the on-disk database has been updated.
         // This prevents inconsistency between the two at the expense of concurrency.
@@ -6851,13 +6848,22 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     #[allow(clippy::type_complexity)]
     pub fn chain_dump(
         &self,
     ) -> Result<Vec<BeaconSnapshot<T::EthSpec, BlindedPayload<T::EthSpec>>>, Error> {
+        self.chain_dump_from_slot(Slot::new(0))
+    }
+
+    /// As for `chain_dump` but dumping only the portion of the chain newer than `from_slot`.
+    #[allow(clippy::type_complexity)]
+    pub fn chain_dump_from_slot(
+        &self,
+        from_slot: Slot,
+    ) -> Result<Vec<BeaconSnapshot<T::EthSpec, BlindedPayload<T::EthSpec>>>, Error> {
         let mut dump = vec![];
         let mut prev_block_root = None;
         let mut prev_beacon_state = None;
-        for res in self.forwards_iter_block_roots(Slot::new(0))? {
+        for res in self.forwards_iter_block_roots(from_slot)? {
             let (beacon_block_root, _) = res?;
             // Do not include snapshots at skipped slots.
```
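
For context, a hypothetical call site for the `chain_dump_from_slot` method added in the last hunk (the helper below and its surrounding names are assumed, not part of this diff; imports of `BeaconChain`, `BeaconChainTypes`, `Slot` and `Error` are taken as given):

```rust
// Hypothetical usage sketch (not part of this diff): dump only the recent
// portion of the chain instead of iterating from genesis.
fn dump_recent_chain<T: BeaconChainTypes>(
    chain: &BeaconChain<T>,
    from_slot: Slot,
) -> Result<(), Error> {
    let snapshots = chain.chain_dump_from_slot(from_slot)?;
    for snapshot in &snapshots {
        // Only snapshots at or after `from_slot` are returned.
        assert!(snapshot.beacon_block.slot() >= from_slot);
    }
    Ok(())
}
```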