Hierarchical state diffs in hot DB (#6750)

This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases. Closes https://github.com/sigp/lighthouse/issues/6580 Conga into https://github.com/sigp/lighthouse/pull/6744 ### TODOs - [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176 - [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner) - [x] mispelled: get_ancenstor_state_root - [x] get_ancestor_state_root should use state summaries - [x] Prevent split from changing during ancestor calc - [x] Use same hierarchy for hot and cold ### TODO Good optimization for future PRs - [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot migrate the diffs instead of the full states. ``` align slot time 10485760 Nov-26-2024 12582912 Sep-14-2025 14680064 Jul-02-2026 ``` ### TODO Maybe things good to have - [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1 - [ ] Make anchor fields not public such that they must be mutated through a method. To prevent un-wanted changes of the anchor_slot ### NOTTODO - [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend of finalized checkpoint]
2026-05-30 20:57:10 +00:00 · 2025-06-19 04:43:25 +02:00
parent 6786b9d12a
commit dd98534158
33 changed files with 2695 additions and 812 deletions
--- a/beacon_node/store/src/state_cache.rs
+++ b/beacon_node/store/src/state_cache.rs
@@ -1,4 +1,8 @@
-use crate::Error;
+use crate::hdiff::HDiffBuffer;
+use crate::{
+    metrics::{self, HOT_METRIC},
+    Error,
+};
 use lru::LruCache;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::num::NonZeroUsize;
@@ -37,26 +41,53 @@ pub struct StateCache<E: EthSpec> {
    // the state_root
    states: LruCache<Hash256, (Hash256, BeaconState<E>)>,
    block_map: BlockMap,
+    hdiff_buffers: HotHDiffBufferCache,
    max_epoch: Epoch,
    head_block_root: Hash256,
    headroom: NonZeroUsize,
 }

+/// Cache of hdiff buffers for hot states.
+///
+/// This cache only keeps buffers prior to the finalized state, which are required by the
+/// hierarchical state diff scheme to construct newer unfinalized states.
+///
+/// The cache always retains the hdiff buffer for the most recent snapshot so that even if the
+/// cache capacity is 1, this snapshot never needs to be loaded from disk.
+#[derive(Debug)]
+pub struct HotHDiffBufferCache {
+    /// Cache of HDiffBuffers for states *prior* to the `finalized_state`.
+    ///
+    /// Maps state_root -> (slot, buffer).
+    hdiff_buffers: LruCache<Hash256, (Slot, HDiffBuffer)>,
+}
+
 #[derive(Debug)]
 pub enum PutStateOutcome {
+    /// State is prior to the cache's finalized state (lower slot) and was cached as an HDiffBuffer.
+    PreFinalizedHDiffBuffer,
+    /// State is equal to the cache's finalized state and was not inserted.
    Finalized,
+    /// State was already present in the cache.
    Duplicate,
-    /// Includes deleted states as a result of this insertion
+    /// State is new to the cache and was inserted.
+    ///
+    /// Includes deleted states as a result of this insertion.
    New(Vec<Hash256>),
 }

 #[allow(clippy::len_without_is_empty)]
 impl<E: EthSpec> StateCache<E> {
-    pub fn new(capacity: NonZeroUsize, headroom: NonZeroUsize) -> Self {
+    pub fn new(
+        state_capacity: NonZeroUsize,
+        headroom: NonZeroUsize,
+        hdiff_capacity: NonZeroUsize,
+    ) -> Self {
        StateCache {
            finalized_state: None,
-            states: LruCache::new(capacity),
+            states: LruCache::new(state_capacity),
            block_map: BlockMap::default(),
+            hdiff_buffers: HotHDiffBufferCache::new(hdiff_capacity),
            max_epoch: Epoch::new(0),
            head_block_root: Hash256::ZERO,
            headroom,
@@ -71,11 +102,20 @@ impl<E: EthSpec> StateCache<E> {
        self.states.cap().get()
    }

+    pub fn num_hdiff_buffers(&self) -> usize {
+        self.hdiff_buffers.len()
+    }
+
+    pub fn hdiff_buffer_mem_usage(&self) -> usize {
+        self.hdiff_buffers.mem_usage()
+    }
+
    pub fn update_finalized_state(
        &mut self,
        state_root: Hash256,
        block_root: Hash256,
        state: BeaconState<E>,
+        pre_finalized_slots_to_retain: &[Slot],
    ) -> Result<(), Error> {
        if state.slot() % E::slots_per_epoch() != 0 {
            return Err(Error::FinalizedStateUnaligned);
@@ -95,9 +135,31 @@ impl<E: EthSpec> StateCache<E> {
        // Prune block map.
        let state_roots_to_prune = self.block_map.prune(state.slot());

+        // Prune HDiffBuffers that are no longer required by the hdiff grid of the finalized state.
+        // We need to do this prior to copying in any new hdiff buffers, because the cache
+        // preferences older slots.
+        // NOTE: This isn't perfect as it prunes by slot: there could be multiple buffers
+        // at some slots in the case of long forks without finality.
+        let new_hdiff_cache = HotHDiffBufferCache::new(self.hdiff_buffers.cap());
+        let old_hdiff_cache = std::mem::replace(&mut self.hdiff_buffers, new_hdiff_cache);
+        for (state_root, (slot, buffer)) in old_hdiff_cache.hdiff_buffers {
+            if pre_finalized_slots_to_retain.contains(&slot) {
+                self.hdiff_buffers.put(state_root, slot, buffer);
+            }
+        }
+
        // Delete states.
        for state_root in state_roots_to_prune {
-            self.states.pop(&state_root);
+            if let Some((_, state)) = self.states.pop(&state_root) {
+                // Add the hdiff buffer for this state to the hdiff cache if it is now part of
+                // the pre-finalized grid. The `put` method will take care of keeping the most
+                // useful buffers.
+                let slot = state.slot();
+                if pre_finalized_slots_to_retain.contains(&slot) {
+                    let hdiff_buffer = HDiffBuffer::from_state(state);
+                    self.hdiff_buffers.put(state_root, slot, hdiff_buffer);
+                }
+            }
        }

        // Update finalized state.
@@ -136,12 +198,19 @@ impl<E: EthSpec> StateCache<E> {
        block_root: Hash256,
        state: &BeaconState<E>,
    ) -> Result<PutStateOutcome, Error> {
-        if self
-            .finalized_state
-            .as_ref()
-            .is_some_and(|finalized_state| finalized_state.state_root == state_root)
-        {
-            return Ok(PutStateOutcome::Finalized);
+        if let Some(ref finalized_state) = self.finalized_state {
+            if finalized_state.state_root == state_root {
+                return Ok(PutStateOutcome::Finalized);
+            } else if state.slot() <= finalized_state.state.slot() {
+                // We assume any state being inserted into the cache is grid-aligned (it is the
+                // caller's responsibility to not feed us garbage) as we don't want to thread the
+                // hierarchy config through here. So any state received is converted to an
+                // HDiffBuffer and saved.
+                let hdiff_buffer = HDiffBuffer::from_state(state.clone());
+                self.hdiff_buffers
+                    .put(state_root, state.slot(), hdiff_buffer);
+                return Ok(PutStateOutcome::PreFinalizedHDiffBuffer);
+            }
        }

        if self.states.peek(&state_root).is_some() {
@@ -192,6 +261,37 @@ impl<E: EthSpec> StateCache<E> {
        self.states.get(&state_root).map(|(_, state)| state.clone())
    }

+    pub fn put_hdiff_buffer(&mut self, state_root: Hash256, slot: Slot, buffer: &HDiffBuffer) {
+        // Only accept HDiffBuffers prior to finalization. Later states should be stored as proper
+        // states, not HDiffBuffers.
+        if let Some(finalized_state) = &self.finalized_state {
+            if slot >= finalized_state.state.slot() {
+                return;
+            }
+        }
+        self.hdiff_buffers.put(state_root, slot, buffer.clone());
+    }
+
+    pub fn get_hdiff_buffer_by_state_root(&mut self, state_root: Hash256) -> Option<HDiffBuffer> {
+        if let Some(buffer) = self.hdiff_buffers.get(&state_root) {
+            metrics::inc_counter_vec(&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_HIT, HOT_METRIC);
+            let timer =
+                metrics::start_timer_vec(&metrics::BEACON_HDIFF_BUFFER_CLONE_TIME, HOT_METRIC);
+            let result = Some(buffer.clone());
+            drop(timer);
+            return result;
+        }
+        if let Some(buffer) = self
+            .get_by_state_root(state_root)
+            .map(HDiffBuffer::from_state)
+        {
+            metrics::inc_counter_vec(&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_HIT, HOT_METRIC);
+            return Some(buffer);
+        }
+        metrics::inc_counter_vec(&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_MISS, HOT_METRIC);
+        None
+    }
+
    pub fn get_by_block_root(
        &mut self,
        block_root: Hash256,
@@ -325,3 +425,80 @@ impl BlockMap {
        self.blocks.remove(block_root)
    }
 }
+
+impl HotHDiffBufferCache {
+    pub fn new(capacity: NonZeroUsize) -> Self {
+        Self {
+            hdiff_buffers: LruCache::new(capacity),
+        }
+    }
+
+    pub fn get(&mut self, state_root: &Hash256) -> Option<HDiffBuffer> {
+        self.hdiff_buffers
+            .get(state_root)
+            .map(|(_, buffer)| buffer.clone())
+    }
+
+    /// Put a value in the cache, making room for it if necessary.
+    ///
+    /// If the value was inserted then `true` is returned.
+    pub fn put(&mut self, state_root: Hash256, slot: Slot, buffer: HDiffBuffer) -> bool {
+        // If the cache is not full, simply insert the value.
+        if self.hdiff_buffers.len() != self.hdiff_buffers.cap().get() {
+            self.hdiff_buffers.put(state_root, (slot, buffer));
+            return true;
+        }
+
+        // If the cache is full, it has room for this new entry if:
+        //
+        // - The capacity is greater than 1: we can retain the snapshot and the new entry, or
+        // - The capacity is 1 and the slot of the new entry is older than the min_slot in the
+        //   cache. This is a simplified way of retaining the snapshot in the cache. We don't need
+        //   to worry about inserting/retaining states older than the snapshot because these are
+        //   pruned on finalization and never reinserted.
+        let Some(min_slot) = self.hdiff_buffers.iter().map(|(_, (slot, _))| *slot).min() else {
+            // Unreachable: cache is full so should have >0 entries.
+            return false;
+        };
+
+        if self.hdiff_buffers.cap().get() > 1 || slot < min_slot {
+            // Remove LRU value. Cache is now at size `cap - 1`.
+            let Some((removed_state_root, (removed_slot, removed_buffer))) =
+                self.hdiff_buffers.pop_lru()
+            else {
+                // Unreachable: cache is full so should have at least one entry to pop.
+                return false;
+            };
+
+            // Insert new value. Cache size is now at size `cap`.
+            self.hdiff_buffers.put(state_root, (slot, buffer));
+
+            // If the removed value had the min slot and we didn't intend to replace it (cap=1)
+            // then we reinsert it.
+            if removed_slot == min_slot && slot >= min_slot {
+                self.hdiff_buffers
+                    .put(removed_state_root, (removed_slot, removed_buffer));
+            }
+            true
+        } else {
+            // No room.
+            false
+        }
+    }
+
+    pub fn cap(&self) -> NonZeroUsize {
+        self.hdiff_buffers.cap()
+    }
+
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        self.hdiff_buffers.len()
+    }
+
+    pub fn mem_usage(&self) -> usize {
+        self.hdiff_buffers
+            .iter()
+            .map(|(_, (_, buffer))| buffer.size())
+            .sum()
+    }
+}