Hierarchical state diffs in hot DB (#6750)

This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases. Closes https://github.com/sigp/lighthouse/issues/6580 Conga into https://github.com/sigp/lighthouse/pull/6744 ### TODOs - [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176 - [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner) - [x] mispelled: get_ancenstor_state_root - [x] get_ancestor_state_root should use state summaries - [x] Prevent split from changing during ancestor calc - [x] Use same hierarchy for hot and cold ### TODO Good optimization for future PRs - [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot migrate the diffs instead of the full states. ``` align slot time 10485760 Nov-26-2024 12582912 Sep-14-2025 14680064 Jul-02-2026 ``` ### TODO Maybe things good to have - [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1 - [ ] Make anchor fields not public such that they must be mutated through a method. To prevent un-wanted changes of the anchor_slot ### NOTTODO - [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend of finalized checkpoint]
2026-05-31 13:17:09 +00:00 · 2025-06-19 04:43:25 +02:00
parent 6786b9d12a
commit dd98534158
33 changed files with 2695 additions and 812 deletions
--- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs
+++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs
@@ -43,21 +43,26 @@ pub fn upgrade_to_v23<T: BeaconChainTypes>(
        let state_root = state_root_result?;
        debug!(
            ?state_root,
-            "Deleting temporary state flag on v23 schema migration"
+            "Deleting temporary state on v23 schema migration"
        );
        ops.push(KeyValueStoreOp::DeleteKey(
            DBColumn::BeaconStateTemporary,
            state_root.as_slice().to_vec(),
        ));
-        // Here we SHOULD delete the items for key `state_root` in columns `BeaconState` and
-        // `BeaconStateSummary`. However, in the event we have dangling temporary states at the time
-        // of the migration, the first pruning routine will prune them. They will be a tree branch /
-        // root not part of the finalized tree and trigger a warning log once.
-        //
-        // We believe there may be race conditions concerning temporary flags where a necessary
-        // canonical state is marked as temporary. In current stable, a restart with that DB will
-        // corrupt the DB. In the unlikely case this happens we choose to leave the states and
-        // allow pruning to clean them.
+
+        // We also delete the temporary states themselves. Although there are known issue with
+        // temporary states and this could lead to DB corruption, we will only corrupt the DB in
+        // cases where the DB would be corrupted by restarting on v7.0.x. We consider these DBs
+        // "too far gone". Deleting here has the advantage of not generating warnings about
+        // disjoint state DAGs in the v24 upgrade, or the first pruning after migration.
+        ops.push(KeyValueStoreOp::DeleteKey(
+            DBColumn::BeaconState,
+            state_root.as_slice().to_vec(),
+        ));
+        ops.push(KeyValueStoreOp::DeleteKey(
+            DBColumn::BeaconStateSummary,
+            state_root.as_slice().to_vec(),
+        ));
    }

    Ok(ops)
--- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v24.rs
+++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v24.rs
@@ -0,0 +1,605 @@
+use crate::{
+    beacon_chain::BeaconChainTypes,
+    summaries_dag::{DAGStateSummary, DAGStateSummaryV22, StateSummariesDAG},
+};
+use ssz::{Decode, DecodeError, Encode};
+use ssz_derive::Encode;
+use std::{
+    sync::Arc,
+    time::{Duration, Instant},
+};
+use store::{
+    hdiff::StorageStrategy,
+    hot_cold_store::{HotStateSummaryV22, OptionalDiffBaseState},
+    DBColumn, Error, HotColdDB, HotStateSummary, KeyValueStore, KeyValueStoreOp, StoreItem,
+};
+use tracing::{debug, info, warn};
+use types::{
+    BeaconState, ChainSpec, Checkpoint, CommitteeCache, EthSpec, Hash256, Slot, CACHED_EPOCHS,
+};
+
+/// We stopped using the pruning checkpoint in schema v23 but never explicitly deleted it.
+///
+/// We delete it as part of the v24 migration.
+pub const PRUNING_CHECKPOINT_KEY: Hash256 = Hash256::repeat_byte(3);
+
+pub fn store_full_state_v22<E: EthSpec>(
+    state_root: &Hash256,
+    state: &BeaconState<E>,
+    ops: &mut Vec<KeyValueStoreOp>,
+) -> Result<(), Error> {
+    let bytes = StorageContainer::new(state).as_ssz_bytes();
+    ops.push(KeyValueStoreOp::PutKeyValue(
+        DBColumn::BeaconState,
+        state_root.as_slice().to_vec(),
+        bytes,
+    ));
+    Ok(())
+}
+
+/// Fetch a V22 state from the database either as a full state or using block replay.
+pub fn get_state_v22<T: BeaconChainTypes>(
+    db: &Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
+    state_root: &Hash256,
+    spec: &ChainSpec,
+) -> Result<Option<BeaconState<T::EthSpec>>, Error> {
+    let Some(summary) = db.get_item::<HotStateSummaryV22>(state_root)? else {
+        return Ok(None);
+    };
+    let Some(base_state) =
+        get_full_state_v22(&db.hot_db, &summary.epoch_boundary_state_root, spec)?
+    else {
+        return Ok(None);
+    };
+    // Loading hot states via block replay doesn't care about the schema version, so we can use
+    // the DB's current method for this.
+    let update_cache = false;
+    db.load_hot_state_using_replay(
+        base_state,
+        summary.slot,
+        summary.latest_block_root,
+        update_cache,
+    )
+    .map(Some)
+}
+
+pub fn get_full_state_v22<KV: KeyValueStore<E>, E: EthSpec>(
+    db: &KV,
+    state_root: &Hash256,
+    spec: &ChainSpec,
+) -> Result<Option<BeaconState<E>>, Error> {
+    match db.get_bytes(DBColumn::BeaconState, state_root.as_slice())? {
+        Some(bytes) => {
+            let container = StorageContainer::from_ssz_bytes(&bytes, spec)?;
+            Ok(Some(container.try_into()?))
+        }
+        None => Ok(None),
+    }
+}
+
+/// A container for storing `BeaconState` components.
+///
+/// DEPRECATED.
+#[derive(Encode)]
+pub struct StorageContainer<E: EthSpec> {
+    state: BeaconState<E>,
+    committee_caches: Vec<Arc<CommitteeCache>>,
+}
+
+impl<E: EthSpec> StorageContainer<E> {
+    /// Create a new instance for storing a `BeaconState`.
+    pub fn new(state: &BeaconState<E>) -> Self {
+        Self {
+            state: state.clone(),
+            committee_caches: state.committee_caches().to_vec(),
+        }
+    }
+
+    pub fn from_ssz_bytes(bytes: &[u8], spec: &ChainSpec) -> Result<Self, ssz::DecodeError> {
+        // We need to use the slot-switching `from_ssz_bytes` of `BeaconState`, which doesn't
+        // compose with the other SSZ utils, so we duplicate some parts of `ssz_derive` here.
+        let mut builder = ssz::SszDecoderBuilder::new(bytes);
+
+        builder.register_anonymous_variable_length_item()?;
+        builder.register_type::<Vec<CommitteeCache>>()?;
+
+        let mut decoder = builder.build()?;
+
+        let state = decoder.decode_next_with(|bytes| BeaconState::from_ssz_bytes(bytes, spec))?;
+        let committee_caches = decoder.decode_next()?;
+
+        Ok(Self {
+            state,
+            committee_caches,
+        })
+    }
+}
+
+impl<E: EthSpec> TryInto<BeaconState<E>> for StorageContainer<E> {
+    type Error = Error;
+
+    fn try_into(mut self) -> Result<BeaconState<E>, Error> {
+        let mut state = self.state;
+
+        for i in (0..CACHED_EPOCHS).rev() {
+            if i >= self.committee_caches.len() {
+                return Err(Error::SszDecodeError(DecodeError::BytesInvalid(
+                    "Insufficient committees for BeaconState".to_string(),
+                )));
+            };
+
+            state.committee_caches_mut()[i] = self.committee_caches.remove(i);
+        }
+
+        Ok(state)
+    }
+}
+
+/// The checkpoint used for pruning the database.
+///
+/// Updated whenever pruning is successful.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct PruningCheckpoint {
+    pub checkpoint: Checkpoint,
+}
+
+impl StoreItem for PruningCheckpoint {
+    fn db_column() -> DBColumn {
+        DBColumn::BeaconMeta
+    }
+
+    fn as_store_bytes(&self) -> Vec<u8> {
+        self.checkpoint.as_ssz_bytes()
+    }
+
+    fn from_store_bytes(bytes: &[u8]) -> Result<Self, Error> {
+        Ok(PruningCheckpoint {
+            checkpoint: Checkpoint::from_ssz_bytes(bytes)?,
+        })
+    }
+}
+
+pub fn upgrade_to_v24<T: BeaconChainTypes>(
+    db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
+) -> Result<Vec<KeyValueStoreOp>, Error> {
+    let mut migrate_ops = vec![];
+    let split = db.get_split_info();
+    let hot_hdiff_start_slot = split.slot;
+
+    // Delete the `PruningCheckpoint` (no longer used).
+    migrate_ops.push(KeyValueStoreOp::DeleteKey(
+        DBColumn::BeaconMeta,
+        PRUNING_CHECKPOINT_KEY.as_slice().to_vec(),
+    ));
+
+    // Sanity check to make sure the HDiff grid is aligned with the epoch start
+    if hot_hdiff_start_slot % T::EthSpec::slots_per_epoch() != 0 {
+        return Err(Error::MigrationError(format!(
+            "hot_hdiff_start_slot is not first slot in epoch {hot_hdiff_start_slot}"
+        )));
+    }
+
+    // After V24 hot tree states, the in-memory `anchor_info.anchor_slot` is the start slot of the
+    // hot HDiff grid. Before the migration, it's set to the slot of the anchor state in the DB:
+    // - the genesis state on a genesis sync, or
+    // - the checkpoint state on a checkpoint sync.
+    //
+    // If the node has been running for a while the `anchor_slot` might be less than the finalized
+    // checkpoint. This upgrade constructs a grid only with unfinalized states, rooted in the
+    // current finalized state. So we set the `anchor_slot` to `split.slot` to root the grid in the
+    // current finalized state. Each migration sets the split to
+    // ```
+    // Split { slot: finalized_state.slot(), state_root: finalized_state_root }
+    // ```
+    {
+        let anchor_info = db.get_anchor_info();
+
+        // If the node is already an archive node, we can set the anchor slot to 0 and copy
+        // snapshots and diffs from the freezer DB to the hot DB in order to establish an initial
+        // hot grid that is aligned/"perfect" (no `start_slot`/`anchor_slot` to worry about).
+        //
+        // This only works if all of the following are true:
+        //
+        // - We have the previous snapshot for the split state stored in the freezer DB, i.e.
+        //   if `previous_snapshot_slot >= state_upper_limit`.
+        // - The split state itself will be stored as a diff or snapshot in the new grid. We choose
+        //   not to support a split state that requires block replay, because computing its previous
+        //   state root from the DAG is not straight-forward.
+        let dummy_start_slot = Slot::new(0);
+        let closest_layer_points = db
+            .hierarchy
+            .closest_layer_points(split.slot, dummy_start_slot);
+
+        let previous_snapshot_slot =
+            closest_layer_points
+                .iter()
+                .copied()
+                .min()
+                .ok_or(Error::MigrationError(
+                    "closest_layer_points must not be empty".to_string(),
+                ))?;
+
+        if previous_snapshot_slot >= anchor_info.state_upper_limit
+            && db
+                .hierarchy
+                .storage_strategy(split.slot, dummy_start_slot)
+                .is_ok_and(|strategy| !strategy.is_replay_from())
+        {
+            info!(
+                %previous_snapshot_slot,
+                split_slot = %split.slot,
+                "Aligning hot diff grid to freezer"
+            );
+
+            // Set anchor slot to 0 in case it was set to something else by a previous checkpoint
+            // sync.
+            let mut new_anchor_info = anchor_info.clone();
+            new_anchor_info.anchor_slot = Slot::new(0);
+
+            // Update the anchor on disk atomically if migration is successful
+            migrate_ops.push(db.compare_and_set_anchor_info(anchor_info, new_anchor_info)?);
+
+            // Copy each of the freezer layers to the hot DB in slot ascending order.
+            for layer_slot in closest_layer_points.into_iter().rev() {
+                // Do not try to load the split state itself from the freezer, it won't be there.
+                // It will be migrated in the main loop below.
+                if layer_slot == split.slot {
+                    continue;
+                }
+
+                let mut freezer_state = db.load_cold_state_by_slot(layer_slot)?;
+
+                let state_root = freezer_state.canonical_root()?;
+
+                let mut state_ops = vec![];
+                db.store_hot_state(&state_root, &freezer_state, &mut state_ops)?;
+                db.hot_db.do_atomically(state_ops)?;
+            }
+        } else {
+            // Otherwise for non-archive nodes, set the anchor slot for the hot grid to the current
+            // split slot (the oldest slot available).
+            let mut new_anchor_info = anchor_info.clone();
+            new_anchor_info.anchor_slot = hot_hdiff_start_slot;
+
+            // Update the anchor in disk atomically if migration is successful
+            migrate_ops.push(db.compare_and_set_anchor_info(anchor_info, new_anchor_info)?);
+        }
+    }
+
+    let state_summaries_dag = new_dag::<T>(&db)?;
+
+    // We compute the state summaries DAG outside of a DB migration. Therefore if the DB is properly
+    // prunned, it should have a single root equal to the split.
+    let state_summaries_dag_roots = state_summaries_dag.tree_roots();
+    if state_summaries_dag_roots.len() == 1 {
+        let (root_summary_state_root, root_summary) =
+            state_summaries_dag_roots.first().expect("len == 1");
+        if *root_summary_state_root != split.state_root {
+            warn!(
+                ?root_summary_state_root,
+                ?root_summary,
+                ?split,
+                "State summaries DAG root is not the split"
+            );
+        }
+    } else {
+        warn!(
+            location = "migration",
+            state_summaries_dag_roots = ?state_summaries_dag_roots,
+            "State summaries DAG found more than one root"
+        );
+    }
+
+    // Sort summaries by slot so we have their ancestor diffs already stored when we store them.
+    // If the summaries are sorted topologically we can insert them into the DB like if they were a
+    // new state, re-using existing code. As states are likely to be sequential the diff cache
+    // should kick in making the migration more efficient. If we just iterate the column of
+    // summaries we may get distance state of each iteration.
+    let summaries_by_slot = state_summaries_dag.summaries_by_slot_ascending();
+    debug!(
+        summaries_count = state_summaries_dag.summaries_count(),
+        slots_count = summaries_by_slot.len(),
+        min_slot = ?summaries_by_slot.first_key_value().map(|(slot, _)| slot),
+        max_slot = ?summaries_by_slot.last_key_value().map(|(slot, _)| slot),
+        ?state_summaries_dag_roots,
+        %hot_hdiff_start_slot,
+        split_state_root = ?split.state_root,
+        "Starting hot states migration"
+    );
+
+    // Upgrade all hot DB state summaries to the new type:
+    // - Set all summaries of boundary states to `Snapshot` type
+    // - Set all others to `Replay` pointing to `epoch_boundary_state_root`
+
+    let mut diffs_written = 0;
+    let mut summaries_written = 0;
+    let mut last_log_time = Instant::now();
+
+    for (slot, old_hot_state_summaries) in summaries_by_slot {
+        for (state_root, old_summary) in old_hot_state_summaries {
+            if slot < hot_hdiff_start_slot {
+                // To reach here, there must be some pruning issue with the DB where we still have
+                // hot states below the split slot. This states can't be migrated as we can't compute
+                // a storage strategy for them. After this if else block, the summary and state are
+                // scheduled for deletion.
+                debug!(
+                    %slot,
+                    ?state_root,
+                    "Ignoring state summary prior to split slot"
+                );
+            } else {
+                // 1. Store snapshot or diff at this slot (if required).
+                let storage_strategy = db.hot_storage_strategy(slot)?;
+                debug!(
+                    %slot,
+                    ?state_root,
+                    ?storage_strategy,
+                    "Migrating state summary"
+                );
+
+                match storage_strategy {
+                    StorageStrategy::DiffFrom(_) | StorageStrategy::Snapshot => {
+                        // Load the state and re-store it as a snapshot or diff.
+                        let state = get_state_v22::<T>(&db, &state_root, &db.spec)?
+                            .ok_or(Error::MissingState(state_root))?;
+
+                        // Store immediately so that future diffs can load and diff from it.
+                        let mut ops = vec![];
+                        // We must commit the hot state summary immediately, otherwise we can't diff
+                        // against it and future writes will fail. That's why we write the new hot
+                        // summaries in a different column to have both new and old data present at
+                        // once. Otherwise if the process crashes during the migration the database will
+                        // be broken.
+                        db.store_hot_state_summary(&state_root, &state, &mut ops)?;
+                        db.store_hot_state_diffs(&state_root, &state, &mut ops)?;
+                        db.hot_db.do_atomically(ops)?;
+                        diffs_written += 1;
+                    }
+                    StorageStrategy::ReplayFrom(diff_base_slot) => {
+                        // Optimization: instead of having to load the state of each summary we load x32
+                        // less states by manually computing the HotStateSummary roots using the
+                        // computed state dag.
+                        //
+                        // No need to store diffs for states that will be reconstructed by replaying
+                        // blocks.
+                        //
+                        // 2. Convert the summary to the new format.
+                        if state_root == split.state_root {
+                            return Err(Error::MigrationError(
+                                "unreachable: split state should be stored as a snapshot or diff"
+                                    .to_string(),
+                            ));
+                        }
+                        let previous_state_root = state_summaries_dag
+                            .previous_state_root(state_root)
+                            .map_err(|e| {
+                                Error::MigrationError(format!(
+                                    "error computing previous_state_root {e:?}"
+                                ))
+                            })?;
+
+                        let diff_base_state = OptionalDiffBaseState::new(
+                            diff_base_slot,
+                            state_summaries_dag
+                                .ancestor_state_root_at_slot(state_root, diff_base_slot)
+                                .map_err(|e| {
+                                    Error::MigrationError(format!(
+                                        "error computing ancestor_state_root_at_slot \
+                                         ({state_root:?}, {diff_base_slot}): {e:?}"
+                                    ))
+                                })?,
+                        );
+
+                        let new_summary = HotStateSummary {
+                            slot,
+                            latest_block_root: old_summary.latest_block_root,
+                            latest_block_slot: old_summary.latest_block_slot,
+                            previous_state_root,
+                            diff_base_state,
+                        };
+                        let op = new_summary.as_kv_store_op(state_root);
+                        // It's not necessary to immediately commit the summaries of states that are
+                        // ReplayFrom. However we do so for simplicity.
+                        db.hot_db.do_atomically(vec![op])?;
+                    }
+                }
+            }
+
+            // 3. Stage old data for deletion.
+            if slot % T::EthSpec::slots_per_epoch() == 0 {
+                migrate_ops.push(KeyValueStoreOp::DeleteKey(
+                    DBColumn::BeaconState,
+                    state_root.as_slice().to_vec(),
+                ));
+            }
+
+            // Delete previous summaries
+            migrate_ops.push(KeyValueStoreOp::DeleteKey(
+                DBColumn::BeaconStateSummary,
+                state_root.as_slice().to_vec(),
+            ));
+
+            summaries_written += 1;
+            if last_log_time.elapsed() > Duration::from_secs(5) {
+                last_log_time = Instant::now();
+                info!(
+                    diffs_written,
+                    summaries_written,
+                    summaries_count = state_summaries_dag.summaries_count(),
+                    "Hot states migration in progress"
+                );
+            }
+        }
+    }
+
+    info!(
+        diffs_written,
+        summaries_written,
+        summaries_count = state_summaries_dag.summaries_count(),
+        "Hot states migration complete"
+    );
+
+    Ok(migrate_ops)
+}
+
+pub fn downgrade_from_v24<T: BeaconChainTypes>(
+    db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
+) -> Result<Vec<KeyValueStoreOp>, Error> {
+    let state_summaries = db
+        .load_hot_state_summaries()?
+        .into_iter()
+        .map(|(state_root, summary)| (state_root, summary.into()))
+        .collect::<Vec<(Hash256, DAGStateSummary)>>();
+
+    info!(
+        summaries_count = state_summaries.len(),
+        "DB downgrade of v24 state summaries started"
+    );
+
+    let state_summaries_dag = StateSummariesDAG::new(state_summaries)
+        .map_err(|e| Error::MigrationError(format!("Error on new StateSumariesDAG {e:?}")))?;
+
+    let mut migrate_ops = vec![];
+    let mut states_written = 0;
+    let mut summaries_written = 0;
+    let mut summaries_skipped = 0;
+    let mut last_log_time = Instant::now();
+
+    // Rebuild the PruningCheckpoint from the split.
+    let split = db.get_split_info();
+    let pruning_checkpoint = PruningCheckpoint {
+        checkpoint: Checkpoint {
+            epoch: split.slot.epoch(T::EthSpec::slots_per_epoch()),
+            root: split.block_root,
+        },
+    };
+    migrate_ops.push(pruning_checkpoint.as_kv_store_op(PRUNING_CHECKPOINT_KEY));
+
+    // Convert state summaries back to the old format.
+    for (state_root, summary) in state_summaries_dag
+        .summaries_by_slot_ascending()
+        .into_iter()
+        .flat_map(|(_, summaries)| summaries)
+    {
+        // No need to migrate any states prior to the split. The v22 schema does not need them, and
+        // they would generate warnings about a disjoint DAG when re-upgrading to V24.
+        if summary.slot < split.slot {
+            debug!(
+                slot = %summary.slot,
+                ?state_root,
+                "Skipping migration of pre-split state"
+            );
+            summaries_skipped += 1;
+            continue;
+        }
+
+        // If boundary state: persist.
+        // Do not cache these states as they are unlikely to be relevant later.
+        let update_cache = false;
+        if summary.slot % T::EthSpec::slots_per_epoch() == 0 {
+            let (state, _) = db
+                .load_hot_state(&state_root, update_cache)?
+                .ok_or(Error::MissingState(state_root))?;
+
+            // Immediately commit the state, so we don't OOM. It's stored in a different
+            // column so if the migration crashes we'll just store extra harmless junk in the DB.
+            let mut state_write_ops = vec![];
+            store_full_state_v22(&state_root, &state, &mut state_write_ops)?;
+            db.hot_db.do_atomically(state_write_ops)?;
+            states_written += 1;
+        }
+
+        // Persist old summary.
+        let epoch_boundary_state_slot = summary.slot - summary.slot % T::EthSpec::slots_per_epoch();
+        let old_summary = HotStateSummaryV22 {
+            slot: summary.slot,
+            latest_block_root: summary.latest_block_root,
+            epoch_boundary_state_root: state_summaries_dag
+                .ancestor_state_root_at_slot(state_root, epoch_boundary_state_slot)
+                .map_err(|e| {
+                    Error::MigrationError(format!(
+                        "error computing ancestor_state_root_at_slot({state_root:?}, {epoch_boundary_state_slot}) {e:?}"
+                    ))
+                })?,
+        };
+        migrate_ops.push(KeyValueStoreOp::PutKeyValue(
+            DBColumn::BeaconStateSummary,
+            state_root.as_slice().to_vec(),
+            old_summary.as_ssz_bytes(),
+        ));
+        summaries_written += 1;
+
+        if last_log_time.elapsed() > Duration::from_secs(5) {
+            last_log_time = Instant::now();
+            info!(
+                states_written,
+                summaries_written,
+                summaries_count = state_summaries_dag.summaries_count(),
+                "DB downgrade of v24 state summaries in progress"
+            );
+        }
+    }
+
+    // Delete all V24 schema data. We do this outside the loop over summaries to ensure we cover
+    // every piece of data and to simplify logic around skipping certain summaries that do not get
+    // migrated.
+    for db_column in [
+        DBColumn::BeaconStateHotSummary,
+        DBColumn::BeaconStateHotDiff,
+        DBColumn::BeaconStateHotSnapshot,
+    ] {
+        for key in db.hot_db.iter_column_keys::<Hash256>(db_column) {
+            let state_root = key?;
+            migrate_ops.push(KeyValueStoreOp::DeleteKey(
+                db_column,
+                state_root.as_slice().to_vec(),
+            ));
+        }
+    }
+
+    info!(
+        states_written,
+        summaries_written,
+        summaries_skipped,
+        summaries_count = state_summaries_dag.summaries_count(),
+        "DB downgrade of v24 state summaries completed"
+    );
+
+    Ok(migrate_ops)
+}
+
+fn new_dag<T: BeaconChainTypes>(
+    db: &HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>,
+) -> Result<StateSummariesDAG, Error> {
+    // Collect all sumaries for unfinalized states
+    let state_summaries_v22 = db
+        .hot_db
+        // Collect summaries from the legacy V22 column BeaconStateSummary
+        .iter_column::<Hash256>(DBColumn::BeaconStateSummary)
+        .map(|res| {
+            let (key, value) = res?;
+            let state_root: Hash256 = key;
+            let summary = HotStateSummaryV22::from_ssz_bytes(&value)?;
+            let block_root = summary.latest_block_root;
+            // Read blocks to get the block slot and parent root. In Holesky forced finalization it
+            // took 5100 ms to read 15072 state summaries, so it's not really necessary to
+            // de-duplicate block reads.
+            let block = db
+                .get_blinded_block(&block_root)?
+                .ok_or(Error::MissingBlock(block_root))?;
+
+            Ok((
+                state_root,
+                DAGStateSummaryV22 {
+                    slot: summary.slot,
+                    latest_block_root: summary.latest_block_root,
+                    block_slot: block.slot(),
+                    block_parent_root: block.parent_root(),
+                },
+            ))
+        })
+        .collect::<Result<Vec<_>, Error>>()?;
+
+    StateSummariesDAG::new_from_v22(state_summaries_v22)
+        .map_err(|e| Error::MigrationError(format!("error computing states summaries dag {e:?}")))
+}