mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-20 21:34:46 +00:00
Hierarchical state diffs in hot DB (#6750)
This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases.
Closes https://github.com/sigp/lighthouse/issues/6580
Conga into https://github.com/sigp/lighthouse/pull/6744
### TODOs
- [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176
- [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner)
- [x] misspelled: get_ancenstor_state_root
- [x] get_ancestor_state_root should use state summaries
- [x] Prevent split from changing during ancestor calc
- [x] Use same hierarchy for hot and cold
### TODO Good optimization for future PRs
- [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot, migrate the diffs instead of the full states.
```
align slot time
10485760 Nov-26-2024
12582912 Sep-14-2025
14680064 Jul-02-2026
```
### TODO Maybe things good to have
- [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1
- [ ] Make anchor fields not public such that they must be mutated through a method, to prevent unwanted changes to the anchor_slot
### NOTTODO
- [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend from the finalized checkpoint
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use crate::errors::BeaconChainError;
|
||||
use crate::summaries_dag::{DAGStateSummaryV22, Error as SummariesDagError, StateSummariesDAG};
|
||||
use crate::summaries_dag::{DAGStateSummary, Error as SummariesDagError, StateSummariesDAG};
|
||||
use parking_lot::Mutex;
|
||||
use std::collections::HashSet;
|
||||
use std::mem;
|
||||
@@ -7,7 +7,7 @@ use std::sync::{mpsc, Arc};
|
||||
use std::thread;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use store::hot_cold_store::{migrate_database, HotColdDBError};
|
||||
use store::{Error, ItemStore, StoreOp};
|
||||
use store::{Error, ItemStore, Split, StoreOp};
|
||||
pub use store::{HotColdDB, MemoryStore};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use types::{BeaconState, BeaconStateHash, Checkpoint, Epoch, EthSpec, Hash256, Slot};
|
||||
@@ -343,18 +343,23 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
}
|
||||
};
|
||||
|
||||
match migrate_database(
|
||||
let split_prior_to_migration = match migrate_database(
|
||||
db.clone(),
|
||||
finalized_state_root.into(),
|
||||
finalized_block_root,
|
||||
&finalized_state,
|
||||
) {
|
||||
Ok(()) => {}
|
||||
Ok(split_change) => {
|
||||
// Migration run, return the split before the migration
|
||||
split_change.previous
|
||||
}
|
||||
Err(Error::HotColdDBError(HotColdDBError::FreezeSlotUnaligned(slot))) => {
|
||||
debug!(
|
||||
slot = slot.as_u64(),
|
||||
"Database migration postponed, unaligned finalized block"
|
||||
);
|
||||
// Migration did not run, return the current split info
|
||||
db.get_split_info()
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = ?e, "Database migration failed");
|
||||
@@ -367,6 +372,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
finalized_state_root.into(),
|
||||
&finalized_state,
|
||||
notif.finalized_checkpoint,
|
||||
split_prior_to_migration,
|
||||
) {
|
||||
Ok(PruningOutcome::Successful {
|
||||
old_finalized_checkpoint_epoch,
|
||||
@@ -503,6 +509,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
new_finalized_state_root: Hash256,
|
||||
new_finalized_state: &BeaconState<E>,
|
||||
new_finalized_checkpoint: Checkpoint,
|
||||
split_prior_to_migration: Split,
|
||||
) -> Result<PruningOutcome, BeaconChainError> {
|
||||
let new_finalized_slot = new_finalized_checkpoint
|
||||
.epoch
|
||||
@@ -519,6 +526,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
}
|
||||
|
||||
debug!(
|
||||
split_prior_to_migration = ?split_prior_to_migration,
|
||||
new_finalized_checkpoint = ?new_finalized_checkpoint,
|
||||
new_finalized_state_root = %new_finalized_state_root,
|
||||
"Starting database pruning"
|
||||
@@ -528,33 +536,14 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
let state_summaries = store
|
||||
.load_hot_state_summaries()?
|
||||
.into_iter()
|
||||
.map(|(state_root, summary)| {
|
||||
let block_root = summary.latest_block_root;
|
||||
// This error should never happen unless we break a DB invariant
|
||||
let block = store
|
||||
.get_blinded_block(&block_root)?
|
||||
.ok_or(PruningError::MissingBlindedBlock(block_root))?;
|
||||
Ok((
|
||||
state_root,
|
||||
DAGStateSummaryV22 {
|
||||
slot: summary.slot,
|
||||
latest_block_root: summary.latest_block_root,
|
||||
block_slot: block.slot(),
|
||||
block_parent_root: block.parent_root(),
|
||||
},
|
||||
))
|
||||
})
|
||||
.collect::<Result<Vec<(Hash256, DAGStateSummaryV22)>, BeaconChainError>>()?;
|
||||
|
||||
// De-duplicate block roots to reduce block reads below
|
||||
let summary_block_roots = HashSet::<Hash256>::from_iter(
|
||||
state_summaries
|
||||
.iter()
|
||||
.map(|(_, summary)| summary.latest_block_root),
|
||||
);
|
||||
.map(|(state_root, summary)| (state_root, summary.into()))
|
||||
.collect::<Vec<(Hash256, DAGStateSummary)>>();
|
||||
|
||||
// Sanity check, there is at least one summary with the new finalized block root
|
||||
if !summary_block_roots.contains(&new_finalized_checkpoint.root) {
|
||||
if !state_summaries
|
||||
.iter()
|
||||
.any(|(_, s)| s.latest_block_root == new_finalized_checkpoint.root)
|
||||
{
|
||||
return Err(BeaconChainError::PruningError(
|
||||
PruningError::MissingSummaryForFinalizedCheckpoint(
|
||||
new_finalized_checkpoint.root,
|
||||
@@ -562,16 +551,31 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
));
|
||||
}
|
||||
|
||||
StateSummariesDAG::new_from_v22(state_summaries)
|
||||
StateSummariesDAG::new(state_summaries)
|
||||
.map_err(|e| PruningError::SummariesDagError("new StateSumariesDAG", e))?
|
||||
};
|
||||
|
||||
// To debug faulty trees log if we unexpectedly have more than one root. These trees may not
|
||||
// result in an error, as they may not be queried in the codepaths below.
|
||||
let state_summaries_dag_roots = state_summaries_dag.tree_roots();
|
||||
if state_summaries_dag_roots.len() > 1 {
|
||||
let state_summaries_dag_roots_post_split = state_summaries_dag_roots
|
||||
.iter()
|
||||
.filter(|(_, s)| s.slot >= split_prior_to_migration.slot)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Because of the additional HDiffs kept for the grid prior to finalization the tree_roots
|
||||
// function will consider them roots. Those are expected. We just want to assert that the
|
||||
// relevant tree of states (post-split) is well-formed.
|
||||
//
|
||||
// This warning could also fire if we have imported a block that doesn't descend from the
|
||||
// new finalized state, and has had its ancestor state summaries pruned by a previous
|
||||
// run. See: https://github.com/sigp/lighthouse/issues/7270.
|
||||
if state_summaries_dag_roots_post_split.len() > 1 {
|
||||
warn!(
|
||||
state_summaries_dag_roots = ?state_summaries_dag_roots,
|
||||
location = "pruning",
|
||||
new_finalized_state_root = ?new_finalized_state_root,
|
||||
split_prior_to_migration_slot = %split_prior_to_migration.slot,
|
||||
state_summaries_dag_roots_post_split = ?state_summaries_dag_roots_post_split,
|
||||
error = "summaries dag found more than one root",
|
||||
"Notify the devs your hot DB has some inconsistency. Pruning will fix it but devs want to know about it",
|
||||
);
|
||||
@@ -626,10 +630,17 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
.blocks_of_states(newly_finalized_state_roots.iter())
|
||||
.map_err(|e| PruningError::SummariesDagError("blocks of newly finalized", e))?;
|
||||
|
||||
// Compute the set of finalized state roots that we must keep to make the dynamic HDiff system
|
||||
// work.
|
||||
let required_finalized_diff_state_slots = store
|
||||
.hierarchy
|
||||
.closest_layer_points(new_finalized_slot, store.hot_hdiff_start_slot()?);
|
||||
|
||||
// We don't know which blocks are shared among abandoned chains, so we buffer and delete
|
||||
// everything in one fell swoop.
|
||||
let mut blocks_to_prune: HashSet<Hash256> = HashSet::new();
|
||||
let mut states_to_prune: HashSet<(Slot, Hash256)> = HashSet::new();
|
||||
let mut kept_summaries_for_hdiff = vec![];
|
||||
|
||||
// Consider the following block tree where we finalize block `[0]` at the checkpoint `(f)`.
|
||||
// There's a block `[3]` that descends from the finalized block but NOT from the
|
||||
@@ -650,6 +661,30 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
// This state is a viable descendant of the finalized checkpoint, so does not
|
||||
// conflict with finality and can be built on or become a head
|
||||
false
|
||||
} else if required_finalized_diff_state_slots.contains(&summary.slot) {
|
||||
// Keep this state and diff as it's necessary for the finalized portion of the
|
||||
// HDiff links. `required_finalized_diff_state_slots` tracks the set of slots on
|
||||
// each diff layer, and by checking `newly_finalized_state_roots` which only
|
||||
// keep those on the finalized canonical chain. Checking the state root ensures
|
||||
// we avoid lingering forks.
|
||||
|
||||
// In the diagram below, `o` are diffs by slot that we must keep. In the prior
|
||||
// finalized section there's only one chain so we preserve them unconditionally.
|
||||
// For the newly finalized chain, we check which of these is canonical and only
|
||||
// keep those. Slots below `min_finalized_state_slot` we don't have canonical
|
||||
// information so we assume they are part of the finalized pruned chain.
|
||||
//
|
||||
// /-----o----
|
||||
// o-------o------/-------o----
|
||||
if summary.slot < newly_finalized_states_min_slot
|
||||
|| newly_finalized_state_roots.contains(&state_root)
|
||||
{
|
||||
// Track kept summaries to debug hdiff inconsistencies with "Extra pruning information"
|
||||
kept_summaries_for_hdiff.push((state_root, summary.slot));
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
} else {
|
||||
// Everything else, prune
|
||||
true
|
||||
@@ -703,27 +738,29 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
|
||||
debug!(
|
||||
new_finalized_checkpoint = ?new_finalized_checkpoint,
|
||||
new_finalized_state_root = ?new_finalized_state_root,
|
||||
split_prior_to_migration = ?split_prior_to_migration,
|
||||
newly_finalized_blocks = newly_finalized_blocks.len(),
|
||||
newly_finalized_state_roots = newly_finalized_state_roots.len(),
|
||||
newly_finalized_states_min_slot = %newly_finalized_states_min_slot,
|
||||
required_finalized_diff_state_slots = ?required_finalized_diff_state_slots,
|
||||
kept_summaries_for_hdiff = ?kept_summaries_for_hdiff,
|
||||
state_summaries_count = state_summaries_dag.summaries_count(),
|
||||
state_summaries_dag_roots = ?state_summaries_dag_roots,
|
||||
finalized_and_descendant_state_roots_of_finalized_checkpoint = finalized_and_descendant_state_roots_of_finalized_checkpoint.len(),
|
||||
finalized_and_descendant_state_roots_of_finalized_checkpoint = finalized_and_descendant_state_roots_of_finalized_checkpoint.len(),
|
||||
blocks_to_prune = blocks_to_prune.len(),
|
||||
states_to_prune = states_to_prune.len(),
|
||||
"Extra pruning information"
|
||||
);
|
||||
// Don't log the full `states_to_prune` in the log statement above as it can result in a
|
||||
// single log line of +1Kb and break logging setups.
|
||||
// single log line of +1Kb and break logging setups. Log `new_finalized_state_root` as a
|
||||
// pruning ID to trace these individual logs to the above "Extra pruning information"
|
||||
for block_root in &blocks_to_prune {
|
||||
debug!(
|
||||
block_root = ?block_root,
|
||||
"Pruning block"
|
||||
);
|
||||
debug!(?new_finalized_state_root, ?block_root, "Pruning block");
|
||||
}
|
||||
for (slot, state_root) in &states_to_prune {
|
||||
debug!(
|
||||
?new_finalized_state_root,
|
||||
?state_root,
|
||||
%slot,
|
||||
"Pruning hot state"
|
||||
@@ -756,7 +793,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
|
||||
store.do_atomically_with_block_and_blobs_cache(batch)?;
|
||||
|
||||
debug!("Database pruning complete");
|
||||
debug!(?new_finalized_state_root, "Database pruning complete");
|
||||
|
||||
Ok(PruningOutcome::Successful {
|
||||
// Approximation of the previous finalized checkpoint. Only used in the compaction to
|
||||
|
||||
Reference in New Issue
Block a user