mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-19 12:56:12 +00:00
Hierarchical state diffs in hot DB (#6750)
This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases.
Closes https://github.com/sigp/lighthouse/issues/6580
Conga into https://github.com/sigp/lighthouse/pull/6744
### TODOs
- [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176
- [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner)
- [x] misspelled: get_ancenstor_state_root
- [x] get_ancestor_state_root should use state summaries
- [x] Prevent split from changing during ancestor calc
- [x] Use same hierarchy for hot and cold
### TODO Good optimization for future PRs
- [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot, migrate the diffs instead of the full states.
```
align slot time
10485760 Nov-26-2024
12582912 Sep-14-2025
14680064 Jul-02-2026
```
### TODO Things that may be good to have
- [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1
- [ ] Make anchor fields non-public so that they must be mutated through a method, to prevent unwanted changes to the anchor_slot
### NOTTODO
- [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend from the finalized checkpoint
This commit is contained in:
@@ -44,8 +44,8 @@ use store::{Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp};
|
||||
use task_executor::{ShutdownReason, TaskExecutor};
|
||||
use tracing::{debug, error, info};
|
||||
use types::{
|
||||
BeaconBlock, BeaconState, BlobSidecarList, ChainSpec, Checkpoint, DataColumnSidecarList, Epoch,
|
||||
EthSpec, FixedBytesExtended, Hash256, Signature, SignedBeaconBlock, Slot,
|
||||
BeaconBlock, BeaconState, BlobSidecarList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec,
|
||||
FixedBytesExtended, Hash256, Signature, SignedBeaconBlock, Slot,
|
||||
};
|
||||
|
||||
/// An empty struct used to "witness" all the `BeaconChainTypes` traits. It has no user-facing
|
||||
@@ -382,21 +382,29 @@ where
|
||||
}
|
||||
|
||||
/// Starts a new chain from a genesis state.
|
||||
pub fn genesis_state(mut self, beacon_state: BeaconState<E>) -> Result<Self, String> {
|
||||
pub fn genesis_state(mut self, mut beacon_state: BeaconState<E>) -> Result<Self, String> {
|
||||
let store = self.store.clone().ok_or("genesis_state requires a store")?;
|
||||
|
||||
// Initialize anchor info before attempting to write the genesis state.
|
||||
// Since v4.4.0 we will set the anchor with a dummy state upper limit in order to prevent
|
||||
// historic states from being retained (unless `--reconstruct-historic-states` is set).
|
||||
let retain_historic_states = self.chain_config.reconstruct_historic_states;
|
||||
let genesis_beacon_block = genesis_block(&mut beacon_state, &self.spec)?;
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_anchor_info(
|
||||
genesis_beacon_block.parent_root(),
|
||||
genesis_beacon_block.slot(),
|
||||
Slot::new(0),
|
||||
retain_historic_states,
|
||||
)
|
||||
.map_err(|e| format!("Failed to initialize genesis anchor: {:?}", e))?,
|
||||
);
|
||||
|
||||
let (genesis, updated_builder) = self.set_genesis_state(beacon_state)?;
|
||||
self = updated_builder;
|
||||
|
||||
// Stage the database's metadata fields for atomic storage when `build` is called.
|
||||
// Since v4.4.0 we will set the anchor with a dummy state upper limit in order to prevent
|
||||
// historic states from being retained (unless `--reconstruct-historic-states` is set).
|
||||
let retain_historic_states = self.chain_config.reconstruct_historic_states;
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_anchor_info(genesis.beacon_block.message(), retain_historic_states)
|
||||
.map_err(|e| format!("Failed to initialize genesis anchor: {:?}", e))?,
|
||||
);
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_blob_info(genesis.beacon_block.slot())
|
||||
@@ -521,6 +529,13 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
debug!(
|
||||
slot = %weak_subj_slot,
|
||||
state_root = ?weak_subj_state_root,
|
||||
block_root = ?weak_subj_block_root,
|
||||
"Storing split from weak subjectivity state"
|
||||
);
|
||||
|
||||
// Set the store's split point *before* storing genesis so that genesis is stored
|
||||
// immediately in the freezer DB.
|
||||
store.set_split(weak_subj_slot, weak_subj_state_root, weak_subj_block_root);
|
||||
@@ -541,6 +556,26 @@ where
|
||||
.cold_db
|
||||
.do_atomically(block_root_batch)
|
||||
.map_err(|e| format!("Error writing frozen block roots: {e:?}"))?;
|
||||
debug!(
|
||||
from = %weak_subj_block.slot(),
|
||||
to_excl = %weak_subj_state.slot(),
|
||||
block_root = ?weak_subj_block_root,
|
||||
"Stored frozen block roots at skipped slots"
|
||||
);
|
||||
|
||||
// Write the anchor to memory before calling `put_state` otherwise hot hdiff can't store
|
||||
// states that do not align with the `start_slot` grid.
|
||||
let retain_historic_states = self.chain_config.reconstruct_historic_states;
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_anchor_info(
|
||||
weak_subj_block.parent_root(),
|
||||
weak_subj_block.slot(),
|
||||
weak_subj_slot,
|
||||
retain_historic_states,
|
||||
)
|
||||
.map_err(|e| format!("Failed to initialize anchor info: {:?}", e))?,
|
||||
);
|
||||
|
||||
// Write the state, block and blobs non-atomically, it doesn't matter if they're forgotten
|
||||
// about on a crash restart.
|
||||
@@ -551,6 +586,8 @@ where
|
||||
weak_subj_state.clone(),
|
||||
)
|
||||
.map_err(|e| format!("Failed to set checkpoint state as finalized state: {:?}", e))?;
|
||||
// Note: post hot hdiff must update the anchor info before attempting to put_state otherwise
|
||||
// the write will fail if the weak_subj_slot is not aligned with the snapshot moduli.
|
||||
store
|
||||
.put_state(&weak_subj_state_root, &weak_subj_state)
|
||||
.map_err(|e| format!("Failed to store weak subjectivity state: {e:?}"))?;
|
||||
@@ -580,13 +617,7 @@ where
|
||||
// Stage the database's metadata fields for atomic storage when `build` is called.
|
||||
// This prevents the database from restarting in an inconsistent state if the anchor
|
||||
// info or split point is written before the `PersistedBeaconChain`.
|
||||
let retain_historic_states = self.chain_config.reconstruct_historic_states;
|
||||
self.pending_io_batch.push(store.store_split_in_batch());
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_anchor_info(weak_subj_block.message(), retain_historic_states)
|
||||
.map_err(|e| format!("Failed to initialize anchor info: {:?}", e))?,
|
||||
);
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.init_blob_info(weak_subj_block.slot())
|
||||
@@ -598,13 +629,6 @@ where
|
||||
.map_err(|e| format!("Failed to initialize data column info: {:?}", e))?,
|
||||
);
|
||||
|
||||
// Store pruning checkpoint to prevent attempting to prune before the anchor state.
|
||||
self.pending_io_batch
|
||||
.push(store.pruning_checkpoint_store_op(Checkpoint {
|
||||
root: weak_subj_block_root,
|
||||
epoch: weak_subj_state.slot().epoch(E::slots_per_epoch()),
|
||||
}));
|
||||
|
||||
let snapshot = BeaconSnapshot {
|
||||
beacon_block_root: weak_subj_block_root,
|
||||
beacon_block: Arc::new(weak_subj_block),
|
||||
|
||||
Reference in New Issue
Block a user