Hierarchical state diffs in hot DB (#6750)

This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases.

Closes https://github.com/sigp/lighthouse/issues/6580

Conga into https://github.com/sigp/lighthouse/pull/6744

### TODOs

- [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176
- [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner)
- [x] Fix misspelled name: get_ancenstor_state_root
- [x] get_ancestor_state_root should use state summaries
- [x] Prevent split from changing during ancestor calc
- [x] Use same hierarchy for hot and cold

### TODO Good optimization for future PRs

- [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot, migrate the diffs instead of the full states.
```
align slot  time
10485760    Nov-26-2024
12582912    Sep-14-2025
14680064    Jul-02-2026
```

### TODO Maybe: things that would be good to have

- [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1
- [ ] Make the anchor fields non-public so that they must be mutated through a method, to prevent unwanted changes to the anchor_slot

### NOTTODO

- [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend from the finalized checkpoint
This commit is contained in:
Lion - dapplion
2025-06-19 04:43:25 +02:00
committed by GitHub
parent 6786b9d12a
commit dd98534158
33 changed files with 2695 additions and 812 deletions

View File

@@ -2,9 +2,9 @@ use crate::{DBColumn, Error, StoreItem};
use serde::{Deserialize, Serialize};
use ssz::{Decode, Encode};
use ssz_derive::{Decode, Encode};
use types::{Checkpoint, Hash256, Slot};
use types::{Hash256, Slot};
pub const CURRENT_SCHEMA_VERSION: SchemaVersion = SchemaVersion(23);
pub const CURRENT_SCHEMA_VERSION: SchemaVersion = SchemaVersion(24);
// All the keys that get stored under the `BeaconMeta` column.
//
@@ -12,7 +12,8 @@ pub const CURRENT_SCHEMA_VERSION: SchemaVersion = SchemaVersion(23);
pub const SCHEMA_VERSION_KEY: Hash256 = Hash256::repeat_byte(0);
pub const CONFIG_KEY: Hash256 = Hash256::repeat_byte(1);
pub const SPLIT_KEY: Hash256 = Hash256::repeat_byte(2);
pub const PRUNING_CHECKPOINT_KEY: Hash256 = Hash256::repeat_byte(3);
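// DEPRECATED: key byte 3 was used for the pruning checkpoint. The old definition is kept
// commented out for reference.
// pub const PRUNING_CHECKPOINT_KEY: Hash256 = Hash256::repeat_byte(3);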
pub const COMPACTION_TIMESTAMP_KEY: Hash256 = Hash256::repeat_byte(4);
pub const ANCHOR_INFO_KEY: Hash256 = Hash256::repeat_byte(5);
pub const BLOB_INFO_KEY: Hash256 = Hash256::repeat_byte(6);
@@ -21,15 +22,6 @@ pub const DATA_COLUMN_INFO_KEY: Hash256 = Hash256::repeat_byte(7);
/// State upper limit value used to indicate that a node is not storing historic states.
///
/// The value is the slot built from `u64::MAX`.
pub const STATE_UPPER_LIMIT_NO_RETAIN: Slot = Slot::new(u64::MAX);
/// The `AnchorInfo` encoding full availability of all historic blocks & states.
///
/// Every slot field is set to slot 0 and `oldest_block_parent` is the zero hash.
pub const ANCHOR_FOR_ARCHIVE_NODE: AnchorInfo = AnchorInfo {
anchor_slot: Slot::new(0),
oldest_block_slot: Slot::new(0),
oldest_block_parent: Hash256::ZERO,
state_upper_limit: Slot::new(0),
state_lower_limit: Slot::new(0),
};
/// The `AnchorInfo` encoding an uninitialized anchor.
///
/// This value should never exist except on initial start-up prior to the anchor being initialised
@@ -65,30 +57,6 @@ impl StoreItem for SchemaVersion {
}
}
/// The checkpoint used for pruning the database.
///
/// Updated whenever pruning is successful.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PruningCheckpoint {
/// The wrapped checkpoint; it is the only data this item carries.
pub checkpoint: Checkpoint,
}
impl StoreItem for PruningCheckpoint {
    /// Pruning checkpoints are stored in the `BeaconMeta` column.
    fn db_column() -> DBColumn {
        DBColumn::BeaconMeta
    }

    /// Serialise the item as the SSZ encoding of the wrapped checkpoint.
    fn as_store_bytes(&self) -> Vec<u8> {
        let Self { checkpoint } = self;
        checkpoint.as_ssz_bytes()
    }

    /// Rebuild the item from the SSZ encoding of a checkpoint.
    ///
    /// A decoding failure is propagated to the caller via `?`.
    fn from_store_bytes(bytes: &[u8]) -> Result<Self, Error> {
        let checkpoint = Checkpoint::from_ssz_bytes(bytes)?;
        Ok(Self { checkpoint })
    }
}
/// The last time the database was compacted.
///
/// NOTE(review): the unit of the wrapped `u64` is not visible here — presumably seconds since
/// the UNIX epoch; confirm against the code that writes it.
pub struct CompactionTimestamp(pub u64);
@@ -111,7 +79,8 @@ impl StoreItem for CompactionTimestamp {
pub struct AnchorInfo {
/// The slot at which the anchor state is present and which we cannot revert. Values on start:
/// - Genesis start: 0
/// - Checkpoint sync: Slot of the finalized checkpoint block
/// - Checkpoint sync: Slot of the finalized state advanced to the checkpoint epoch
/// - Existing DB prior to v23: Finalized state slot at the migration moment
///
/// Immutable
pub anchor_slot: Slot,
@@ -175,6 +144,21 @@ impl AnchorInfo {
pub fn full_state_pruning_enabled(&self) -> bool {
    // Both conditions must hold: the lower limit is 0 and the upper limit is the
    // `STATE_UPPER_LIMIT_NO_RETAIN` marker.
    let lower_is_zero = self.state_lower_limit == 0;
    let upper_is_no_retain = self.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN;
    lower_is_zero && upper_is_no_retain
}
/// Derive the `AnchorInfo` that an archive node created from the current node should use.
///
/// The block and state limits are all reset to slot 0 and the oldest block parent to the zero
/// hash, while `anchor_slot` is carried over untouched so that the hot database's diff grid,
/// which is built on it, is preserved.
pub fn as_archive_anchor(&self) -> Self {
    let zero_slot = Slot::new(0);
    Self {
        // The anchor slot is immutable, so it MUST be copied over as-is.
        anchor_slot: self.anchor_slot,
        state_lower_limit: zero_slot,
        state_upper_limit: zero_slot,
        oldest_block_slot: zero_slot,
        oldest_block_parent: Hash256::ZERO,
    }
}
}
impl StoreItem for AnchorInfo {