mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-19 13:58:28 +00:00
Hierarchical state diffs in hot DB (#6750)
This PR implements https://github.com/sigp/lighthouse/pull/5978 (tree-states) but on the hot DB. It allows Lighthouse to massively reduce its disk footprint during non-finality and overall I/O in all cases.
Closes https://github.com/sigp/lighthouse/issues/6580
Conga into https://github.com/sigp/lighthouse/pull/6744
### TODOs
- [x] Fix OOM in CI https://github.com/sigp/lighthouse/pull/7176
- [x] optimise store_hot_state to avoid storing a duplicate state if the summary already exists (should be safe from races now that pruning is cleaner)
- [x] misspelled: get_ancenstor_state_root
- [x] get_ancestor_state_root should use state summaries
- [x] Prevent split from changing during ancestor calc
- [x] Use same hierarchy for hot and cold
### TODO Good optimization for future PRs
- [ ] On the migration, if the latest hot snapshot is aligned with the cold snapshot, migrate the diffs instead of the full states.
```
align slot time
10485760 Nov-26-2024
12582912 Sep-14-2025
14680064 Jul-02-2026
```
### TODO Possibly nice to have
- [ ] Rename anchor_slot https://github.com/sigp/lighthouse/compare/tree-states-hot-rebase-oom...dapplion:lighthouse:tree-states-hot-anchor-slot-rename?expand=1
- [ ] Make anchor fields non-public so that they must be mutated through a method, to prevent unwanted changes to the anchor_slot
### NOTTODO
- [ ] Use fork-choice and a new method [`descendants_of_checkpoint`](ca2388e196 (diff-046fbdb517ca16b80e4464c2c824cf001a74a0a94ac0065e635768ac391062a8)) to filter only the state summaries that descend from the finalized checkpoint
This commit is contained in:
@@ -4,6 +4,10 @@ use directory::size_of_dir;
|
||||
use std::path::Path;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
// Labels used for histogram timer vecs that are tracked per DB (hot and cold).
|
||||
pub const HOT_METRIC: &[&str] = &["hot"];
|
||||
pub const COLD_METRIC: &[&str] = &["cold"];
|
||||
|
||||
/*
|
||||
* General
|
||||
*/
|
||||
@@ -142,66 +146,61 @@ pub static BEACON_STATE_HOT_GET_COUNT: LazyLock<Result<IntCounter>> = LazyLock::
|
||||
"Total number of hot beacon states requested from the store (cache or DB)",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_READ_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_state_read_seconds",
|
||||
"Total time required to read a BeaconState from the database",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_READ_OVERHEAD_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_state_read_overhead_seconds",
|
||||
"Overhead on reading a beacon state from the DB (e.g., decoding)",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_READ_COUNT: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
"store_beacon_state_read_total",
|
||||
"Total number of beacon state reads from the DB",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_READ_BYTES: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
"store_beacon_state_read_bytes_total",
|
||||
"Total number of beacon state bytes read from the DB",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_WRITE_OVERHEAD_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_state_write_overhead_seconds",
|
||||
"Overhead on writing a beacon state to the DB (e.g., encoding)",
|
||||
)
|
||||
});
|
||||
/// Lazily-created counter `store_beacon_state_write_total`: total number of
/// beacon state writes to the DB.
///
/// The value is a `Result`, so a failed metric creation is carried in the static.
pub static BEACON_STATE_WRITE_COUNT: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
"store_beacon_state_write_total",
|
||||
"Total number of beacon state writes to the DB",
|
||||
)
|
||||
});
|
||||
pub static BEACON_STATE_WRITE_BYTES: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
"store_beacon_state_write_bytes_total",
|
||||
"Total number of beacon state bytes written to the DB",
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_READ_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
|
||||
/*
|
||||
* HDiffs
|
||||
*/
|
||||
pub static BEACON_HDIFF_READ_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_read_seconds",
|
||||
"Time required to read the hierarchical diff bytes from the database",
|
||||
"Time taken to read hdiff bytes from disk",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_DECODE_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
pub static BEACON_HDIFF_DECODE_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_decode_seconds",
|
||||
"Time required to decode hierarchical diff bytes",
|
||||
"Time taken to decode hdiff bytes",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_BUFFER_CLONE_TIMES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
pub static BEACON_HDIFF_APPLY_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_apply_seconds",
|
||||
"Time taken to apply an hdiff to a buffer",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_COMPUTE_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_compute_seconds",
|
||||
"Time taken to compute an hdiff for a state",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_BUFFER_LOAD_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_buffer_load_seconds",
|
||||
"Time taken to load an hdiff buffer for a state",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_BUFFER_CLONE_TIME: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_buffer_clone_seconds",
|
||||
"Time required to clone hierarchical diff buffer bytes",
|
||||
"Time taken to clone an hdiff buffer from a cache",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static BEACON_HDIFF_BUFFER_LOAD_BEFORE_STORE_TIME: LazyLock<Result<HistogramVec>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_histogram_vec(
|
||||
"store_hdiff_buffer_load_before_store_seconds",
|
||||
"Time taken to load the hdiff buffer required for the storage of a new state",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
// This metric is not split hot/cold because it is recorded in a place where that info is not known.
|
||||
pub static BEACON_HDIFF_BUFFER_APPLY_RESIZES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram_with_buckets(
|
||||
"store_hdiff_buffer_apply_resizes",
|
||||
@@ -209,6 +208,24 @@ pub static BEACON_HDIFF_BUFFER_APPLY_RESIZES: LazyLock<Result<Histogram>> = Lazy
|
||||
Ok(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
|
||||
)
|
||||
});
|
||||
// This metric is not split hot/cold because both databases use the same hierarchy config anyway
|
||||
// and that's all that affects diff sizes.
|
||||
pub static BEACON_HDIFF_SIZES: LazyLock<Result<HistogramVec>> = LazyLock::new(|| {
|
||||
try_create_histogram_vec_with_buckets(
|
||||
"store_hdiff_sizes",
|
||||
"Size of hdiffs in bytes by layer (exponent)",
|
||||
Ok(vec![
|
||||
500_000.0,
|
||||
2_000_000.0,
|
||||
5_000_000.0,
|
||||
10_000_000.0,
|
||||
15_000_000.0,
|
||||
20_000_000.0,
|
||||
50_000_000.0,
|
||||
]),
|
||||
&["exponent"],
|
||||
)
|
||||
});
|
||||
/*
|
||||
* Beacon Block
|
||||
*/
|
||||
@@ -259,17 +276,20 @@ pub static STORE_BEACON_HISTORIC_STATE_CACHE_SIZE: LazyLock<Result<IntGauge>> =
|
||||
"Current count of states in the historic state cache",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_SIZE: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
||||
try_create_int_gauge(
|
||||
"store_beacon_hdiff_buffer_cache_size",
|
||||
"Current count of hdiff buffers in the historic state cache",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_BYTE_SIZE: LazyLock<Result<IntGauge>> =
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_SIZE: LazyLock<Result<IntGaugeVec>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_int_gauge(
|
||||
try_create_int_gauge_vec(
|
||||
"store_beacon_hdiff_buffer_cache_size",
|
||||
"Current count of hdiff buffers cached in memory",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_BYTE_SIZE: LazyLock<Result<IntGaugeVec>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_int_gauge_vec(
|
||||
"store_beacon_hdiff_buffer_cache_byte_size",
|
||||
"Memory consumed by hdiff buffers in the historic state cache",
|
||||
"Memory consumed by hdiff buffers cached in memory",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_STATE_FREEZER_COMPRESS_TIME: LazyLock<Result<Histogram>> =
|
||||
@@ -286,33 +306,6 @@ pub static STORE_BEACON_STATE_FREEZER_DECOMPRESS_TIME: LazyLock<Result<Histogram
|
||||
"Time taken to decompress a state snapshot for the freezer DB",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_APPLY_TIME: LazyLock<Result<Histogram>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_hdiff_buffer_apply_seconds",
|
||||
"Time taken to apply hdiff buffer to a state buffer",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_COMPUTE_TIME: LazyLock<Result<Histogram>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_hdiff_buffer_compute_seconds",
|
||||
"Time taken to compute hdiff buffer to a state buffer",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_LOAD_TIME: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_hdiff_buffer_load_seconds",
|
||||
"Time taken to load an hdiff buffer",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_LOAD_FOR_STORE_TIME: LazyLock<Result<Histogram>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_histogram(
|
||||
"store_beacon_hdiff_buffer_load_for_store_seconds",
|
||||
"Time taken to load an hdiff buffer to store another hdiff",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HISTORIC_STATE_CACHE_HIT: LazyLock<Result<IntCounter>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
@@ -327,18 +320,20 @@ pub static STORE_BEACON_HISTORIC_STATE_CACHE_MISS: LazyLock<Result<IntCounter>>
|
||||
"Total count of historic state cache misses for full states",
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_HIT: LazyLock<Result<IntCounter>> =
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_HIT: LazyLock<Result<IntCounterVec>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
try_create_int_counter_vec(
|
||||
"store_beacon_hdiff_buffer_cache_hit_total",
|
||||
"Total count of hdiff buffer cache hits",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_MISS: LazyLock<Result<IntCounter>> =
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_CACHE_MISS: LazyLock<Result<IntCounterVec>> =
|
||||
LazyLock::new(|| {
|
||||
try_create_int_counter(
|
||||
try_create_int_counter_vec(
|
||||
"store_beacon_hdiff_buffer_cache_miss_total",
|
||||
"Total count of hdiff buffer cache miss",
|
||||
&["db"],
|
||||
)
|
||||
});
|
||||
pub static STORE_BEACON_HDIFF_BUFFER_INTO_STATE_TIME: LazyLock<Result<Histogram>> =
|
||||
|
||||
Reference in New Issue
Block a user