mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-18 21:38:31 +00:00
Hierarchical state diffs (#5978)
* Start extracting freezer changes for tree-states * Remove unused config args * Add comments * Remove unwraps * Subjective more clear implementation * Clean up hdiff * Update xdelta3 * Tree states archive metrics (#6040) * Add store cache size metrics * Add compress timer metrics * Add diff apply compute timer metrics * Add diff buffer cache hit metrics * Add hdiff buffer load times * Add blocks replayed metric * Move metrics to store * Future proof some metrics --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Port and clean up forwards iterator changes * Add and polish hierarchy-config flag * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Cleaner errors * Fix beacon_chain test compilation * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Patch a few more freezer block roots * Fix genesis block root bug * Fix test failing due to pending updates * Beacon chain tests passing * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix doc lint * Implement DB schema upgrade for hierarchical state diffs (#6193) * DB upgrade * Add flag * Delete RestorePointHash * Update docs * Update docs * Implement hierarchical state diffs config migration (#6245) * Implement hierarchical state diffs config migration * Review PR * Remove TODO * Set CURRENT_SCHEMA_VERSION correctly * Fix genesis state loading * Re-delete some PartialBeaconState stuff --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix test compilation * Update schema downgrade test * Fix tests * Fix null anchor migration * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix tree states upgrade migration (#6328) * Towards crash safety * Fix compilation * Move cold summaries and state roots to new columns * Rename StateRoots chunked field * Update prune states * Clean hdiff CLI flag and metrics * Fix "staged reconstruction" * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix alloy issues * Fix staged reconstruction logic * Prevent weird slot drift * Remove "allow" flag * Update CLI help * Remove FIXME about downgrade * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Remove some unnecessary error variants * Fix new test * Tree states archive - review comments and metrics (#6386) * Review PR comments and metrics * Comments * Add anchor metrics * drop prev comment * Update metadata.rs * Apply suggestions from code review --------- Co-authored-by: Michael Sproul <micsproul@gmail.com> * Update beacon_node/store/src/hot_cold_store.rs Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Clarify comment and remove anchor_slot garbage * Simplify database anchor (#6397) * Simplify database anchor * Update beacon_node/store/src/reconstruct.rs * Add migration for anchor * Fix and simplify light_client store tests * Fix incompatible config test * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * More metrics * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * New historic state cache (#6475) * New historic state cache * Add more metrics * State cache hit rate metrics * Fix store metrics * More logs and metrics * Fix logger * Ensure cached states have built caches :O * Replay blocks in preference to diffing * Two separate caches * Distribute cache build time to next slot * Re-plumb historic-state-cache flag * Clean up metrics * Update book * Update beacon_node/store/src/hdiff.rs Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Update beacon_node/store/src/historic_state_cache.rs Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> --------- Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Update database docs * Update diagram * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Update lockbud to work with bindgen/etc * Correct pkg name for Debian * Remove vestigial epochs_per_state_diff * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Markdown lint * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Address Jimmy's review comments * Simplify ReplayFrom case * Fix and document genesis_state_root * Typo Co-authored-by: Jimmy Chen <jchen.tc@gmail.com> * Merge branch 'unstable' into tree-states-archive * Compute diff of validators list manually (#6556) * Split hdiff computation * Dedicated logic for historical roots and summaries * Benchmark against real states * Mutated source? * Version the hdiff * Add lighthouse DB config for hierarchy exponents * Tidy up hierarchy exponents flag * Apply suggestions from code review Co-authored-by: Michael Sproul <micsproul@gmail.com> * Address PR review * Remove hardcoded paths in benchmarks * Delete unused function in benches * lint --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Test hdiff binary format stability (#6585) * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Add deprecation warning for SPRP * Update xdelta to get rid of duplicate deps * Document test
This commit is contained in:
@@ -24,6 +24,10 @@ const MAX_COMPACTION_PERIOD_SECONDS: u64 = 604800;
|
||||
const MIN_COMPACTION_PERIOD_SECONDS: u64 = 7200;
|
||||
/// Compact after a large finality gap, if we respect `MIN_COMPACTION_PERIOD_SECONDS`.
|
||||
const COMPACTION_FINALITY_DISTANCE: u64 = 1024;
|
||||
/// Maximum number of blocks applied in each reconstruction burst.
|
||||
///
|
||||
/// This limits the amount of time that the finalization migration is paused for.
|
||||
const BLOCKS_PER_RECONSTRUCTION: usize = 8192 * 4;
|
||||
|
||||
/// Default number of epochs to wait between finalization migrations.
|
||||
pub const DEFAULT_EPOCHS_PER_MIGRATION: u64 = 1;
|
||||
@@ -188,7 +192,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
if let Some(Notification::Reconstruction) =
|
||||
self.send_background_notification(Notification::Reconstruction)
|
||||
{
|
||||
Self::run_reconstruction(self.db.clone(), &self.log);
|
||||
// If we are running in foreground mode (as in tests), then this will just run a single
|
||||
// batch. We may need to tweak this in future.
|
||||
Self::run_reconstruction(self.db.clone(), None, &self.log);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,13 +206,34 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run_reconstruction(db: Arc<HotColdDB<E, Hot, Cold>>, log: &Logger) {
|
||||
if let Err(e) = db.reconstruct_historic_states() {
|
||||
error!(
|
||||
log,
|
||||
"State reconstruction failed";
|
||||
"error" => ?e,
|
||||
);
|
||||
pub fn run_reconstruction(
|
||||
db: Arc<HotColdDB<E, Hot, Cold>>,
|
||||
opt_tx: Option<mpsc::Sender<Notification>>,
|
||||
log: &Logger,
|
||||
) {
|
||||
match db.reconstruct_historic_states(Some(BLOCKS_PER_RECONSTRUCTION)) {
|
||||
Ok(()) => {
|
||||
// Schedule another reconstruction batch if required and we have access to the
|
||||
// channel for requeueing.
|
||||
if let Some(tx) = opt_tx {
|
||||
if !db.get_anchor_info().all_historic_states_stored() {
|
||||
if let Err(e) = tx.send(Notification::Reconstruction) {
|
||||
error!(
|
||||
log,
|
||||
"Unable to requeue reconstruction notification";
|
||||
"error" => ?e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
log,
|
||||
"State reconstruction failed";
|
||||
"error" => ?e,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,6 +415,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
log: Logger,
|
||||
) -> (mpsc::Sender<Notification>, thread::JoinHandle<()>) {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let inner_tx = tx.clone();
|
||||
let thread = thread::spawn(move || {
|
||||
while let Ok(notif) = rx.recv() {
|
||||
let mut reconstruction_notif = None;
|
||||
@@ -418,16 +446,17 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
}
|
||||
}
|
||||
}
|
||||
// If reconstruction is on-going, ignore finalization migration and blob pruning.
|
||||
// Run finalization and blob pruning migrations first, then a reconstruction batch.
|
||||
// This prevents finalization from being starved while reconstruciton runs (a
|
||||
// problem in previous LH versions).
|
||||
if let Some(fin) = finalization_notif {
|
||||
Self::run_migration(db.clone(), fin, &log);
|
||||
}
|
||||
if let Some(dab) = prune_blobs_notif {
|
||||
Self::run_prune_blobs(db.clone(), dab, &log);
|
||||
}
|
||||
if reconstruction_notif.is_some() {
|
||||
Self::run_reconstruction(db.clone(), &log);
|
||||
} else {
|
||||
if let Some(fin) = finalization_notif {
|
||||
Self::run_migration(db.clone(), fin, &log);
|
||||
}
|
||||
if let Some(dab) = prune_blobs_notif {
|
||||
Self::run_prune_blobs(db.clone(), dab, &log);
|
||||
}
|
||||
Self::run_reconstruction(db.clone(), Some(inner_tx.clone()), &log);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user