mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-19 05:48:31 +00:00
Hierarchical state diffs (#5978)
* Start extracting freezer changes for tree-states * Remove unused config args * Add comments * Remove unwraps * Subjective more clear implementation * Clean up hdiff * Update xdelta3 * Tree states archive metrics (#6040) * Add store cache size metrics * Add compress timer metrics * Add diff apply compute timer metrics * Add diff buffer cache hit metrics * Add hdiff buffer load times * Add blocks replayed metric * Move metrics to store * Future proof some metrics --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Port and clean up forwards iterator changes * Add and polish hierarchy-config flag * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Cleaner errors * Fix beacon_chain test compilation * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Patch a few more freezer block roots * Fix genesis block root bug * Fix test failing due to pending updates * Beacon chain tests passing * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix doc lint * Implement DB schema upgrade for hierarchical state diffs (#6193) * DB upgrade * Add flag * Delete RestorePointHash * Update docs * Update docs * Implement hierarchical state diffs config migration (#6245) * Implement hierarchical state diffs config migration * Review PR * Remove TODO * Set CURRENT_SCHEMA_VERSION correctly * Fix genesis state loading * Re-delete some PartialBeaconState stuff --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix test compilation * Update schema downgrade test * Fix tests * Fix null anchor migration * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix tree states upgrade migration (#6328) * Towards crash safety * Fix compilation * Move cold summaries and state roots to new columns * Rename StateRoots chunked field * 
Update prune states * Clean hdiff CLI flag and metrics * Fix "staged reconstruction" * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Fix alloy issues * Fix staged reconstruction logic * Prevent weird slot drift * Remove "allow" flag * Update CLI help * Remove FIXME about downgrade * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Remove some unnecessary error variants * Fix new test * Tree states archive - review comments and metrics (#6386) * Review PR comments and metrics * Comments * Add anchor metrics * drop prev comment * Update metadata.rs * Apply suggestions from code review --------- Co-authored-by: Michael Sproul <micsproul@gmail.com> * Update beacon_node/store/src/hot_cold_store.rs Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Clarify comment and remove anchor_slot garbage * Simplify database anchor (#6397) * Simplify database anchor * Update beacon_node/store/src/reconstruct.rs * Add migration for anchor * Fix and simplify light_client store tests * Fix incompatible config test * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * More metrics * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * New historic state cache (#6475) * New historic state cache * Add more metrics * State cache hit rate metrics * Fix store metrics * More logs and metrics * Fix logger * Ensure cached states have built caches :O * Replay blocks in preference to diffing * Two separate caches * Distribute cache build time to next slot * Re-plumb historic-state-cache flag * Clean up metrics * Update book * Update beacon_node/store/src/hdiff.rs Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Update beacon_node/store/src/historic_state_cache.rs Co-authored-by: Lion - dapplion 
<35266934+dapplion@users.noreply.github.com> --------- Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com> * Update database docs * Update diagram * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Update lockbud to work with bindgen/etc * Correct pkg name for Debian * Remove vestigial epochs_per_state_diff * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Markdown lint * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Address Jimmy's review comments * Simplify ReplayFrom case * Fix and document genesis_state_root * Typo Co-authored-by: Jimmy Chen <jchen.tc@gmail.com> * Merge branch 'unstable' into tree-states-archive * Compute diff of validators list manually (#6556) * Split hdiff computation * Dedicated logic for historical roots and summaries * Benchmark against real states * Mutated source? * Version the hdiff * Add lighthouse DB config for hierarchy exponents * Tidy up hierarchy exponents flag * Apply suggestions from code review Co-authored-by: Michael Sproul <micsproul@gmail.com> * Address PR review * Remove hardcoded paths in benchmarks * Delete unused function in benches * lint --------- Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Test hdiff binary format stability (#6585) * Merge remote-tracking branch 'origin/unstable' into tree-states-archive * Add deprecation warning for SPRP * Update xdelta to get rid of duplicate deps * Document test
This commit is contained in:
@@ -0,0 +1,210 @@
|
||||
use crate::beacon_chain::BeaconChainTypes;
|
||||
use slog::{info, Logger};
|
||||
use std::sync::Arc;
|
||||
use store::chunked_iter::ChunkedVectorIter;
|
||||
use store::{
|
||||
chunked_vector::BlockRootsChunked,
|
||||
get_key_for_col,
|
||||
metadata::{
|
||||
SchemaVersion, ANCHOR_FOR_ARCHIVE_NODE, ANCHOR_UNINITIALIZED, STATE_UPPER_LIMIT_NO_RETAIN,
|
||||
},
|
||||
partial_beacon_state::PartialBeaconState,
|
||||
AnchorInfo, DBColumn, Error, HotColdDB, KeyValueStore, KeyValueStoreOp,
|
||||
};
|
||||
use types::{BeaconState, Hash256, Slot};
|
||||
|
||||
const LOG_EVERY: usize = 200_000;
|
||||
|
||||
fn load_old_schema_frozen_state<T: BeaconChainTypes>(
|
||||
db: &HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>,
|
||||
state_root: Hash256,
|
||||
) -> Result<Option<BeaconState<T::EthSpec>>, Error> {
|
||||
let Some(partial_state_bytes) = db
|
||||
.cold_db
|
||||
.get_bytes(DBColumn::BeaconState.into(), state_root.as_slice())?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let mut partial_state: PartialBeaconState<T::EthSpec> =
|
||||
PartialBeaconState::from_ssz_bytes(&partial_state_bytes, db.get_chain_spec())?;
|
||||
|
||||
// Fill in the fields of the partial state.
|
||||
partial_state.load_block_roots(&db.cold_db, db.get_chain_spec())?;
|
||||
partial_state.load_state_roots(&db.cold_db, db.get_chain_spec())?;
|
||||
partial_state.load_historical_roots(&db.cold_db, db.get_chain_spec())?;
|
||||
partial_state.load_randao_mixes(&db.cold_db, db.get_chain_spec())?;
|
||||
partial_state.load_historical_summaries(&db.cold_db, db.get_chain_spec())?;
|
||||
|
||||
partial_state.try_into().map(Some)
|
||||
}
|
||||
|
||||
/// Upgrade the database from schema v21 to v22 (hierarchical state diffs).
///
/// The steps are deliberately ordered for crash safety: all new-format data is
/// written to the freezer BEFORE the anchor/schema version is bumped, and the
/// old-format data is only deleted afterwards. A crash at any point therefore
/// leaves either a fully old-schema or fully new-schema database (possibly with
/// some harmless leftover data).
pub fn upgrade_to_v22<T: BeaconChainTypes>(
    db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
    genesis_state_root: Option<Hash256>,
    log: Logger,
) -> Result<(), Error> {
    info!(log, "Upgrading from v21 to v22");

    let mut old_anchor = db.get_anchor_info();

    // If the anchor was uninitialized in the old schema (`None`), this represents a full archive
    // node.
    if old_anchor == ANCHOR_UNINITIALIZED {
        old_anchor = ANCHOR_FOR_ARCHIVE_NODE;
    }

    let split_slot = db.get_split_slot();
    // The genesis state root is required for this migration; abort without it.
    let genesis_state_root = genesis_state_root.ok_or(Error::GenesisStateUnknown)?;

    // Cold-DB write batch accumulated across the non-destructive steps below.
    let mut cold_ops = vec![];

    // Load the genesis state in the previous chunked format, BEFORE we go deleting or rewriting
    // anything.
    let mut genesis_state = load_old_schema_frozen_state::<T>(&db, genesis_state_root)?
        .ok_or(Error::MissingGenesisState)?;
    let genesis_state_root = genesis_state.update_tree_hash_cache()?;
    let genesis_block_root = genesis_state.get_latest_block_root(genesis_state_root);

    // Store the genesis state in the new format, prior to updating the schema version on disk.
    // In case of a crash no data is lost because we will re-load it in the old format and re-do
    // this write.
    if split_slot > 0 {
        info!(
            log,
            "Re-storing genesis state";
            "state_root" => ?genesis_state_root,
        );
        db.store_cold_state(&genesis_state_root, &genesis_state, &mut cold_ops)?;
    }

    // Write the block roots in the new format in a new column. Similar to above, we do this
    // separately from deleting the old format block roots so that this is crash safe.
    let oldest_block_slot = old_anchor.oldest_block_slot;
    write_new_schema_block_roots::<T>(
        &db,
        genesis_block_root,
        oldest_block_slot,
        split_slot,
        &mut cold_ops,
        &log,
    )?;

    // Commit this first batch of non-destructive cold database ops.
    db.cold_db.do_atomically(cold_ops)?;

    // Now we update the anchor and the schema version atomically in the hot database.
    //
    // If we crash after committing this change, then there will be some leftover cruft left in the
    // freezer database, but no corruption because all the new-format data has already been written
    // above.
    let new_anchor = AnchorInfo {
        state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN,
        state_lower_limit: Slot::new(0),
        ..old_anchor.clone()
    };
    let hot_ops = vec![db.compare_and_set_anchor_info(old_anchor, new_anchor)?];
    db.store_schema_version_atomically(SchemaVersion(22), hot_ops)?;

    // Finally, clean up the old-format data from the freezer database.
    delete_old_schema_freezer_data::<T>(&db, &log)?;

    Ok(())
}
|
||||
|
||||
pub fn delete_old_schema_freezer_data<T: BeaconChainTypes>(
|
||||
db: &Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
|
||||
log: &Logger,
|
||||
) -> Result<(), Error> {
|
||||
let mut cold_ops = vec![];
|
||||
|
||||
let columns = [
|
||||
DBColumn::BeaconState,
|
||||
// Cold state summaries indexed by state root were stored in this column.
|
||||
DBColumn::BeaconStateSummary,
|
||||
// Mapping from restore point number to state root was stored in this column.
|
||||
DBColumn::BeaconRestorePoint,
|
||||
// Chunked vector values were stored in these columns.
|
||||
DBColumn::BeaconHistoricalRoots,
|
||||
DBColumn::BeaconRandaoMixes,
|
||||
DBColumn::BeaconHistoricalSummaries,
|
||||
DBColumn::BeaconBlockRootsChunked,
|
||||
DBColumn::BeaconStateRootsChunked,
|
||||
];
|
||||
|
||||
for column in columns {
|
||||
for res in db.cold_db.iter_column_keys::<Vec<u8>>(column) {
|
||||
let key = res?;
|
||||
cold_ops.push(KeyValueStoreOp::DeleteKey(get_key_for_col(
|
||||
column.as_str(),
|
||||
&key,
|
||||
)));
|
||||
}
|
||||
}
|
||||
let delete_ops = cold_ops.len();
|
||||
|
||||
info!(
|
||||
log,
|
||||
"Deleting historic states";
|
||||
"delete_ops" => delete_ops,
|
||||
);
|
||||
db.cold_db.do_atomically(cold_ops)?;
|
||||
|
||||
// In order to reclaim space, we need to compact the freezer DB as well.
|
||||
db.cold_db.compact()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_new_schema_block_roots<T: BeaconChainTypes>(
|
||||
db: &HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>,
|
||||
genesis_block_root: Hash256,
|
||||
oldest_block_slot: Slot,
|
||||
split_slot: Slot,
|
||||
cold_ops: &mut Vec<KeyValueStoreOp>,
|
||||
log: &Logger,
|
||||
) -> Result<(), Error> {
|
||||
info!(
|
||||
log,
|
||||
"Starting beacon block root migration";
|
||||
"oldest_block_slot" => oldest_block_slot,
|
||||
"genesis_block_root" => ?genesis_block_root,
|
||||
);
|
||||
|
||||
// Store the genesis block root if it would otherwise not be stored.
|
||||
if oldest_block_slot != 0 {
|
||||
cold_ops.push(KeyValueStoreOp::PutKeyValue(
|
||||
get_key_for_col(DBColumn::BeaconBlockRoots.into(), &0u64.to_be_bytes()),
|
||||
genesis_block_root.as_slice().to_vec(),
|
||||
));
|
||||
}
|
||||
|
||||
// Block roots are available from the `oldest_block_slot` to the `split_slot`.
|
||||
let start_vindex = oldest_block_slot.as_usize();
|
||||
let block_root_iter = ChunkedVectorIter::<BlockRootsChunked, _, _, _>::new(
|
||||
db,
|
||||
start_vindex,
|
||||
split_slot,
|
||||
db.get_chain_spec(),
|
||||
);
|
||||
|
||||
// OK to hold these in memory (10M slots * 43 bytes per KV ~= 430 MB).
|
||||
for (i, (slot, block_root)) in block_root_iter.enumerate() {
|
||||
cold_ops.push(KeyValueStoreOp::PutKeyValue(
|
||||
get_key_for_col(
|
||||
DBColumn::BeaconBlockRoots.into(),
|
||||
&(slot as u64).to_be_bytes(),
|
||||
),
|
||||
block_root.as_slice().to_vec(),
|
||||
));
|
||||
|
||||
if i > 0 && i % LOG_EVERY == 0 {
|
||||
info!(
|
||||
log,
|
||||
"Beacon block root migration in progress";
|
||||
"roots_migrated" => i
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user