Add command for pruning states (#4835)

## Issue Addressed

Closes #4481. 

(Continuation of #4648)

## Proposed Changes

- [x] Add `lighthouse db prune-states`
- [x] Make it work
- [x] Ensure block roots are handled correctly (to be addressed in #4735)
- [x] Check perf on mainnet/Goerli/Gnosis (takes a few seconds max)
- [x] Run block root healing logic (#4875 ) at the beginning
- [x] Add some tests
- [x] Update docs
- [x] Add `--freezer` flag and other improvements to `lighthouse db inspect`

Co-authored-by: Michael Sproul <michael@sigmaprime.io>
Co-authored-by: Jimmy Chen <jimmy@sigmaprime.io>
Co-authored-by: Michael Sproul <micsproul@gmail.com>
This commit is contained in:
Jimmy Chen
2023-11-03 00:12:19 +00:00
parent 07f53b18fc
commit 36d8849813
8 changed files with 373 additions and 29 deletions

View File

@@ -2222,6 +2222,8 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
Ok(())
}
/// This function fills in missing block roots between last restore point slot and split
/// slot, if any.
pub fn heal_freezer_block_roots(&self) -> Result<(), Error> {
let split = self.get_split_info();
let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point
@@ -2250,6 +2252,93 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
Ok(())
}
/// Delete *all* states from the freezer database and update the anchor accordingly.
///
/// WARNING: this method deletes the genesis state and replaces it with the provided
/// `genesis_state`. This is to support its use in schema migrations where the storage scheme of
/// the genesis state may be modified. It is the responsibility of the caller to ensure that the
/// genesis state is correct, else a corrupt database will be created.
pub fn prune_historic_states(
    &self,
    genesis_state_root: Hash256,
    genesis_state: &BeaconState<E>,
) -> Result<(), Error> {
    // Make sure there are no missing block roots before pruning.
    self.heal_freezer_block_roots()?;

    // Update the anchor to use the dummy state upper limit and disable historic state storage.
    let old_anchor = self.get_anchor_info();
    let new_anchor = if let Some(old_anchor) = old_anchor.clone() {
        AnchorInfo {
            state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN,
            state_lower_limit: Slot::new(0),
            // `old_anchor` here is the owned clone bound by the `if let`, so the struct
            // update can consume it directly — the previous extra `.clone()` was redundant.
            ..old_anchor
        }
    } else {
        AnchorInfo {
            anchor_slot: Slot::new(0),
            oldest_block_slot: Slot::new(0),
            oldest_block_parent: Hash256::zero(),
            state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN,
            state_lower_limit: Slot::new(0),
        }
    };

    // Commit the anchor change immediately: if the cold database ops fail they can always be
    // retried, and we can't do them atomically with this change anyway.
    self.compare_and_set_anchor_info_with_write(old_anchor, Some(new_anchor))?;

    // Stage freezer data for deletion. Do not bother loading and deserializing values as this
    // wastes time and is less schema-agnostic. My hope is that this method will be useful for
    // migrating to the tree-states schema (delete everything in the freezer then start afresh).
    let mut cold_ops = vec![];

    let columns = [
        DBColumn::BeaconState,
        DBColumn::BeaconStateSummary,
        DBColumn::BeaconRestorePoint,
        DBColumn::BeaconStateRoots,
        DBColumn::BeaconHistoricalRoots,
        DBColumn::BeaconRandaoMixes,
        DBColumn::BeaconHistoricalSummaries,
    ];

    for column in columns {
        for res in self.cold_db.iter_column_keys::<Vec<u8>>(column) {
            let key = res?;
            cold_ops.push(KeyValueStoreOp::DeleteKey(get_key_for_col(
                column.as_str(),
                &key,
            )));
        }
    }

    // XXX: We need to commit the mass deletion here *before* re-storing the genesis state, as
    // the current schema performs reads as part of `store_cold_state`. This can be deleted
    // once the target schema is tree-states. If the process is killed before the genesis state
    // is written this can be fixed by re-running.
    info!(
        self.log,
        "Deleting historic states";
        "num_kv" => cold_ops.len(),
    );
    self.cold_db.do_atomically(std::mem::take(&mut cold_ops))?;

    // If we just deleted the genesis state, re-store it using the *current* schema, which
    // may be different from the schema of the genesis state we just deleted.
    if self.get_split_slot() > 0 {
        info!(
            self.log,
            "Re-storing genesis state";
            "state_root" => ?genesis_state_root,
        );
        self.store_cold_state(&genesis_state_root, genesis_state, &mut cold_ops)?;
        self.cold_db.do_atomically(cold_ops)?;
    }

    Ok(())
}
}
/// Advance the split point of the store, moving new finalized states to the freezer.

View File

@@ -169,6 +169,7 @@ impl<E: EthSpec> KeyValueStore<E> for LevelDB<E> {
for (start_key, end_key) in [
endpoints(DBColumn::BeaconStateTemporary),
endpoints(DBColumn::BeaconState),
endpoints(DBColumn::BeaconStateSummary),
] {
self.db.compact(&start_key, &end_key);
}
@@ -225,9 +226,9 @@ impl<E: EthSpec> KeyValueStore<E> for LevelDB<E> {
}
/// Iterate through all keys and values in a particular column.
fn iter_column_keys(&self, column: DBColumn) -> ColumnKeyIter {
fn iter_column_keys<K: Key>(&self, column: DBColumn) -> ColumnKeyIter<K> {
let start_key =
BytesKey::from_vec(get_key_for_col(column.into(), Hash256::zero().as_bytes()));
BytesKey::from_vec(get_key_for_col(column.into(), &vec![0; column.key_size()]));
let iter = self.db.keys_iter(self.read_options());
iter.seek(&start_key);
@@ -235,13 +236,12 @@ impl<E: EthSpec> KeyValueStore<E> for LevelDB<E> {
Box::new(
iter.take_while(move |key| key.matches_column(column))
.map(move |bytes_key| {
let key =
bytes_key
.remove_column(column)
.ok_or(HotColdDBError::IterationError {
unexpected_key: bytes_key,
})?;
Ok(key)
let key = bytes_key.remove_column_variable(column).ok_or_else(|| {
HotColdDBError::IterationError {
unexpected_key: bytes_key.clone(),
}
})?;
K::from_bytes(key)
}),
)
}

View File

@@ -45,7 +45,7 @@ use strum::{EnumString, IntoStaticStr};
pub use types::*;
pub type ColumnIter<'a, K> = Box<dyn Iterator<Item = Result<(K, Vec<u8>), Error>> + 'a>;
pub type ColumnKeyIter<'a> = Box<dyn Iterator<Item = Result<Hash256, Error>> + 'a>;
pub type ColumnKeyIter<'a, K> = Box<dyn Iterator<Item = Result<K, Error>> + 'a>;
pub type RawEntryIter<'a> = Box<dyn Iterator<Item = Result<(Vec<u8>, Vec<u8>), Error>> + 'a>;
pub type RawKeyIter<'a> = Box<dyn Iterator<Item = Result<Vec<u8>, Error>> + 'a>;
@@ -88,6 +88,7 @@ pub trait KeyValueStore<E: EthSpec>: Sync + Send + Sized + 'static {
self.iter_column_from(column, &vec![0; column.key_size()])
}
/// Iterate through all keys and values in a column from a given starting point.
fn iter_column_from<K: Key>(&self, column: DBColumn, from: &[u8]) -> ColumnIter<K>;
fn iter_raw_entries(&self, _column: DBColumn, _prefix: &[u8]) -> RawEntryIter {
@@ -99,7 +100,7 @@ pub trait KeyValueStore<E: EthSpec>: Sync + Send + Sized + 'static {
}
/// Iterate through all keys in a particular column.
fn iter_column_keys(&self, column: DBColumn) -> ColumnKeyIter;
fn iter_column_keys<K: Key>(&self, column: DBColumn) -> ColumnKeyIter<K>;
}
pub trait Key: Sized + 'static {
@@ -274,7 +275,7 @@ impl DBColumn {
/// This function returns the number of bytes used by keys in a given column.
pub fn key_size(self) -> usize {
match self {
Self::OverflowLRUCache => 40,
Self::OverflowLRUCache => 33, // See `OverflowKey` encode impl.
Self::BeaconMeta
| Self::BeaconBlock
| Self::BeaconState

View File

@@ -100,7 +100,7 @@ impl<E: EthSpec> KeyValueStore<E> for MemoryStore<E> {
}))
}
fn iter_column_keys(&self, column: DBColumn) -> ColumnKeyIter {
/// Iterate over just the keys of a column, discarding the values from the
/// underlying key/value iteration.
fn iter_column_keys<K: Key>(&self, column: DBColumn) -> ColumnKeyIter<K> {
    let keys_only = self
        .iter_column::<K>(column)
        .map(|entry| entry.map(|(key, _value)| key));
    Box::new(keys_only)
}