Fix block backfill with genesis skip slots (#4820)

## Issue Addressed

Closes #4817.

## Proposed Changes

- Fill in the linear block roots array between 0 and the slot of the first block (e.g. slots 0 and 1 on Holesky).
- Backport the `--freezer`, `--skip` and `--limit` options for `lighthouse db inspect` from tree-states. This allows us to easily view the database corruption described in #4817 using `lighthouse db inspect --network holesky --freezer --column bbr --output values --limit 2`.
- Backport the `iter_column_from` change and `MemoryStore` overhaul from tree-states. These are required to enable `lighthouse db inspect`.
- Rework `freezer_upper_limit` to allow state lookups for slots below the `state_lower_limit`. Currently state lookups will fail until state reconstruction completes entirely.

There is a new regression test for the main bug, but no test for the `freezer_upper_limit` fix because we don't currently support running state reconstruction partially (see #3026). This will be fixed once we merge `tree-states`! In lieu of an automated test, I've tested manually on a Holesky node while it was reconstructing.

## Additional Info

Users who backfilled Holesky to slot 0 (e.g. using `--reconstruct-historic-states`) need to either:

- Re-sync from genesis.
- Re-sync using checkpoint sync and the changes from this PR.

Due to the recency of the Holesky genesis, writing a custom pass to fix up broken databases (which would require its own thorough testing) was deemed unnecessary. This is the primary reason for this PR being marked `backwards-incompat`.

This will create a few conflicts with Deneb, which I've already resolved on `tree-states-deneb` and will be happy to backport to Deneb once this PR is merged to unstable.
This commit is contained in:
Michael Sproul
2023-10-27 05:08:49 +00:00
parent b82d1a993c
commit c574f8136e
11 changed files with 207 additions and 101 deletions

View File

@@ -1,17 +1,17 @@
use super::{Error, ItemStore, KeyValueStore, KeyValueStoreOp};
use crate::{ColumnIter, DBColumn};
use crate::{
get_key_for_col, leveldb_store::BytesKey, ColumnIter, ColumnKeyIter, DBColumn, Error,
ItemStore, Key, KeyValueStore, KeyValueStoreOp,
};
use parking_lot::{Mutex, MutexGuard, RwLock};
use std::collections::{HashMap, HashSet};
use std::collections::BTreeMap;
use std::marker::PhantomData;
use types::*;
type DBHashMap = HashMap<Vec<u8>, Vec<u8>>;
type DBKeyMap = HashMap<Vec<u8>, HashSet<Vec<u8>>>;
type DBMap = BTreeMap<BytesKey, Vec<u8>>;
/// A thread-safe `HashMap` wrapper.
/// A thread-safe `BTreeMap` wrapper.
pub struct MemoryStore<E: EthSpec> {
db: RwLock<DBHashMap>,
col_keys: RwLock<DBKeyMap>,
db: RwLock<DBMap>,
transaction_mutex: Mutex<()>,
_phantom: PhantomData<E>,
}
@@ -20,36 +20,24 @@ impl<E: EthSpec> MemoryStore<E> {
/// Create a new, empty database.
pub fn open() -> Self {
Self {
db: RwLock::new(HashMap::new()),
col_keys: RwLock::new(HashMap::new()),
db: RwLock::new(BTreeMap::new()),
transaction_mutex: Mutex::new(()),
_phantom: PhantomData,
}
}
fn get_key_for_col(col: &str, key: &[u8]) -> Vec<u8> {
let mut col = col.as_bytes().to_vec();
col.append(&mut key.to_vec());
col
}
}
impl<E: EthSpec> KeyValueStore<E> for MemoryStore<E> {
/// Get the value of some key from the database. Returns `None` if the key does not exist.
fn get_bytes(&self, col: &str, key: &[u8]) -> Result<Option<Vec<u8>>, Error> {
let column_key = Self::get_key_for_col(col, key);
let column_key = BytesKey::from_vec(get_key_for_col(col, key));
Ok(self.db.read().get(&column_key).cloned())
}
/// Puts a key in the database.
fn put_bytes(&self, col: &str, key: &[u8], val: &[u8]) -> Result<(), Error> {
let column_key = Self::get_key_for_col(col, key);
let column_key = BytesKey::from_vec(get_key_for_col(col, key));
self.db.write().insert(column_key, val.to_vec());
self.col_keys
.write()
.entry(col.as_bytes().to_vec())
.or_default()
.insert(key.to_vec());
Ok(())
}
@@ -64,18 +52,14 @@ impl<E: EthSpec> KeyValueStore<E> for MemoryStore<E> {
/// Return true if some key exists in some column.
fn key_exists(&self, col: &str, key: &[u8]) -> Result<bool, Error> {
let column_key = Self::get_key_for_col(col, key);
let column_key = BytesKey::from_vec(get_key_for_col(col, key));
Ok(self.db.read().contains_key(&column_key))
}
/// Delete some key from the database.
fn key_delete(&self, col: &str, key: &[u8]) -> Result<(), Error> {
let column_key = Self::get_key_for_col(col, key);
let column_key = BytesKey::from_vec(get_key_for_col(col, key));
self.db.write().remove(&column_key);
self.col_keys
.write()
.get_mut(&col.as_bytes().to_vec())
.map(|set| set.remove(key));
Ok(())
}
@@ -83,35 +67,41 @@ impl<E: EthSpec> KeyValueStore<E> for MemoryStore<E> {
for op in batch {
match op {
KeyValueStoreOp::PutKeyValue(key, value) => {
self.db.write().insert(key, value);
self.db.write().insert(BytesKey::from_vec(key), value);
}
KeyValueStoreOp::DeleteKey(hash) => {
self.db.write().remove(&hash);
KeyValueStoreOp::DeleteKey(key) => {
self.db.write().remove(&BytesKey::from_vec(key));
}
}
}
Ok(())
}
// pub type ColumnIter<'a> = Box<dyn Iterator<Item = Result<(Hash256, Vec<u8>), Error>> + 'a>;
fn iter_column(&self, column: DBColumn) -> ColumnIter {
fn iter_column_from<K: Key>(&self, column: DBColumn, from: &[u8]) -> ColumnIter<K> {
// We use this awkward pattern because we can't lock the `self.db` field *and* maintain a
// reference to the lock guard across calls to `.next()`. This would require a
// struct with a field (the iterator) which references another field (the lock guard).
let start_key = BytesKey::from_vec(get_key_for_col(column.as_str(), from));
let col = column.as_str();
if let Some(keys) = self
.col_keys
let keys = self
.db
.read()
.get(col.as_bytes())
.map(|set| set.iter().cloned().collect::<Vec<_>>())
{
Box::new(keys.into_iter().filter_map(move |key| {
let hash = Hash256::from_slice(&key);
self.get_bytes(col, &key)
.transpose()
.map(|res| res.map(|bytes| (hash, bytes)))
}))
} else {
Box::new(std::iter::empty())
}
.range(start_key..)
.take_while(|(k, _)| k.remove_column_variable(column).is_some())
.filter_map(|(k, _)| k.remove_column_variable(column).map(|k| k.to_vec()))
.collect::<Vec<_>>();
Box::new(keys.into_iter().filter_map(move |key| {
self.get_bytes(col, &key).transpose().map(|res| {
let k = K::from_bytes(&key)?;
let v = res?;
Ok((k, v))
})
}))
}
fn iter_column_keys(&self, column: DBColumn) -> ColumnKeyIter {
Box::new(self.iter_column(column).map(|res| res.map(|(k, _)| k)))
}
fn begin_rw_transaction(&self) -> MutexGuard<()> {