Repurpose the pubkey cache for validator de-dupe

This commit is contained in:
Michael Sproul
2022-09-28 15:08:30 +10:00
parent 9a1799f235
commit ea599a6d7f
12 changed files with 184 additions and 113 deletions

View File

@@ -30,3 +30,4 @@ tree_hash = "0.4.0"
take-until = "0.1.0"
zstd = "0.10.0"
strum = { version = "0.24.0", features = ["derive"] }
bls = { path = "../../crypto/bls" }

View File

@@ -59,7 +59,12 @@ pub enum Error {
AddPayloadLogicError,
ResyncRequiredForExecutionPayloadSeparation,
SlotClockUnavailableForMigration,
MissingImmutableValidator(usize),
V9MigrationFailure(Hash256),
ValidatorPubkeyCacheError(String),
DuplicateValidatorPublicKey,
InvalidValidatorPubkeyBytes(bls::Error),
ValidatorPubkeyCacheUninitialized,
}
pub trait HandleUnavailable<T> {

View File

@@ -20,7 +20,7 @@ use crate::metrics;
use crate::state_cache::{PutStateOutcome, StateCache};
use crate::{
get_key_for_col, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp,
PartialBeaconState, StoreItem, StoreOp,
PartialBeaconState, StoreItem, StoreOp, ValidatorPubkeyCache,
};
use itertools::process_results;
use leveldb::iterator::LevelDBIterator;
@@ -36,13 +36,14 @@ use state_processing::{
block_replayer::PreSlotHook, BlockProcessingError, BlockReplayer, SlotProcessingError,
};
use std::cmp::min;
use std::convert::TryInto;
use std::io::Read;
use std::marker::PhantomData;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use types::*;
use types::{beacon_state::BeaconStateDiff, EthSpec};
use zstd::Decoder;
pub const MAX_PARENT_STATES_TO_CACHE: u64 = 32;
@@ -70,6 +71,8 @@ pub struct HotColdDB<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
block_cache: Mutex<LruCache<Hash256, SignedBeaconBlock<E>>>,
/// Cache of beacon states.
state_cache: Mutex<StateCache<E>>,
/// Immutable validator cache.
pub immutable_validators: Arc<RwLock<ValidatorPubkeyCache<E, Hot, Cold>>>,
/// Chain spec.
pub(crate) spec: ChainSpec,
/// Logger.
@@ -141,6 +144,7 @@ impl<E: EthSpec> HotColdDB<E, MemoryStore<E>, MemoryStore<E>> {
hot_db: MemoryStore::open(),
block_cache: Mutex::new(LruCache::new(config.block_cache_size)),
state_cache: Mutex::new(StateCache::new(config.state_cache_size)),
immutable_validators: Arc::new(RwLock::new(Default::default())),
config,
spec,
log,
@@ -176,6 +180,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
hot_db: LevelDB::open(hot_path)?,
block_cache: Mutex::new(LruCache::new(config.block_cache_size)),
state_cache: Mutex::new(StateCache::new(config.state_cache_size)),
immutable_validators: Arc::new(RwLock::new(Default::default())),
config,
spec,
log,
@@ -217,6 +222,11 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
);
}
// Load validator pubkey cache.
// FIXME(sproul): probably breaks migrations, etc
let pubkey_cache = ValidatorPubkeyCache::load_from_store(&db)?;
*db.immutable_validators.write() = pubkey_cache;
// Ensure that the schema version of the on-disk database matches the software.
// If the version is mismatched, an automatic migration will be attempted.
let db = Arc::new(db);
@@ -1100,7 +1110,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
// 1. Convert to PartialBeaconState and store that in the DB.
let partial_state = PartialBeaconState::from_state_forgetful(state);
let op = partial_state.as_kv_store_op(*state_root);
let op = partial_state.as_kv_store_op(*state_root, &self.config)?;
ops.push(op);
// 2. Store updated vector entries.
@@ -1151,12 +1161,19 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
/// Load a restore point state by its `state_root`.
fn load_restore_point(&self, state_root: &Hash256) -> Result<BeaconState<E>, Error> {
let partial_state_bytes = self
let bytes = self
.cold_db
.get_bytes(DBColumn::BeaconState.into(), state_root.as_bytes())?
.ok_or(HotColdDBError::MissingRestorePoint(*state_root))?;
let mut ssz_bytes = Vec::with_capacity(self.config.estimate_decompressed_size(bytes.len()));
let mut decoder = Decoder::new(&*bytes).map_err(Error::Compression)?;
decoder
.read_to_end(&mut ssz_bytes)
.map_err(Error::Compression)?;
let mut partial_state: PartialBeaconState<E> =
PartialBeaconState::from_ssz_bytes(&partial_state_bytes, &self.spec)?;
PartialBeaconState::from_ssz_bytes(&ssz_bytes, &self.spec)?;
// Fill in the fields of the partial state.
partial_state.load_block_roots(&self.cold_db, &self.spec)?;
@@ -1164,7 +1181,10 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
partial_state.load_historical_roots(&self.cold_db, &self.spec)?;
partial_state.load_randao_mixes(&self.cold_db, &self.spec)?;
partial_state.try_into()
let pubkey_cache = self.immutable_validators.read();
let immutable_validators = |i: usize| pubkey_cache.get_validator(i);
partial_state.try_into_full_state(immutable_validators)
}
/// Load a restore point state by its `restore_point_index`.

View File

@@ -28,6 +28,7 @@ mod partial_beacon_state;
pub mod reconstruct;
mod state_cache;
mod state_diff;
pub mod validator_pubkey_cache;
pub mod iter;
@@ -45,6 +46,7 @@ use parking_lot::MutexGuard;
use std::sync::Arc;
use strum::{EnumString, IntoStaticStr};
pub use types::*;
pub use validator_pubkey_cache::ValidatorPubkeyCache;
pub type ColumnIter<'a> = Box<dyn Iterator<Item = Result<(Hash256, Vec<u8>), Error>> + 'a>;
pub type ColumnKeyIter<'a> = Box<dyn Iterator<Item = Result<Hash256, Error>> + 'a>;

View File

@@ -2,13 +2,15 @@ use crate::chunked_vector::{
load_variable_list_from_db, load_vector_from_db, BlockRoots, HistoricalRoots, RandaoMixes,
StateRoots,
};
use crate::{get_key_for_col, DBColumn, Error, KeyValueStore, KeyValueStoreOp};
use crate::{get_key_for_col, DBColumn, Error, KeyValueStore, KeyValueStoreOp, StoreConfig};
use itertools::process_results;
use ssz::{Decode, DecodeError, Encode};
use ssz_derive::{Decode, Encode};
use std::convert::TryInto;
use std::io::Write;
use std::sync::Arc;
use types::superstruct;
use types::*;
use zstd::Encoder;
/// Lightweight variant of the `BeaconState` that is stored in the database.
///
@@ -47,7 +49,7 @@ where
pub eth1_deposit_index: u64,
// Registry
pub validators: VList<Validator, T::ValidatorRegistryLimit>,
pub validators: Vec<ValidatorMutable>,
pub balances: VList<u64, T::ValidatorRegistryLimit>,
// Shuffling
@@ -114,7 +116,10 @@ macro_rules! impl_from_state_forgetful {
eth1_deposit_index: $s.eth1_deposit_index,
// Validator registry
validators: $s.validators.clone(),
validators: $s.validators.into_iter().map(|validator| {
validator.mutable.clone()
})
.collect(),
balances: $s.balances.clone(),
// Shuffling
@@ -204,9 +209,23 @@ impl<T: EthSpec> PartialBeaconState<T> {
}
/// Prepare the partial state for storage in the KV database.
pub fn as_kv_store_op(&self, state_root: Hash256) -> KeyValueStoreOp {
pub fn as_kv_store_op(
&self,
state_root: Hash256,
config: &StoreConfig,
) -> Result<KeyValueStoreOp, Error> {
let db_key = get_key_for_col(DBColumn::BeaconState.into(), state_root.as_bytes());
KeyValueStoreOp::PutKeyValue(db_key, self.as_ssz_bytes())
let ssz_bytes = self.as_ssz_bytes();
let mut compressed_value =
Vec::with_capacity(config.estimate_compressed_size(ssz_bytes.len()));
let mut encoder = Encoder::new(&mut compressed_value, config.compression_level)
.map_err(Error::Compression)?;
encoder.write_all(&ssz_bytes).map_err(Error::Compression)?;
encoder.finish().map_err(Error::Compression)?;
Ok(KeyValueStoreOp::PutKeyValue(db_key, compressed_value))
}
pub fn load_block_roots<S: KeyValueStore<T>>(
@@ -278,7 +297,7 @@ impl<T: EthSpec> PartialBeaconState<T> {
/// Implement the conversion from PartialBeaconState -> BeaconState.
macro_rules! impl_try_into_beacon_state {
($inner:ident, $variant_name:ident, $struct_name:ident, [$($extra_fields:ident),*]) => {
($inner:ident, $variant_name:ident, $struct_name:ident, $immutable_validators:ident, [$($extra_fields:ident),*]) => {
BeaconState::$variant_name($struct_name {
// Versioning
genesis_time: $inner.genesis_time,
@@ -298,7 +317,16 @@ macro_rules! impl_try_into_beacon_state {
eth1_deposit_index: $inner.eth1_deposit_index,
// Validator registry
validators: $inner.validators,
validators: process_results($inner.validators.into_iter().enumerate().map(|(i, mutable)| {
$immutable_validators(i)
.ok_or(Error::MissingImmutableValidator(i))
.map(move |immutable| {
Validator {
immutable,
mutable
}
})
}), |iter| VList::try_from_iter(iter))??,
balances: $inner.balances,
// Shuffling
@@ -331,21 +359,24 @@ fn unpack_field<T>(x: Option<T>) -> Result<T, Error> {
x.ok_or(Error::PartialBeaconStateError)
}
impl<E: EthSpec> TryInto<BeaconState<E>> for PartialBeaconState<E> {
type Error = Error;
fn try_into(self) -> Result<BeaconState<E>, Error> {
impl<E: EthSpec> PartialBeaconState<E> {
pub fn try_into_full_state<F>(self, immutable_validators: F) -> Result<BeaconState<E>, Error>
where
F: Fn(usize) -> Option<Arc<ValidatorImmutable>>,
{
let state = match self {
PartialBeaconState::Base(inner) => impl_try_into_beacon_state!(
inner,
Base,
BeaconStateBase,
immutable_validators,
[previous_epoch_attestations, current_epoch_attestations]
),
PartialBeaconState::Altair(inner) => impl_try_into_beacon_state!(
inner,
Altair,
BeaconStateAltair,
immutable_validators,
[
previous_epoch_participation,
current_epoch_participation,
@@ -358,6 +389,7 @@ impl<E: EthSpec> TryInto<BeaconState<E>> for PartialBeaconState<E> {
inner,
Merge,
BeaconStateMerge,
immutable_validators,
[
previous_epoch_participation,
current_epoch_participation,

View File

@@ -0,0 +1,333 @@
use crate::{DBColumn, Error, HotColdDB, ItemStore, StoreItem};
use ssz::{Decode, Encode};
use std::collections::HashMap;
use std::convert::TryInto;
use std::marker::PhantomData;
use std::sync::Arc;
use types::{BeaconState, EthSpec, Hash256, PublicKey, PublicKeyBytes, ValidatorImmutable};
/// Provides a mapping of `validator_index -> validator_publickey`.
///
/// This cache exists for two reasons:
///
/// 1. To avoid reading a `BeaconState` from disk each time we need a public key.
/// 2. To reduce the amount of public key _decompression_ required. A `BeaconState` stores public
///    keys in compressed form and they are needed in decompressed form for signature
///    verification. Decompression is expensive when many keys are involved.
///
/// Entries are written through to the `HotColdDB` as they are imported, which allows the cache
/// to be restored between process invocations via `load_from_store`.
#[derive(Debug)]
pub struct ValidatorPubkeyCache<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
    pubkeys: Vec<PublicKey>,
    indices: HashMap<PublicKeyBytes, usize>,
    validators: Vec<Arc<ValidatorImmutable>>,
    _phantom: PhantomData<(E, Hot, Cold)>,
}

// Manual impl: deriving `Default` would impose unwanted `Default` bounds on `E`, `Hot` and
// `Cold`.
impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> Default
    for ValidatorPubkeyCache<E, Hot, Cold>
{
    fn default() -> Self {
        Self {
            pubkeys: Vec::new(),
            indices: HashMap::default(),
            validators: Vec::new(),
            _phantom: PhantomData,
        }
    }
}
impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> ValidatorPubkeyCache<E, Hot, Cold> {
/// Create a new public key cache using the keys in `state.validators`.
///
/// Also creates a new persistence file, returning an error if there is already a file at
/// `persistence_path`.
pub fn new(state: &BeaconState<E>, store: &HotColdDB<E, Hot, Cold>) -> Result<Self, Error> {
let mut cache = Self {
pubkeys: vec![],
indices: HashMap::new(),
validators: vec![],
_phantom: PhantomData,
};
cache.import_new_pubkeys(state, store)?;
Ok(cache)
}
/// Load the pubkey cache from the given on-disk database.
pub fn load_from_store(store: &HotColdDB<E, Hot, Cold>) -> Result<Self, Error> {
let mut pubkeys = vec![];
let mut indices = HashMap::new();
let mut validators = vec![];
for validator_index in 0.. {
if let Some(DatabaseValidator(validator)) =
store.get_item(&DatabaseValidator::key_for_index(validator_index))?
{
pubkeys.push(
(&validator.pubkey)
.try_into()
.map_err(|e| Error::ValidatorPubkeyCacheError(format!("{:?}", e)))?,
);
indices.insert(validator.pubkey, validator_index);
validators.push(validator);
} else {
break;
}
}
Ok(ValidatorPubkeyCache {
pubkeys,
indices,
validators,
_phantom: PhantomData,
})
}
/// Scan the given `state` and add any new validator public keys.
///
/// Does not delete any keys from `self` if they don't appear in `state`.
pub fn import_new_pubkeys(
&mut self,
state: &BeaconState<E>,
store: &HotColdDB<E, Hot, Cold>,
) -> Result<(), Error> {
if state.validators().len() > self.validators.len() {
self.import(
state
.validators()
.iter_from(self.pubkeys.len())
.unwrap() // FIXME(sproul)
.map(|v| v.immutable.clone()),
store,
)
} else {
Ok(())
}
}
/// Adds zero or more validators to `self`.
fn import<I>(&mut self, validator_keys: I, store: &HotColdDB<E, Hot, Cold>) -> Result<(), Error>
where
I: Iterator<Item = Arc<ValidatorImmutable>> + ExactSizeIterator,
{
self.validators.reserve(validator_keys.len());
self.pubkeys.reserve(validator_keys.len());
self.indices.reserve(validator_keys.len());
for validator in validator_keys {
let i = self.pubkeys.len();
if self.indices.contains_key(&validator.pubkey) {
return Err(Error::DuplicateValidatorPublicKey);
}
// The item is written to disk _before_ it is written into
// the local struct.
//
// This means that a pubkey cache read from disk will always be equivalent to or
// _later than_ the cache that was running in the previous instance of Lighthouse.
//
// The motivation behind this ordering is that we do not want to have states that
// reference a pubkey that is not in our cache. However, it's fine to have pubkeys
// that are never referenced in a state.
store.put_item(
&DatabaseValidator::key_for_index(i),
&DatabaseValidator(validator.clone()),
)?;
self.pubkeys.push(
(&validator.pubkey)
.try_into()
.map_err(Error::InvalidValidatorPubkeyBytes)?,
);
self.indices.insert(validator.pubkey, i);
self.validators.push(validator);
}
Ok(())
}
/// Get the public key for a validator with index `i`.
pub fn get(&self, i: usize) -> Option<&PublicKey> {
self.pubkeys.get(i)
}
/// Get the immutable validator with index `i`.
pub fn get_validator(&self, i: usize) -> Option<Arc<ValidatorImmutable>> {
self.validators.get(i).cloned()
}
/// Get the `PublicKey` for a validator with `PublicKeyBytes`.
pub fn get_pubkey_from_pubkey_bytes(&self, pubkey: &PublicKeyBytes) -> Option<&PublicKey> {
self.get_index(pubkey).and_then(|index| self.get(index))
}
/// Get the public key (in bytes form) for a validator with index `i`.
pub fn get_pubkey_bytes(&self, i: usize) -> Option<&PublicKeyBytes> {
self.validators.get(i).map(|validator| &validator.pubkey)
}
/// Get the index of a validator with `pubkey`.
pub fn get_index(&self, pubkey: &PublicKeyBytes) -> Option<usize> {
self.indices.get(pubkey).copied()
}
/// Returns the number of validators in the cache.
pub fn len(&self) -> usize {
self.indices.len()
}
/// Returns `true` if there are no validators in the cache.
pub fn is_empty(&self) -> bool {
self.indices.is_empty()
}
}
/// On-disk representation of an immutable validator record.
///
/// Keyed by the validator index as `Hash256::from_low_u64_be(index)` in the
/// `PubkeyCache` column.
struct DatabaseValidator(Arc<ValidatorImmutable>);

impl StoreItem for DatabaseValidator {
    fn db_column() -> DBColumn {
        DBColumn::PubkeyCache
    }

    fn as_store_bytes(&self) -> Result<Vec<u8>, Error> {
        Ok(self.0.as_ssz_bytes())
    }

    fn from_store_bytes(bytes: &[u8]) -> Result<Self, Error> {
        let validator = ValidatorImmutable::from_ssz_bytes(bytes)?;
        Ok(DatabaseValidator(Arc::new(validator)))
    }
}

impl DatabaseValidator {
    /// Database key for the validator at `index`.
    fn key_for_index(index: usize) -> Hash256 {
        Hash256::from_low_u64_be(index as u64)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test_utils::{BeaconChainHarness, EphemeralHarnessType};
    use logging::test_logger;
    use std::sync::Arc;
    use store::HotColdDB;
    use types::{BeaconState, EthSpec, Keypair, MainnetEthSpec};

    type E = MainnetEthSpec;
    type T = EphemeralHarnessType<E>;

    /// Build a state with `validator_count` deterministic validators, returning it
    /// alongside the matching keypairs.
    fn get_state(validator_count: usize) -> (BeaconState<E>, Vec<Keypair>) {
        let harness = BeaconChainHarness::builder(MainnetEthSpec)
            .default_spec()
            .deterministic_keypairs(validator_count)
            .fresh_ephemeral_store()
            .build();

        harness.advance_slot();

        (harness.get_current_state(), harness.validator_keypairs)
    }

    // NOTE(review): `BeaconStore` is not imported by this module (only
    // `store::HotColdDB` is) — confirm where this alias is meant to come from.
    fn get_store() -> BeaconStore<T> {
        Arc::new(
            HotColdDB::open_ephemeral(<_>::default(), E::default_spec(), test_logger()).unwrap(),
        )
    }

    /// Assert that indices `0..validator_count` resolve to the expected pubkeys (and
    /// back to the same indices), and that one index past the end yields `None`.
    // NOTE(review): `ValidatorPubkeyCache` is declared with three type parameters
    // `<E, Hot, Cold>`; `ValidatorPubkeyCache<T>` will not compile as written —
    // confirm the intended type arguments.
    #[allow(clippy::needless_range_loop)]
    fn check_cache_get(cache: &ValidatorPubkeyCache<T>, keypairs: &[Keypair]) {
        let validator_count = keypairs.len();

        for i in 0..validator_count + 1 {
            if i < validator_count {
                let pubkey = cache.get(i).expect("pubkey should be present");
                assert_eq!(pubkey, &keypairs[i].pk, "pubkey should match cache");

                let pubkey_bytes: PublicKeyBytes = pubkey.clone().into();

                assert_eq!(
                    i,
                    cache
                        .get_index(&pubkey_bytes)
                        .expect("should resolve index"),
                    "index should match cache"
                );
            } else {
                // One-past-the-end index must miss.
                assert_eq!(
                    cache.get(i),
                    None,
                    "should not get pubkey for out of bounds index",
                );
            }
        }
    }

    #[test]
    fn basic_operation() {
        let (state, keypairs) = get_state(8);
        let store = get_store();
        // NOTE(review): `ValidatorPubkeyCache::new` takes `&HotColdDB`, but an `Arc`
        // is passed here — likely needs `&store`.
        let mut cache = ValidatorPubkeyCache::new(&state, store).expect("should create cache");

        check_cache_get(&cache, &keypairs[..]);

        // Try adding a state with the same number of keypairs.
        let (state, keypairs) = get_state(8);
        // NOTE(review): `import_new_pubkeys` also requires a `store` argument; these
        // calls are missing it.
        cache
            .import_new_pubkeys(&state)
            .expect("should import pubkeys");
        check_cache_get(&cache, &keypairs[..]);

        // Try adding a state with less keypairs.
        let (state, _) = get_state(1);
        cache
            .import_new_pubkeys(&state)
            .expect("should import pubkeys");
        check_cache_get(&cache, &keypairs[..]);

        // Try adding a state with more keypairs.
        let (state, keypairs) = get_state(12);
        cache
            .import_new_pubkeys(&state)
            .expect("should import pubkeys");
        check_cache_get(&cache, &keypairs[..]);
    }

    #[test]
    fn persistence() {
        let (state, keypairs) = get_state(8);
        let store = get_store();

        // Create a new cache.
        let cache = ValidatorPubkeyCache::new(&state, store.clone()).expect("should create cache");
        check_cache_get(&cache, &keypairs[..]);
        drop(cache);

        // Re-init the cache from the store.
        // NOTE(review): `load_from_store` takes `&HotColdDB`, but an `Arc` clone is
        // passed here — likely needs `&store`.
        let mut cache =
            ValidatorPubkeyCache::load_from_store(store.clone()).expect("should open cache");
        check_cache_get(&cache, &keypairs[..]);

        // Add some more keypairs.
        let (state, keypairs) = get_state(12);
        cache
            .import_new_pubkeys(&state)
            .expect("should import pubkeys");
        check_cache_get(&cache, &keypairs[..]);
        drop(cache);

        // Re-init the cache from the store.
        let cache = ValidatorPubkeyCache::load_from_store(store).expect("should open cache");
        check_cache_get(&cache, &keypairs[..]);
    }
}