Implement tree states & hierarchical state DB

This commit is contained in:
Michael Sproul
2023-06-19 10:14:47 +10:00
parent 2bb62b7f7d
commit 23db089a7a
193 changed files with 6093 additions and 5925 deletions

View File

@@ -15,9 +15,7 @@
//! 2. There's a possibility that the head block is never built upon, causing wasted CPU cycles.
use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS;
use crate::{
beacon_chain::{ATTESTATION_CACHE_LOCK_TIMEOUT, BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT},
chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR,
snapshot_cache::StateAdvance,
beacon_chain::ATTESTATION_CACHE_LOCK_TIMEOUT, chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR,
BeaconChain, BeaconChainError, BeaconChainTypes,
};
use slog::{debug, error, warn, Logger};
@@ -29,7 +27,7 @@ use std::sync::{
};
use task_executor::TaskExecutor;
use tokio::time::{sleep, sleep_until, Instant};
use types::{AttestationShufflingId, EthSpec, Hash256, RelativeEpoch, Slot};
use types::{AttestationShufflingId, BeaconStateError, EthSpec, Hash256, RelativeEpoch, Slot};
/// If the head slot is more than `MAX_ADVANCE_DISTANCE` from the current slot, then don't perform
/// the state advancement.
@@ -48,6 +46,8 @@ const MAX_FORK_CHOICE_DISTANCE: u64 = 256;
#[derive(Debug)]
enum Error {
BeaconChain(BeaconChainError),
BeaconState(BeaconStateError),
Store(store::Error),
HeadMissingFromSnapshotCache(Hash256),
MaxDistanceExceeded {
current_slot: Slot,
@@ -58,7 +58,7 @@ enum Error {
},
BadStateSlot {
_state_slot: Slot,
_block_slot: Slot,
_current_slot: Slot,
},
}
@@ -68,6 +68,18 @@ impl From<BeaconChainError> for Error {
}
}
impl From<BeaconStateError> for Error {
fn from(e: BeaconStateError) -> Self {
Self::BeaconState(e)
}
}
impl From<store::Error> for Error {
fn from(e: store::Error) -> Self {
Self::Store(e)
}
}
/// Provides a simple thread-safe lock to be used for task co-ordination. Practically equivalent to
/// `Mutex<()>`.
#[derive(Clone)]
@@ -263,11 +275,6 @@ async fn state_advance_timer<T: BeaconChainTypes>(
}
}
/// Reads the `snapshot_cache` from the `beacon_chain` and attempts to take a clone of the
/// `BeaconState` of the head block. If it obtains this clone, the state will be advanced a single
/// slot then placed back in the `snapshot_cache` to be used for block verification.
///
/// See the module-level documentation for rationale.
fn advance_head<T: BeaconChainTypes>(
beacon_chain: &Arc<BeaconChain<T>>,
log: &Logger,
@@ -290,46 +297,38 @@ fn advance_head<T: BeaconChainTypes>(
}
}
let head_root = beacon_chain.head_beacon_block_root();
let (head_slot, head_state_root, mut state) = match beacon_chain
.snapshot_cache
.try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
.ok_or(BeaconChainError::SnapshotCacheLockTimeout)?
.get_for_state_advance(head_root)
{
StateAdvance::AlreadyAdvanced => {
return Err(Error::StateAlreadyAdvanced {
block_root: head_root,
})
}
StateAdvance::BlockNotFound => return Err(Error::HeadMissingFromSnapshotCache(head_root)),
StateAdvance::State {
state,
state_root,
block_slot,
} => (block_slot, state_root, *state),
let (head_block_root, head_block_state_root) = {
let snapshot = beacon_chain.head_snapshot();
(snapshot.beacon_block_root, snapshot.beacon_state_root())
};
let initial_slot = state.slot();
let initial_epoch = state.current_epoch();
let (head_state_root, mut state) = beacon_chain
.store
.get_advanced_state(head_block_root, current_slot, head_block_state_root)?
.ok_or(Error::HeadMissingFromSnapshotCache(head_block_root))?;
let state_root = if state.slot() == head_slot {
Some(head_state_root)
} else {
if state.slot() == current_slot + 1 {
return Err(Error::StateAlreadyAdvanced {
block_root: head_block_root,
});
} else if state.slot() != current_slot {
// Protect against advancing a state more than a single slot.
//
// Advancing more than one slot without storing the intermediate state would corrupt the
// database. Future works might store temporary, intermediate states inside this function.
return Err(Error::BadStateSlot {
_block_slot: head_slot,
_state_slot: state.slot(),
_current_slot: current_slot,
});
};
}
let initial_slot = state.slot();
let initial_epoch = state.current_epoch();
// Advance the state a single slot.
if let Some(summary) = per_slot_processing(&mut state, state_root, &beacon_chain.spec)
.map_err(BeaconChainError::from)?
if let Some(summary) =
per_slot_processing(&mut state, Some(head_state_root), &beacon_chain.spec)
.map_err(BeaconChainError::from)?
{
// Expose Prometheus metrics.
if let Err(e) = summary.observe_metrics() {
@@ -363,7 +362,7 @@ fn advance_head<T: BeaconChainTypes>(
debug!(
log,
"Advanced head state one slot";
"head_root" => ?head_root,
"head_block_root" => ?head_block_root,
"state_slot" => state.slot(),
"current_slot" => current_slot,
);
@@ -382,14 +381,14 @@ fn advance_head<T: BeaconChainTypes>(
if initial_epoch < state.current_epoch() {
// Update the proposer cache.
//
// We supply the `head_root` as the decision block since the prior `if` statement guarantees
// We supply the `head_block_root` as the decision block since the prior `if` statement guarantees
// the head root is the latest block from the prior epoch.
beacon_chain
.beacon_proposer_cache
.lock()
.insert(
state.current_epoch(),
head_root,
head_block_root,
state
.get_beacon_proposer_indices(&beacon_chain.spec)
.map_err(BeaconChainError::from)?,
@@ -398,8 +397,9 @@ fn advance_head<T: BeaconChainTypes>(
.map_err(BeaconChainError::from)?;
// Update the attester cache.
let shuffling_id = AttestationShufflingId::new(head_root, &state, RelativeEpoch::Next)
.map_err(BeaconChainError::from)?;
let shuffling_id =
AttestationShufflingId::new(head_block_root, &state, RelativeEpoch::Next)
.map_err(BeaconChainError::from)?;
let committee_cache = state
.committee_cache(RelativeEpoch::Next)
.map_err(BeaconChainError::from)?;
@@ -412,7 +412,7 @@ fn advance_head<T: BeaconChainTypes>(
debug!(
log,
"Primed proposer and attester caches";
"head_root" => ?head_root,
"head_block_root" => ?head_block_root,
"next_epoch_shuffling_root" => ?shuffling_id.shuffling_decision_block,
"state_epoch" => state.current_epoch(),
"current_epoch" => current_slot.epoch(T::EthSpec::slots_per_epoch()),
@@ -422,44 +422,19 @@ fn advance_head<T: BeaconChainTypes>(
// Apply the state to the attester cache, if the cache deems it interesting.
beacon_chain
.attester_cache
.maybe_cache_state(&state, head_root, &beacon_chain.spec)
.maybe_cache_state(&state, head_block_root, &beacon_chain.spec)
.map_err(BeaconChainError::from)?;
let final_slot = state.slot();
// Insert the advanced state back into the snapshot cache.
beacon_chain
.snapshot_cache
.try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
.ok_or(BeaconChainError::SnapshotCacheLockTimeout)?
.update_pre_state(head_root, state)
.ok_or(Error::HeadMissingFromSnapshotCache(head_root))?;
// If we have moved into the next slot whilst processing the state then this function is going
// to become ineffective and likely become a hindrance as we're stealing the tree hash cache
// from the snapshot cache (which may force the next block to rebuild a new one).
//
// If this warning occurs very frequently on well-resourced machines then we should consider
// starting it earlier in the slot. Otherwise, it's a good indication that the machine is too
// slow/overloaded and will be useful information for the user.
let starting_slot = current_slot;
let current_slot = beacon_chain.slot()?;
if starting_slot < current_slot {
warn!(
log,
"State advance too slow";
"head_root" => %head_root,
"advanced_slot" => final_slot,
"current_slot" => current_slot,
"starting_slot" => starting_slot,
"msg" => "system resources may be overloaded",
);
}
// Write the advanced state to the database.
let advanced_state_root = state.update_tree_hash_cache()?;
beacon_chain.store.put_state(&advanced_state_root, &state)?;
debug!(
log,
"Completed state advance";
"head_root" => ?head_root,
"head_block_root" => ?head_block_root,
"advanced_slot" => final_slot,
"initial_slot" => initial_slot,
);