In-memory tree states (#5533)

* Consensus changes

* EF tests

* lcli

* common and watch

* account manager

* cargo

* fork choice

* promise cache

* beacon chain

* interop genesis

* http api

* lighthouse

* op pool

* beacon chain misc

* parallel state cache

* store

* fix issues in store

* IT COMPILES

* Remove some unnecessary module qualification

* Revert Arced pubkey optimization (#5536)

* Merge remote-tracking branch 'origin/unstable' into tree-states-memory

* Fix caching, rebasing and some tests

* Remove unused deps

* Merge remote-tracking branch 'origin/unstable' into tree-states-memory

* Small cleanups

* Revert shuffling cache/promise cache changes

* Fix state advance bugs

* Fix shuffling tests

* Remove some resolved FIXMEs

* Remove StateProcessingStrategy

* Optimise withdrawals calculation

* Don't reorg if state cache is missed

* Remove inconsistent state func

* Fix beta compiler

* Rebase early, rebase often

* Fix state caching behaviour

* Update to milhouse release

* Fix on-disk consensus context format

* Merge remote-tracking branch 'origin/unstable' into tree-states-memory

* Squashed commit of the following:

commit 3a16649023
Author: Michael Sproul <michael@sigmaprime.io>
Date:   Thu Apr 18 14:26:09 2024 +1000

    Fix on-disk consensus context format

* Keep indexed attestations, thanks Sean

* Merge branch 'on-disk-consensus-context' into tree-states-memory

* Merge branch 'unstable' into tree-states-memory

* Address half of Sean's review

* More simplifications from Sean's review

* Cache state after get_advanced_hot_state
This commit is contained in:
Michael Sproul
2024-04-24 11:22:36 +10:00
committed by GitHub
parent 4cad1fcbbe
commit 61962898e2
108 changed files with 2038 additions and 2762 deletions

View File

@@ -48,6 +48,7 @@
// returned alongside.
#![allow(clippy::result_large_err)]
use crate::beacon_snapshot::PreProcessingSnapshot;
use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob};
use crate::block_verification_types::{
AsBlock, BlockContentsError, BlockImportData, GossipVerifiedBlockContents, RpcBlock,
@@ -59,14 +60,10 @@ use crate::execution_payload::{
AllowOptimisticImport, NotifyExecutionLayer, PayloadNotifier,
};
use crate::observed_block_producers::SeenBlock;
use crate::snapshot_cache::PreProcessingSnapshot;
use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS;
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
use crate::{
beacon_chain::{
BeaconForkChoice, ForkChoiceError, BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT,
VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT,
},
beacon_chain::{BeaconForkChoice, ForkChoiceError, VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT},
metrics, BeaconChain, BeaconChainError, BeaconChainTypes,
};
use derivative::Derivative;
@@ -86,22 +83,21 @@ use state_processing::{
block_signature_verifier::{BlockSignatureVerifier, Error as BlockSignatureVerifierError},
per_block_processing, per_slot_processing,
state_advance::partial_state_advance,
BlockProcessingError, BlockSignatureStrategy, ConsensusContext, SlotProcessingError,
StateProcessingStrategy, VerifyBlockRoot,
AllCaches, BlockProcessingError, BlockSignatureStrategy, ConsensusContext, SlotProcessingError,
VerifyBlockRoot,
};
use std::borrow::Cow;
use std::fmt::Debug;
use std::fs;
use std::io::Write;
use std::sync::Arc;
use std::time::Duration;
use store::{Error as DBError, HotStateSummary, KeyValueStore, StoreOp};
use task_executor::JoinHandle;
use tree_hash::TreeHash;
use types::{
BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, CloneConfig, Epoch, EthSpec,
ExecutionBlockHash, Hash256, InconsistentFork, PublicKey, PublicKeyBytes, RelativeEpoch,
SignedBeaconBlock, SignedBeaconBlockHeader, Slot,
BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, Epoch, EthSpec, ExecutionBlockHash,
Hash256, InconsistentFork, PublicKey, PublicKeyBytes, RelativeEpoch, SignedBeaconBlock,
SignedBeaconBlockHeader, Slot,
};
use types::{BlobSidecar, ExecPayload};
@@ -578,7 +574,7 @@ pub fn signature_verify_chain_segment<T: BeaconChainTypes>(
}
let (first_root, first_block) = chain_segment.remove(0);
let (mut parent, first_block) = load_parent(first_root, first_block, chain)?;
let (mut parent, first_block) = load_parent(first_block, chain)?;
let slot = first_block.slot();
chain_segment.insert(0, (first_root, first_block));
@@ -893,7 +889,7 @@ impl<T: BeaconChainTypes> GossipVerifiedBlock<T> {
} else {
// The proposer index was *not* cached and we must load the parent in order to determine
// the proposer index.
let (mut parent, block) = load_parent(block_root, block, chain)?;
let (mut parent, block) = load_parent(block, chain)?;
debug!(
chain.log,
@@ -1042,7 +1038,7 @@ impl<T: BeaconChainTypes> SignatureVerifiedBlock<T> {
// Check the anchor slot before loading the parent, to avoid spurious lookups.
check_block_against_anchor_slot(block.message(), chain)?;
let (mut parent, block) = load_parent(block_root, block, chain)?;
let (mut parent, block) = load_parent(block, chain)?;
let state = cheap_state_advance_to_obtain_committees::<_, BlockError<T::EthSpec>>(
&mut parent.pre_state,
@@ -1092,7 +1088,7 @@ impl<T: BeaconChainTypes> SignatureVerifiedBlock<T> {
let (mut parent, block) = if let Some(parent) = from.parent {
(parent, from.block)
} else {
load_parent(from.block_root, from.block, chain)?
load_parent(from.block, chain)?
};
let state = cheap_state_advance_to_obtain_committees::<_, BlockError<T::EthSpec>>(
@@ -1154,7 +1150,7 @@ impl<T: BeaconChainTypes> IntoExecutionPendingBlock<T> for SignatureVerifiedBloc
let (parent, block) = if let Some(parent) = self.parent {
(parent, self.block)
} else {
load_parent(self.block_root, self.block, chain)
load_parent(self.block, chain)
.map_err(|e| BlockSlashInfo::SignatureValid(header.clone(), e))?
};
@@ -1386,8 +1382,18 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE);
// Stage a batch of operations to be completed atomically if this block is imported
// successfully.
let mut confirmed_state_roots = vec![];
// successfully. We include the state root of the pre-state, which may be an advanced state
// that was stored in the DB with a `temporary` flag.
let mut state = parent.pre_state;
let mut confirmed_state_roots = if state.slot() > parent.beacon_block.slot() {
// Advanced pre-state. Delete its temporary flag.
let pre_state_root = state.update_tree_hash_cache()?;
vec![pre_state_root]
} else {
// Pre state is parent state. It is already stored in the DB without temporary status.
vec![]
};
// The block must have a higher slot than its parent.
if block.slot() <= parent.beacon_block.slot() {
@@ -1397,14 +1403,6 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
});
}
let mut summaries = vec![];
// Transition the parent state to the block slot.
//
// It is important to note that we're using a "pre-state" here, one that has potentially
// been advanced one slot forward from `parent.beacon_block.slot`.
let mut state = parent.pre_state;
// Perform a sanity check on the pre-state.
let parent_slot = parent.beacon_block.slot();
if state.slot() < parent_slot || state.slot() > block.slot() {
@@ -1423,6 +1421,12 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
eth1_deposit_index: state.eth1_deposit_index(),
};
// Transition the parent state to the block slot.
//
// It is important to note that we're using a "pre-state" here, one that has potentially
// been advanced one slot forward from `parent.beacon_block.slot`.
let mut summaries = vec![];
let distance = block.slot().as_u64().saturating_sub(state.slot().as_u64());
for _ in 0..distance {
let state_root = if parent.beacon_block.slot() == state.slot() {
@@ -1522,8 +1526,7 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
let committee_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_COMMITTEE);
state.build_committee_cache(RelativeEpoch::Previous, &chain.spec)?;
state.build_committee_cache(RelativeEpoch::Current, &chain.spec)?;
state.build_all_committee_caches(&chain.spec)?;
metrics::stop_timer(committee_timer);
@@ -1564,7 +1567,6 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
block.as_block(),
// Signatures were verified earlier in this function.
BlockSignatureStrategy::NoVerification,
StateProcessingStrategy::Accurate,
VerifyBlockRoot::True,
&mut consensus_context,
&chain.spec,
@@ -1839,12 +1841,9 @@ fn verify_parent_block_is_known<T: BeaconChainTypes>(
/// whilst attempting the operation.
#[allow(clippy::type_complexity)]
fn load_parent<T: BeaconChainTypes, B: AsBlock<T::EthSpec>>(
block_root: Hash256,
block: B,
chain: &BeaconChain<T>,
) -> Result<(PreProcessingSnapshot<T::EthSpec>, B), BlockError<T::EthSpec>> {
let spec = &chain.spec;
// Reject any block if its parent is not known to fork choice.
//
// A block that is not in fork choice is either:
@@ -1863,45 +1862,10 @@ fn load_parent<T: BeaconChainTypes, B: AsBlock<T::EthSpec>>(
return Err(BlockError::ParentUnknown(block.into_rpc_block()));
}
let block_delay = chain
.block_times_cache
.read()
.get_block_delays(
block_root,
chain
.slot_clock
.start_of(block.slot())
.unwrap_or_else(|| Duration::from_secs(0)),
)
.observed;
let db_read_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_READ);
let result = if let Some((snapshot, cloned)) = chain
.snapshot_cache
.try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
.and_then(|mut snapshot_cache| {
snapshot_cache.get_state_for_block_processing(
block.parent_root(),
block.slot(),
block_delay,
spec,
)
}) {
if cloned {
metrics::inc_counter(&metrics::BLOCK_PROCESSING_SNAPSHOT_CACHE_CLONES);
debug!(
chain.log,
"Cloned snapshot for late block/skipped slot";
"slot" => %block.slot(),
"parent_slot" => %snapshot.beacon_block.slot(),
"parent_root" => ?block.parent_root(),
"block_delay" => ?block_delay,
);
}
Ok((snapshot, block))
} else {
// Load the blocks parent block from the database, returning invalid if that block is not
let result = {
// Load the block's parent block from the database, returning invalid if that block is not
// found.
//
// We don't return a DBInconsistent error here since it's possible for a block to
@@ -1925,7 +1889,7 @@ fn load_parent<T: BeaconChainTypes, B: AsBlock<T::EthSpec>>(
// Retrieve any state that is advanced through to at most `block.slot()`: this is
// particularly important if `block` descends from the finalized/split block, but at a slot
// prior to the finalized slot (which is invalid and inaccessible in our DB schema).
let (parent_state_root, parent_state) = chain
let (parent_state_root, state) = chain
.store
.get_advanced_hot_state(root, block.slot(), parent_block.state_root())?
.ok_or_else(|| {
@@ -1934,22 +1898,46 @@ fn load_parent<T: BeaconChainTypes, B: AsBlock<T::EthSpec>>(
)
})?;
metrics::inc_counter(&metrics::BLOCK_PROCESSING_SNAPSHOT_CACHE_MISSES);
debug!(
chain.log,
"Missed snapshot cache";
"slot" => block.slot(),
"parent_slot" => parent_block.slot(),
"parent_root" => ?block.parent_root(),
"block_delay" => ?block_delay,
);
if !state.all_caches_built() {
debug!(
chain.log,
"Parent state lacks built caches";
"block_slot" => block.slot(),
"state_slot" => state.slot(),
);
}
if block.slot() != state.slot() {
debug!(
chain.log,
"Parent state is not advanced";
"block_slot" => block.slot(),
"state_slot" => state.slot(),
);
}
let beacon_state_root = if state.slot() == parent_block.slot() {
// Sanity check.
if parent_state_root != parent_block.state_root() {
return Err(BeaconChainError::DBInconsistent(format!(
"Parent state at slot {} has the wrong state root: {:?} != {:?}",
state.slot(),
parent_state_root,
parent_block.state_root()
))
.into());
}
Some(parent_block.state_root())
} else {
None
};
Ok((
PreProcessingSnapshot {
beacon_block: parent_block,
beacon_block_root: root,
pre_state: parent_state,
beacon_state_root: Some(parent_state_root),
pre_state: state,
beacon_state_root,
},
block,
))
@@ -2030,7 +2018,7 @@ pub fn cheap_state_advance_to_obtain_committees<'a, E: EthSpec, Err: BlockBlobEr
} else if state.slot() > block_slot {
Err(Err::not_later_than_parent_error(block_slot, state.slot()))
} else {
let mut state = state.clone_with(CloneConfig::committee_caches_only());
let mut state = state.clone();
let target_slot = block_epoch.start_slot(E::slots_per_epoch());
// Advance the state into the same epoch as the block. Use the "partial" method since state