mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-10 12:11:59 +00:00
Initial Commit of Retrospective OTB Verification (#3372)
## Issue Addressed * #2983 ## Proposed Changes Basically followed the [instructions laid out here](https://github.com/sigp/lighthouse/issues/2983#issuecomment-1062494947) Co-authored-by: Paul Hauner <paul@paulhauner.com> Co-authored-by: ethDreamer <37123614+ethDreamer@users.noreply.github.com>
This commit is contained in:
@@ -137,6 +137,9 @@ const MAX_PER_SLOT_FORK_CHOICE_DISTANCE: u64 = 4;
|
||||
/// Human-readable reason string for a client shutdown caused by a justified block whose
/// execution payload is invalid.
// NOTE(review): the use site is not visible in this hunk; presumably it is passed along with a
// shutdown signal in the same way as the other `*_SHUTDOWN_REASON` constants — confirm.
pub const INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON: &str =
|
||||
"Justified block has an invalid execution payload.";
|
||||
|
||||
/// Human-readable reason string sent with `ShutdownReason::Failure` by the OTB verification
/// routine when a finalized, canonical merge transition block fails terminal PoW block
/// validation.
pub const INVALID_FINALIZED_MERGE_TRANSITION_BLOCK_SHUTDOWN_REASON: &str =
|
||||
"Finalized merge transition block is invalid.";
|
||||
|
||||
/// Defines the behaviour when a block/block-root for a skipped slot is requested.
|
||||
pub enum WhenSlotSkipped {
|
||||
/// If the slot is a skip slot, return `None`.
|
||||
@@ -528,6 +531,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
|
||||
/// Even more efficient variant of `forwards_iter_block_roots` that will avoid cloning the head
|
||||
/// state if it isn't required for the requested range of blocks.
|
||||
/// The range [start_slot, end_slot] is inclusive (ie `start_slot <= end_slot`)
|
||||
pub fn forwards_iter_block_roots_until(
|
||||
&self,
|
||||
start_slot: Slot,
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
//! ```
|
||||
use crate::execution_payload::{
|
||||
is_optimistic_candidate_block, validate_execution_payload_for_gossip, validate_merge_block,
|
||||
PayloadNotifier,
|
||||
AllowOptimisticImport, PayloadNotifier,
|
||||
};
|
||||
use crate::snapshot_cache::PreProcessingSnapshot;
|
||||
use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS;
|
||||
@@ -1199,7 +1199,7 @@ impl<T: BeaconChainTypes> ExecutionPendingBlock<T> {
|
||||
// - Doing the check here means we can keep our fork-choice implementation "pure". I.e., no
|
||||
// calls to remote servers.
|
||||
if is_valid_merge_transition_block {
|
||||
validate_merge_block(&chain, block.message()).await?;
|
||||
validate_merge_block(&chain, block.message(), AllowOptimisticImport::Yes).await?;
|
||||
};
|
||||
|
||||
// The specification declares that this should be run *inside* `per_block_processing`,
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//! So, this module contains functions that one might expect to find in other crates, but they live
|
||||
//! here for good reason.
|
||||
|
||||
use crate::otb_verification_service::OptimisticTransitionBlock;
|
||||
use crate::{
|
||||
BeaconChain, BeaconChainError, BeaconChainTypes, BlockError, BlockProductionError,
|
||||
ExecutionPayloadError,
|
||||
@@ -27,6 +28,12 @@ use types::*;
|
||||
pub type PreparePayloadResult<Payload> = Result<Payload, BlockProductionError>;
|
||||
pub type PreparePayloadHandle<Payload> = JoinHandle<Option<PreparePayloadResult<Payload>>>;
|
||||
|
||||
/// Selects whether `validate_merge_block` may accept a merge transition block optimistically
/// when its terminal PoW block cannot be looked up.
///
/// The type is a plain two-state flag, so it is `Copy` and fully comparable; `Debug` is derived
/// so the value can be logged and used in `assert_eq!`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AllowOptimisticImport {
    /// The block may be imported optimistically (it is then recorded for later verification).
    Yes,
    /// The block must be verified; an unavailable terminal block is reported as an error.
    No,
}
|
||||
|
||||
/// Used to await the result of executing payload with a remote EE.
|
||||
pub struct PayloadNotifier<T: BeaconChainTypes> {
|
||||
pub chain: Arc<BeaconChain<T>>,
|
||||
@@ -146,6 +153,7 @@ async fn notify_new_payload<'a, T: BeaconChainTypes>(
|
||||
pub async fn validate_merge_block<'a, T: BeaconChainTypes>(
|
||||
chain: &Arc<BeaconChain<T>>,
|
||||
block: BeaconBlockRef<'a, T::EthSpec>,
|
||||
allow_optimistic_import: AllowOptimisticImport,
|
||||
) -> Result<(), BlockError<T::EthSpec>> {
|
||||
let spec = &chain.spec;
|
||||
let block_epoch = block.slot().epoch(T::EthSpec::slots_per_epoch());
|
||||
@@ -188,13 +196,18 @@ pub async fn validate_merge_block<'a, T: BeaconChainTypes>(
|
||||
}
|
||||
.into()),
|
||||
None => {
|
||||
if is_optimistic_candidate_block(chain, block.slot(), block.parent_root()).await? {
|
||||
if allow_optimistic_import == AllowOptimisticImport::Yes
|
||||
&& is_optimistic_candidate_block(chain, block.slot(), block.parent_root()).await?
|
||||
{
|
||||
debug!(
|
||||
chain.log,
|
||||
"Optimistically accepting terminal block";
|
||||
"Optimistically importing merge transition block";
|
||||
"block_hash" => ?execution_payload.parent_hash(),
|
||||
"msg" => "the terminal block/parent was unavailable"
|
||||
);
|
||||
// Store Optimistic Transition Block in Database for later Verification
|
||||
OptimisticTransitionBlock::from_block(block)
|
||||
.persist_in_store::<T, _>(&chain.store)?;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ExecutionPayloadError::UnverifiedNonOptimisticCandidate.into())
|
||||
|
||||
@@ -28,6 +28,7 @@ mod observed_aggregates;
|
||||
mod observed_attesters;
|
||||
mod observed_block_producers;
|
||||
pub mod observed_operations;
|
||||
pub mod otb_verification_service;
|
||||
mod persisted_beacon_chain;
|
||||
mod persisted_fork_choice;
|
||||
mod pre_finalization_cache;
|
||||
@@ -45,6 +46,7 @@ mod validator_pubkey_cache;
|
||||
pub use self::beacon_chain::{
|
||||
AttestationProcessingOutcome, BeaconChain, BeaconChainTypes, BeaconStore, ChainSegmentResult,
|
||||
CountUnrealized, ForkChoiceError, ProduceBlockVerification, StateSkipConfig, WhenSlotSkipped,
|
||||
INVALID_FINALIZED_MERGE_TRANSITION_BLOCK_SHUTDOWN_REASON,
|
||||
INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON, MAXIMUM_GOSSIP_CLOCK_DISPARITY,
|
||||
};
|
||||
pub use self::beacon_snapshot::BeaconSnapshot;
|
||||
|
||||
378
beacon_node/beacon_chain/src/otb_verification_service.rs
Normal file
378
beacon_node/beacon_chain/src/otb_verification_service.rs
Normal file
@@ -0,0 +1,378 @@
|
||||
use crate::execution_payload::{validate_merge_block, AllowOptimisticImport};
|
||||
use crate::{
|
||||
BeaconChain, BeaconChainError, BeaconChainTypes, BlockError, ExecutionPayloadError,
|
||||
INVALID_FINALIZED_MERGE_TRANSITION_BLOCK_SHUTDOWN_REASON,
|
||||
};
|
||||
use itertools::process_results;
|
||||
use proto_array::InvalidationOperation;
|
||||
use slog::{crit, debug, error, info, warn};
|
||||
use slot_clock::SlotClock;
|
||||
use ssz::{Decode, Encode};
|
||||
use ssz_derive::{Decode, Encode};
|
||||
use state_processing::per_block_processing::is_merge_transition_complete;
|
||||
use std::sync::Arc;
|
||||
use store::{DBColumn, Error as StoreError, HotColdDB, KeyValueStore, StoreItem};
|
||||
use task_executor::{ShutdownReason, TaskExecutor};
|
||||
use tokio::time::sleep;
|
||||
use tree_hash::TreeHash;
|
||||
use types::{BeaconBlockRef, EthSpec, Hash256, Slot};
|
||||
use DBColumn::OptimisticTransitionBlock as OTBColumn;
|
||||
|
||||
#[derive(Clone, Debug, Decode, Encode, PartialEq)]
|
||||
pub struct OptimisticTransitionBlock {
|
||||
root: Hash256,
|
||||
slot: Slot,
|
||||
}
|
||||
|
||||
impl OptimisticTransitionBlock {
|
||||
// types::BeaconBlockRef<'_, <T as BeaconChainTypes>::EthSpec>
|
||||
pub fn from_block<E: EthSpec>(block: BeaconBlockRef<E>) -> Self {
|
||||
Self {
|
||||
root: block.tree_hash_root(),
|
||||
slot: block.slot(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn root(&self) -> &Hash256 {
|
||||
&self.root
|
||||
}
|
||||
|
||||
pub fn slot(&self) -> &Slot {
|
||||
&self.slot
|
||||
}
|
||||
|
||||
pub fn persist_in_store<T, A>(&self, store: A) -> Result<(), StoreError>
|
||||
where
|
||||
T: BeaconChainTypes,
|
||||
A: AsRef<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
|
||||
{
|
||||
if store
|
||||
.as_ref()
|
||||
.item_exists::<OptimisticTransitionBlock>(&self.root)?
|
||||
{
|
||||
Ok(())
|
||||
} else {
|
||||
store.as_ref().put_item(&self.root, self)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_from_store<T, A>(&self, store: A) -> Result<(), StoreError>
|
||||
where
|
||||
T: BeaconChainTypes,
|
||||
A: AsRef<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
|
||||
{
|
||||
store
|
||||
.as_ref()
|
||||
.hot_db
|
||||
.key_delete(OTBColumn.into(), self.root.as_bytes())
|
||||
}
|
||||
|
||||
fn is_canonical<T: BeaconChainTypes>(
|
||||
&self,
|
||||
chain: &BeaconChain<T>,
|
||||
) -> Result<bool, BeaconChainError> {
|
||||
Ok(chain
|
||||
.forwards_iter_block_roots_until(self.slot, self.slot)?
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|(root, _)| root)
|
||||
== Some(self.root))
|
||||
}
|
||||
}
|
||||
|
||||
impl StoreItem for OptimisticTransitionBlock {
|
||||
fn db_column() -> DBColumn {
|
||||
OTBColumn
|
||||
}
|
||||
|
||||
fn as_store_bytes(&self) -> Vec<u8> {
|
||||
self.as_ssz_bytes()
|
||||
}
|
||||
|
||||
fn from_store_bytes(bytes: &[u8]) -> Result<Self, StoreError> {
|
||||
Ok(Self::from_ssz_bytes(bytes)?)
|
||||
}
|
||||
}
|
||||
|
||||
/// Divisor applied to the epoch duration to obtain the extra delay, after the start of an
/// epoch, at which the OTB verification routine fires.
///
/// The routine is expected to run once per epoch, 1/4th through the epoch.
|
||||
pub const EPOCH_DELAY_FACTOR: u32 = 4;
|
||||
|
||||
/// Spawns a routine which checks the validity of any optimistically imported transition blocks
|
||||
///
|
||||
/// This routine will run once per epoch, at `epoch_duration / EPOCH_DELAY_FACTOR` after
|
||||
/// the start of each epoch.
|
||||
///
|
||||
/// The service will not be started if there is no `execution_layer` on the `chain`.
|
||||
pub fn start_otb_verification_service<T: BeaconChainTypes>(
|
||||
executor: TaskExecutor,
|
||||
chain: Arc<BeaconChain<T>>,
|
||||
) {
|
||||
// Avoid spawning the service if there's no EL, it'll just error anyway.
|
||||
if chain.execution_layer.is_some() {
|
||||
executor.spawn(
|
||||
async move { otb_verification_service(chain).await },
|
||||
"otb_verification_service",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_optimistic_transition_blocks<T: BeaconChainTypes>(
|
||||
chain: &BeaconChain<T>,
|
||||
) -> Result<Vec<OptimisticTransitionBlock>, StoreError> {
|
||||
process_results(chain.store.hot_db.iter_column(OTBColumn), |iter| {
|
||||
iter.map(|(_, bytes)| OptimisticTransitionBlock::from_store_bytes(&bytes))
|
||||
.collect()
|
||||
})?
|
||||
}
|
||||
|
||||
/// Failures that abort a run of `validate_optimistic_transition_blocks`.
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// The finalized block could not be read from fork choice; holds the `Debug` rendering of
/// the underlying error.
ForkChoice(String),
|
||||
/// A beacon chain operation failed (canonical-root lookup, block load, or payload
/// invalidation).
BeaconChain(BeaconChainError),
|
||||
/// Removing a record from the store failed.
StoreError(StoreError),
|
||||
/// The chain returned no block for the root of the contained record.
NoBlockFound(OptimisticTransitionBlock),
|
||||
}
|
||||
|
||||
pub async fn validate_optimistic_transition_blocks<T: BeaconChainTypes>(
|
||||
chain: &Arc<BeaconChain<T>>,
|
||||
otbs: Vec<OptimisticTransitionBlock>,
|
||||
) -> Result<(), Error> {
|
||||
let finalized_slot = chain
|
||||
.canonical_head
|
||||
.fork_choice_read_lock()
|
||||
.get_finalized_block()
|
||||
.map_err(|e| Error::ForkChoice(format!("{:?}", e)))?
|
||||
.slot;
|
||||
|
||||
// separate otbs into
|
||||
// non-canonical
|
||||
// finalized canonical
|
||||
// unfinalized canonical
|
||||
let mut non_canonical_otbs = vec![];
|
||||
let (finalized_canonical_otbs, unfinalized_canonical_otbs) = process_results(
|
||||
otbs.into_iter().map(|otb| {
|
||||
otb.is_canonical(chain)
|
||||
.map(|is_canonical| (otb, is_canonical))
|
||||
}),
|
||||
|pair_iter| {
|
||||
pair_iter
|
||||
.filter_map(|(otb, is_canonical)| {
|
||||
if is_canonical {
|
||||
Some(otb)
|
||||
} else {
|
||||
non_canonical_otbs.push(otb);
|
||||
None
|
||||
}
|
||||
})
|
||||
.partition::<Vec<_>, _>(|otb| *otb.slot() <= finalized_slot)
|
||||
},
|
||||
)
|
||||
.map_err(Error::BeaconChain)?;
|
||||
|
||||
// remove non-canonical blocks that conflict with finalized checkpoint from the database
|
||||
for otb in non_canonical_otbs {
|
||||
if *otb.slot() <= finalized_slot {
|
||||
otb.remove_from_store::<T, _>(&chain.store)
|
||||
.map_err(Error::StoreError)?;
|
||||
}
|
||||
}
|
||||
|
||||
// ensure finalized canonical otb are valid, otherwise kill client
|
||||
for otb in finalized_canonical_otbs {
|
||||
match chain.get_block(otb.root()).await {
|
||||
Ok(Some(block)) => {
|
||||
match validate_merge_block(chain, block.message(), AllowOptimisticImport::No).await
|
||||
{
|
||||
Ok(()) => {
|
||||
// merge transition block is valid, remove it from OTB
|
||||
otb.remove_from_store::<T, _>(&chain.store)
|
||||
.map_err(Error::StoreError)?;
|
||||
info!(
|
||||
chain.log,
|
||||
"Validated merge transition block";
|
||||
"block_root" => ?otb.root(),
|
||||
"type" => "finalized"
|
||||
);
|
||||
}
|
||||
// The block was not able to be verified by the EL. Leave the OTB in the
|
||||
// database since the EL is likely still syncing and may verify the block
|
||||
// later.
|
||||
Err(BlockError::ExecutionPayloadError(
|
||||
ExecutionPayloadError::UnverifiedNonOptimisticCandidate,
|
||||
)) => (),
|
||||
Err(BlockError::ExecutionPayloadError(
|
||||
ExecutionPayloadError::InvalidTerminalPoWBlock { .. },
|
||||
)) => {
|
||||
// Finalized Merge Transition Block is Invalid! Kill the Client!
|
||||
crit!(
|
||||
chain.log,
|
||||
"Finalized merge transition block is invalid!";
|
||||
"msg" => "You must use the `--purge-db` flag to clear the database and restart sync. \
|
||||
You may be on a hostile network.",
|
||||
"block_hash" => ?block.canonical_root()
|
||||
);
|
||||
let mut shutdown_sender = chain.shutdown_sender();
|
||||
if let Err(e) = shutdown_sender.try_send(ShutdownReason::Failure(
|
||||
INVALID_FINALIZED_MERGE_TRANSITION_BLOCK_SHUTDOWN_REASON,
|
||||
)) {
|
||||
crit!(
|
||||
chain.log,
|
||||
"Failed to shut down client";
|
||||
"error" => ?e,
|
||||
"shutdown_reason" => INVALID_FINALIZED_MERGE_TRANSITION_BLOCK_SHUTDOWN_REASON
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(None) => return Err(Error::NoBlockFound(otb)),
|
||||
// Our database has pruned the payload and the payload was unavailable on the EL since
|
||||
// the EL is still syncing or the payload is non-canonical.
|
||||
Err(BeaconChainError::BlockHashMissingFromExecutionLayer(_)) => (),
|
||||
Err(e) => return Err(Error::BeaconChain(e)),
|
||||
}
|
||||
}
|
||||
|
||||
// attempt to validate any non-finalized canonical otb blocks
|
||||
for otb in unfinalized_canonical_otbs {
|
||||
match chain.get_block(otb.root()).await {
|
||||
Ok(Some(block)) => {
|
||||
match validate_merge_block(chain, block.message(), AllowOptimisticImport::No).await
|
||||
{
|
||||
Ok(()) => {
|
||||
// merge transition block is valid, remove it from OTB
|
||||
otb.remove_from_store::<T, _>(&chain.store)
|
||||
.map_err(Error::StoreError)?;
|
||||
info!(
|
||||
chain.log,
|
||||
"Validated merge transition block";
|
||||
"block_root" => ?otb.root(),
|
||||
"type" => "not finalized"
|
||||
);
|
||||
}
|
||||
// The block was not able to be verified by the EL. Leave the OTB in the
|
||||
// database since the EL is likely still syncing and may verify the block
|
||||
// later.
|
||||
Err(BlockError::ExecutionPayloadError(
|
||||
ExecutionPayloadError::UnverifiedNonOptimisticCandidate,
|
||||
)) => (),
|
||||
Err(BlockError::ExecutionPayloadError(
|
||||
ExecutionPayloadError::InvalidTerminalPoWBlock { .. },
|
||||
)) => {
|
||||
// Unfinalized Merge Transition Block is Invalid -> Run process_invalid_execution_payload
|
||||
warn!(
|
||||
chain.log,
|
||||
"Merge transition block invalid";
|
||||
"block_root" => ?otb.root()
|
||||
);
|
||||
chain
|
||||
.process_invalid_execution_payload(
|
||||
&InvalidationOperation::InvalidateOne {
|
||||
block_root: *otb.root(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
warn!(
|
||||
chain.log,
|
||||
"Error checking merge transition block";
|
||||
"error" => ?e,
|
||||
"location" => "process_invalid_execution_payload"
|
||||
);
|
||||
Error::BeaconChain(e)
|
||||
})?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(None) => return Err(Error::NoBlockFound(otb)),
|
||||
// Our database has pruned the payload and the payload was unavailable on the EL since
|
||||
// the EL is still syncing or the payload is non-canonical.
|
||||
Err(BeaconChainError::BlockHashMissingFromExecutionLayer(_)) => (),
|
||||
Err(e) => return Err(Error::BeaconChain(e)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Loop until any optimistically imported merge transition blocks have been verified and
|
||||
/// the merge has been finalized.
|
||||
async fn otb_verification_service<T: BeaconChainTypes>(chain: Arc<BeaconChain<T>>) {
|
||||
let epoch_duration = chain.slot_clock.slot_duration() * T::EthSpec::slots_per_epoch() as u32;
|
||||
loop {
|
||||
match chain
|
||||
.slot_clock
|
||||
.duration_to_next_epoch(T::EthSpec::slots_per_epoch())
|
||||
{
|
||||
Some(duration) => {
|
||||
let additional_delay = epoch_duration / EPOCH_DELAY_FACTOR;
|
||||
sleep(duration + additional_delay).await;
|
||||
|
||||
debug!(
|
||||
chain.log,
|
||||
"OTB verification service firing";
|
||||
);
|
||||
|
||||
if !is_merge_transition_complete(
|
||||
&chain.canonical_head.cached_head().snapshot.beacon_state,
|
||||
) {
|
||||
// We are pre-merge. Nothing to do yet.
|
||||
continue;
|
||||
}
|
||||
|
||||
// load all optimistically imported transition blocks from the database
|
||||
match load_optimistic_transition_blocks(chain.as_ref()) {
|
||||
Ok(otbs) => {
|
||||
if otbs.is_empty() {
|
||||
if chain
|
||||
.canonical_head
|
||||
.fork_choice_read_lock()
|
||||
.get_finalized_block()
|
||||
.map_or(false, |block| {
|
||||
block.execution_status.is_execution_enabled()
|
||||
})
|
||||
{
|
||||
// there are no optimistic blocks in the database, we can exit
|
||||
// the service since the merge transition is finalized and we'll
|
||||
// never see another transition block
|
||||
break;
|
||||
} else {
|
||||
debug!(
|
||||
chain.log,
|
||||
"No optimistic transition blocks";
|
||||
"info" => "waiting for the merge transition to finalize"
|
||||
)
|
||||
}
|
||||
}
|
||||
if let Err(e) = validate_optimistic_transition_blocks(&chain, otbs).await {
|
||||
warn!(
|
||||
chain.log,
|
||||
"Error while validating optimistic transition blocks";
|
||||
"error" => ?e
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
chain.log,
|
||||
"Error loading optimistic transition blocks";
|
||||
"error" => ?e
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
None => {
|
||||
error!(chain.log, "Failed to read slot clock");
|
||||
// If we can't read the slot clock, just wait another slot.
|
||||
sleep(chain.slot_clock.slot_duration()).await;
|
||||
}
|
||||
};
|
||||
}
|
||||
debug!(
|
||||
chain.log,
|
||||
"No optimistic transition blocks in database";
|
||||
"msg" => "shutting down OTB verification service"
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user