diff --git a/Cargo.lock b/Cargo.lock index 46cd2d96f0..faad708f41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2235,6 +2235,7 @@ dependencies = [ "eth2_ssz", "eth2_ssz_derive", "proto_array", + "slog", "state_processing", "store", "tokio", diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index fdcd3eed88..e1d7a5cfc2 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -58,7 +58,7 @@ use execution_layer::{ }; use fork_choice::{ AttestationFromBlock, ExecutionStatus, ForkChoice, ForkchoiceUpdateParameters, - InvalidationOperation, PayloadVerificationStatus, + InvalidationOperation, PayloadVerificationStatus, ResetPayloadStatuses, }; use futures::channel::mpsc::Sender; use itertools::process_results; @@ -432,7 +432,9 @@ impl BeaconChain { /// Load fork choice from disk, returning `None` if it isn't found. pub fn load_fork_choice( store: BeaconStore, + reset_payload_statuses: ResetPayloadStatuses, spec: &ChainSpec, + log: &Logger, ) -> Result>, Error> { let persisted_fork_choice = match store.get_item::(&FORK_CHOICE_DB_KEY)? { @@ -445,8 +447,10 @@ impl BeaconChain { Ok(Some(ForkChoice::from_persisted( persisted_fork_choice.fork_choice, + reset_payload_statuses, fc_store, spec, + log, )?)) } @@ -2925,10 +2929,15 @@ impl BeaconChain { // Since the write failed, try to revert the canonical head back to what was stored // in the database. This attempts to prevent inconsistency between the database and // fork choice. - if let Err(e) = - self.canonical_head - .restore_from_store(fork_choice, &self.store, &self.spec) - { + if let Err(e) = self.canonical_head.restore_from_store( + fork_choice, + ResetPayloadStatuses::always_reset_conditionally( + self.config.always_reset_payload_statuses, + ), + &self.store, + &self.spec, + &self.log, + ) { crit!( self.log, "No stored fork choice found to restore from"; diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index cba9a56982..2704690442 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -17,7 +17,7 @@ use crate::{ }; use eth1::Config as Eth1Config; use execution_layer::ExecutionLayer; -use fork_choice::ForkChoice; +use fork_choice::{ForkChoice, ResetPayloadStatuses}; use futures::channel::mpsc::Sender; use operation_pool::{OperationPool, PersistedOperationPool}; use parking_lot::RwLock; @@ -245,7 +245,11 @@ where let fork_choice = BeaconChain::>::load_fork_choice( store.clone(), + ResetPayloadStatuses::always_reset_conditionally( + self.chain_config.always_reset_payload_statuses, + ), &self.spec, + log, ) .map_err(|e| format!("Unable to load fork choice from disk: {:?}", e))? .ok_or("Fork choice not found in store")?; diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 6559487980..166ba85720 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -43,7 +43,9 @@ use crate::{ BeaconChain, BeaconChainError as Error, BeaconChainTypes, BeaconSnapshot, }; use eth2::types::{EventKind, SseChainReorg, SseFinalizedCheckpoint, SseHead, SseLateHead}; -use fork_choice::{ExecutionStatus, ForkChoiceView, ForkchoiceUpdateParameters, ProtoBlock}; +use fork_choice::{ + ExecutionStatus, ForkChoiceView, ForkchoiceUpdateParameters, ProtoBlock, ResetPayloadStatuses, +}; use itertools::process_results; use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use slog::{crit, debug, error, warn, Logger}; @@ -249,11 +251,14 @@ impl CanonicalHead { // and it needs to be dropped to prevent a dead-lock. Requiring it to be passed here is // defensive programming. mut fork_choice_write_lock: RwLockWriteGuard>, + reset_payload_statuses: ResetPayloadStatuses, store: &BeaconStore, spec: &ChainSpec, + log: &Logger, ) -> Result<(), Error> { - let fork_choice = >::load_fork_choice(store.clone(), spec)? - .ok_or(Error::MissingPersistedForkChoice)?; + let fork_choice = + >::load_fork_choice(store.clone(), reset_payload_statuses, spec, log)? + .ok_or(Error::MissingPersistedForkChoice)?; let fork_choice_view = fork_choice.cached_fork_choice_view(); let beacon_block_root = fork_choice_view.head_block_root; let beacon_block = store diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index ba3a0b628c..ad2b7abe5a 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -34,7 +34,12 @@ pub struct ChainConfig { pub builder_fallback_epochs_since_finalization: usize, /// Whether any chain health checks should be considered when deciding whether to use the builder API. pub builder_fallback_disable_checks: bool, + /// When set to `true`, weigh the "unrealized" FFG progression when choosing a head in fork + /// choice. pub count_unrealized: bool, + /// When set to `true`, forget any valid/invalid/optimistic statuses in fork choice during start + /// up. + pub always_reset_payload_statuses: bool, /// Whether to apply paranoid checks to blocks proposed by this beacon node. pub paranoid_block_proposal: bool, } @@ -54,6 +59,7 @@ impl Default for ChainConfig { builder_fallback_epochs_since_finalization: 3, builder_fallback_disable_checks: false, count_unrealized: true, + always_reset_payload_statuses: false, paranoid_block_proposal: false, } } diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 3df95a0a5d..fe2afb0213 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -786,4 +786,12 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .takes_value(true) .default_value("true") ) + .arg( + Arg::with_name("reset-payload-statuses") + .long("reset-payload-statuses") + .help("When present, Lighthouse will forget the payload statuses of any \ + already-imported blocks. This can assist in the recovery from a consensus \ + failure caused by the execution layer.") + .takes_value(false) + ) } diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index f08981b103..caa10f555d 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -644,6 +644,9 @@ pub fn get_config( client_config.chain.count_unrealized = clap_utils::parse_required(cli_args, "count-unrealized")?; + client_config.chain.always_reset_payload_statuses = + cli_args.is_present("reset-payload-statuses"); + client_config.chain.paranoid_block_proposal = cli_args.is_present("paranoid-block-proposal"); /* diff --git a/consensus/fork_choice/Cargo.toml b/consensus/fork_choice/Cargo.toml index b2570092e6..52a738351e 100644 --- a/consensus/fork_choice/Cargo.toml +++ b/consensus/fork_choice/Cargo.toml @@ -12,6 +12,7 @@ state_processing = { path = "../state_processing" } proto_array = { path = "../proto_array" } eth2_ssz = "0.4.1" eth2_ssz_derive = "0.3.0" +slog = { version = "2.5.2", features = ["max_level_trace", "release_max_level_trace"] } [dev-dependencies] beacon_chain = { path = "../../beacon_node/beacon_chain" } diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 3341fc5c22..f55a283ed1 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -1,5 +1,6 @@ use crate::{ForkChoiceStore, InvalidationOperation}; use proto_array::{Block as ProtoBlock, ExecutionStatus, ProtoArrayForkChoice}; +use slog::{crit, debug, warn, Logger}; use ssz_derive::{Decode, Encode}; use state_processing::{ per_block_processing::errors::AttesterSlashingValidationError, per_epoch_processing, @@ -79,6 +80,26 @@ impl From for Error { } } +#[derive(Debug, Clone, Copy)] +/// Controls how fork choice should behave when restoring from a persisted fork choice. +pub enum ResetPayloadStatuses { + /// Reset all payload statuses back to "optimistic". + Always, + /// Only reset all payload statuses back to "optimistic" when an "invalid" block is present. + OnlyWithInvalidPayload, +} + +impl ResetPayloadStatuses { + /// When `should_always_reset == True`, return `ResetPayloadStatuses::Always`. + pub fn always_reset_conditionally(should_always_reset: bool) -> Self { + if should_always_reset { + ResetPayloadStatuses::Always + } else { + ResetPayloadStatuses::OnlyWithInvalidPayload + } + } +} + #[derive(Debug)] pub enum InvalidBlock { UnknownParent(Hash256), @@ -1425,15 +1446,68 @@ where .map_err(Into::into) } + /// Instantiate `Self` from some `PersistedForkChoice` generated by a earlier call to + /// `Self::to_persisted`. + pub fn proto_array_from_persisted( + persisted: &PersistedForkChoice, + reset_payload_statuses: ResetPayloadStatuses, + spec: &ChainSpec, + log: &Logger, + ) -> Result> { + let mut proto_array = ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes) + .map_err(Error::InvalidProtoArrayBytes)?; + let contains_invalid_payloads = proto_array.contains_invalid_payloads(); + + debug!( + log, + "Restoring fork choice from persisted"; + "reset_payload_statuses" => ?reset_payload_statuses, + "contains_invalid_payloads" => contains_invalid_payloads, + ); + + // Exit early if there are no "invalid" payloads, if requested. + if matches!( + reset_payload_statuses, + ResetPayloadStatuses::OnlyWithInvalidPayload + ) && !contains_invalid_payloads + { + return Ok(proto_array); + } + + // Reset all blocks back to being "optimistic". This helps recover from an EL consensus + // fault where an invalid payload becomes valid. + if let Err(e) = proto_array.set_all_blocks_to_optimistic::(spec) { + // If there is an error resetting the optimistic status then log loudly and revert + // back to a proto-array which does not have the reset applied. This indicates a + // significant error in Lighthouse and warrants detailed investigation. + crit!( + log, + "Failed to reset payload statuses"; + "error" => e, + "info" => "please report this error", + ); + ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes) + .map_err(Error::InvalidProtoArrayBytes) + } else { + debug!( + log, + "Successfully reset all payload statuses"; + ); + Ok(proto_array) + } + } + /// Instantiate `Self` from some `PersistedForkChoice` generated by a earlier call to /// `Self::to_persisted`. pub fn from_persisted( persisted: PersistedForkChoice, + reset_payload_statuses: ResetPayloadStatuses, fc_store: T, spec: &ChainSpec, + log: &Logger, ) -> Result> { - let proto_array = ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes) - .map_err(Error::InvalidProtoArrayBytes)?; + let proto_array = + Self::proto_array_from_persisted(&persisted, reset_payload_statuses, spec, log)?; let current_slot = fc_store.get_current_slot(); @@ -1456,7 +1530,16 @@ where // If a call to `get_head` fails, the only known cause is because the only head with viable // FFG properties is has an invalid payload. In this scenario, set all the payloads back to // an optimistic status so that we can have a head to start from. - if fork_choice.get_head(current_slot, spec).is_err() { + if let Err(e) = fork_choice.get_head(current_slot, spec) { + warn!( + log, + "Could not find head on persisted FC"; + "info" => "resetting all payload statuses and retrying", + "error" => ?e + ); + // Although we may have already made this call whilst loading `proto_array`, try it + // again since we may have mutated the `proto_array` during `get_head` and therefore may + // get a different result. fork_choice .proto_array .set_all_blocks_to_optimistic::(spec)?; diff --git a/consensus/fork_choice/src/fork_choice_store.rs b/consensus/fork_choice/src/fork_choice_store.rs index 6a4616e9f3..9604e25475 100644 --- a/consensus/fork_choice/src/fork_choice_store.rs +++ b/consensus/fork_choice/src/fork_choice_store.rs @@ -1,4 +1,5 @@ use std::collections::BTreeSet; +use std::fmt::Debug; use types::{BeaconBlockRef, BeaconState, Checkpoint, EthSpec, ExecPayload, Hash256, Slot}; /// Approximates the `Store` in "Ethereum 2.0 Phase 0 -- Beacon Chain Fork Choice": @@ -18,7 +19,7 @@ use types::{BeaconBlockRef, BeaconState, Checkpoint, EthSpec, ExecPayload, Hash2 /// concrete struct is to allow this crate to be free from "impure" on-disk database logic, /// hopefully making auditing easier. pub trait ForkChoiceStore: Sized { - type Error; + type Error: Debug; /// Returns the last value passed to `Self::set_current_slot`. fn get_current_slot(&self) -> Slot; diff --git a/consensus/fork_choice/src/lib.rs b/consensus/fork_choice/src/lib.rs index 6cb2010f1a..397a2ff893 100644 --- a/consensus/fork_choice/src/lib.rs +++ b/consensus/fork_choice/src/lib.rs @@ -4,7 +4,7 @@ mod fork_choice_store; pub use crate::fork_choice::{ AttestationFromBlock, CountUnrealized, Error, ForkChoice, ForkChoiceView, ForkchoiceUpdateParameters, InvalidAttestation, InvalidBlock, PayloadVerificationStatus, - PersistedForkChoice, QueuedAttestation, + PersistedForkChoice, QueuedAttestation, ResetPayloadStatuses, }; pub use fork_choice_store::ForkChoiceStore; pub use proto_array::{Block as ProtoBlock, ExecutionStatus, InvalidationOperation}; diff --git a/consensus/proto_array/src/proto_array_fork_choice.rs b/consensus/proto_array/src/proto_array_fork_choice.rs index 9902ccb1cc..cc3f92d46e 100644 --- a/consensus/proto_array/src/proto_array_fork_choice.rs +++ b/consensus/proto_array/src/proto_array_fork_choice.rs @@ -317,6 +317,17 @@ impl ProtoArrayForkChoice { .map_err(|e| format!("find_head failed: {:?}", e)) } + /// Returns `true` if there are any blocks in `self` with an `INVALID` execution payload status. + /// + /// This will operate on *all* blocks, even those that do not descend from the finalized + /// ancestor. + pub fn contains_invalid_payloads(&mut self) -> bool { + self.proto_array + .nodes + .iter() + .any(|node| node.execution_status.is_invalid()) + } + /// For all nodes, regardless of their relationship to the finalized block, set their execution /// status to be optimistic. /// diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 14934a5669..4e110b85a1 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -178,6 +178,21 @@ fn count_unrealized_true() { .with_config(|config| assert!(config.chain.count_unrealized)); } +#[test] +fn reset_payload_statuses_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.chain.always_reset_payload_statuses)); +} + +#[test] +fn reset_payload_statuses_present() { + CommandLineTest::new() + .flag("reset-payload-statuses", None) + .run_with_zero_port() + .with_config(|config| assert!(config.chain.always_reset_payload_statuses)); +} + #[test] fn freezer_dir_flag() { let dir = TempDir::new().expect("Unable to create temporary directory");