mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-16 12:28:24 +00:00
Reset payload statuses when resuming fork choice (#3498)
## Issue Addressed
NA
## Proposed Changes
This PR is motivated by a recent consensus failure in Geth where it returned `INVALID` for a `VALID` block. Without this PR, the only way to recover is by re-syncing Lighthouse. Whilst ELs "shouldn't have consensus failures", in reality it's something that we can expect from time to time due to the complex nature of Ethereum. Being able to recover easily will help the network recover and EL devs to troubleshoot.
The risk introduced with this PR is that genuinely INVALID payloads get a "second chance" at being imported. I believe the DoS risk here is negligible since LH needs to be restarted in order to re-process the payload. Furthermore, there's no reason to think that a well-performing EL will accept a truly invalid payload the second time around.
## Additional Info
This implementation has the following intricacies:
1. Instead of just resetting *invalid* payloads to optimistic, we'll also reset *valid* payloads. This is an artifact of our existing implementation.
1. We will only reset payload statuses when we detect an invalid payload present in `proto_array`
- This helps save us from forgetting that all our blocks are valid in the "best case scenario" where there are no invalid blocks.
1. If we fail to revert the payload statuses we'll log a `CRIT` and just continue with a `proto_array` that *does not* have reverted payload statuses.
- The code to revert statuses needs to deal with balances and proposer-boost, so it's a failure point. This is a defensive measure to avoid introducing new show-stopping bugs to LH.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use crate::{ForkChoiceStore, InvalidationOperation};
|
||||
use proto_array::{Block as ProtoBlock, ExecutionStatus, ProtoArrayForkChoice};
|
||||
use slog::{crit, debug, warn, Logger};
|
||||
use ssz_derive::{Decode, Encode};
|
||||
use state_processing::{
|
||||
per_block_processing::errors::AttesterSlashingValidationError, per_epoch_processing,
|
||||
@@ -79,6 +80,26 @@ impl<T> From<state_processing::EpochProcessingError> for Error<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls how fork choice should behave when restoring from a persisted fork choice.
#[derive(Debug, Clone, Copy)]
pub enum ResetPayloadStatuses {
    /// Reset all payload statuses back to "optimistic".
    Always,
    /// Only reset all payload statuses back to "optimistic" when an "invalid" block is present.
    OnlyWithInvalidPayload,
}

impl ResetPayloadStatuses {
    /// Selects the reset policy from a boolean flag.
    ///
    /// Returns `ResetPayloadStatuses::Always` when `should_always_reset` is `true`, otherwise
    /// `ResetPayloadStatuses::OnlyWithInvalidPayload`.
    pub fn always_reset_conditionally(should_always_reset: bool) -> Self {
        if should_always_reset {
            ResetPayloadStatuses::Always
        } else {
            ResetPayloadStatuses::OnlyWithInvalidPayload
        }
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum InvalidBlock {
|
||||
UnknownParent(Hash256),
|
||||
@@ -1425,15 +1446,68 @@ where
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Instantiate `Self` from some `PersistedForkChoice` generated by a earlier call to
|
||||
/// `Self::to_persisted`.
|
||||
pub fn proto_array_from_persisted(
|
||||
persisted: &PersistedForkChoice,
|
||||
reset_payload_statuses: ResetPayloadStatuses,
|
||||
spec: &ChainSpec,
|
||||
log: &Logger,
|
||||
) -> Result<ProtoArrayForkChoice, Error<T::Error>> {
|
||||
let mut proto_array = ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes)
|
||||
.map_err(Error::InvalidProtoArrayBytes)?;
|
||||
let contains_invalid_payloads = proto_array.contains_invalid_payloads();
|
||||
|
||||
debug!(
|
||||
log,
|
||||
"Restoring fork choice from persisted";
|
||||
"reset_payload_statuses" => ?reset_payload_statuses,
|
||||
"contains_invalid_payloads" => contains_invalid_payloads,
|
||||
);
|
||||
|
||||
// Exit early if there are no "invalid" payloads, if requested.
|
||||
if matches!(
|
||||
reset_payload_statuses,
|
||||
ResetPayloadStatuses::OnlyWithInvalidPayload
|
||||
) && !contains_invalid_payloads
|
||||
{
|
||||
return Ok(proto_array);
|
||||
}
|
||||
|
||||
// Reset all blocks back to being "optimistic". This helps recover from an EL consensus
|
||||
// fault where an invalid payload becomes valid.
|
||||
if let Err(e) = proto_array.set_all_blocks_to_optimistic::<E>(spec) {
|
||||
// If there is an error resetting the optimistic status then log loudly and revert
|
||||
// back to a proto-array which does not have the reset applied. This indicates a
|
||||
// significant error in Lighthouse and warrants detailed investigation.
|
||||
crit!(
|
||||
log,
|
||||
"Failed to reset payload statuses";
|
||||
"error" => e,
|
||||
"info" => "please report this error",
|
||||
);
|
||||
ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes)
|
||||
.map_err(Error::InvalidProtoArrayBytes)
|
||||
} else {
|
||||
debug!(
|
||||
log,
|
||||
"Successfully reset all payload statuses";
|
||||
);
|
||||
Ok(proto_array)
|
||||
}
|
||||
}
|
||||
|
||||
/// Instantiate `Self` from some `PersistedForkChoice` generated by an earlier call to
|
||||
/// `Self::to_persisted`.
|
||||
pub fn from_persisted(
|
||||
persisted: PersistedForkChoice,
|
||||
reset_payload_statuses: ResetPayloadStatuses,
|
||||
fc_store: T,
|
||||
spec: &ChainSpec,
|
||||
log: &Logger,
|
||||
) -> Result<Self, Error<T::Error>> {
|
||||
let proto_array = ProtoArrayForkChoice::from_bytes(&persisted.proto_array_bytes)
|
||||
.map_err(Error::InvalidProtoArrayBytes)?;
|
||||
let proto_array =
|
||||
Self::proto_array_from_persisted(&persisted, reset_payload_statuses, spec, log)?;
|
||||
|
||||
let current_slot = fc_store.get_current_slot();
|
||||
|
||||
@@ -1456,7 +1530,16 @@ where
|
||||
// If a call to `get_head` fails, the only known cause is because the only head with viable
|
||||
// FFG properties has an invalid payload. In this scenario, set all the payloads back to
|
||||
// an optimistic status so that we can have a head to start from.
|
||||
if fork_choice.get_head(current_slot, spec).is_err() {
|
||||
if let Err(e) = fork_choice.get_head(current_slot, spec) {
|
||||
warn!(
|
||||
log,
|
||||
"Could not find head on persisted FC";
|
||||
"info" => "resetting all payload statuses and retrying",
|
||||
"error" => ?e
|
||||
);
|
||||
// Although we may have already made this call whilst loading `proto_array`, try it
|
||||
// again since we may have mutated the `proto_array` during `get_head` and therefore may
|
||||
// get a different result.
|
||||
fork_choice
|
||||
.proto_array
|
||||
.set_all_blocks_to_optimistic::<E>(spec)?;
|
||||
|
||||
Reference in New Issue
Block a user