Don't return errors when fork choice fails (#3370)

## Issue Addressed

NA

## Proposed Changes

There are scenarios where the only viable head has an invalid execution payload. In this scenario, the `get_head` function on `proto_array` returns an error, and we must recover by importing blocks from the network.

This PR stops `BeaconChain::recompute_head` from returning an error so that we can't accidentally start down-scoring peers or aborting block import just because the current head has an invalid payload.

## Reviewer Notes

The following changes are included:

1. Allow `fork_choice.get_head` to fail gracefully in `BeaconChain::process_block` when trying to update the `early_attester_cache`; simply don't add the block to the cache rather than aborting the entire process.
1. Don't return an error from `BeaconChain::recompute_head_at_current_slot` and `BeaconChain::recompute_head` to defensively prevent calling functions from aborting any process just because the fork choice function failed to run.
    - This should have practically no effect, since most callers were still continuing if recomputing the head failed.
    - The outlier is that the API will return 200 rather than a 500 when fork choice fails.
1. Add the `ProtoArrayForkChoice::set_all_blocks_to_optimistic` function to recover from the scenario where we've rebooted and the persisted fork choice has an invalid head.
This commit is contained in:
Paul Hauner
2022-07-28 13:57:09 +00:00
parent d04fde3ba9
commit 25f0e261cb
16 changed files with 466 additions and 147 deletions

View File

@@ -160,11 +160,7 @@ async fn chain_segment_full_segment() {
.into_block_error()
.expect("should import chain segment");
harness
.chain
.recompute_head_at_current_slot()
.await
.expect("should run fork choice");
harness.chain.recompute_head_at_current_slot().await;
assert_eq!(
harness.head_block_root(),
@@ -194,11 +190,7 @@ async fn chain_segment_varying_chunk_size() {
.unwrap_or_else(|_| panic!("should import chain segment of len {}", chunk_size));
}
harness
.chain
.recompute_head_at_current_slot()
.await
.expect("should run fork choice");
harness.chain.recompute_head_at_current_slot().await;
assert_eq!(
harness.head_block_root(),
@@ -729,11 +721,7 @@ async fn block_gossip_verification() {
}
// Recompute the head to ensure we cache the latest view of fork choice.
harness
.chain
.recompute_head_at_current_slot()
.await
.unwrap();
harness.chain.recompute_head_at_current_slot().await;
/*
* This test ensures that:

View File

@@ -1,6 +1,7 @@
#![cfg(not(debug_assertions))]
use beacon_chain::{
canonical_head::{CachedHead, CanonicalHead},
test_utils::{BeaconChainHarness, EphemeralHarnessType},
BeaconChainError, BlockError, ExecutionPayloadError, StateSkipConfig, WhenSlotSkipped,
INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON,
@@ -14,6 +15,7 @@ use fork_choice::{
};
use proto_array::{Error as ProtoArrayError, ExecutionStatus};
use slot_clock::SlotClock;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use task_executor::ShutdownReason;
@@ -95,11 +97,15 @@ impl InvalidPayloadRig {
}
/// Calls `recompute_head_at_current_slot` on the harness chain and awaits it.
// NOTE(review): this hunk appears to show both the removed multi-line `.unwrap()` form and
// the added single-line form of the same call; confirm which lines are live in the real file.
async fn recompute_head(&self) {
self.harness
.chain
.recompute_head_at_current_slot()
.await
.unwrap();
self.harness.chain.recompute_head_at_current_slot().await;
}
/// Returns the head currently cached by the harness chain's `canonical_head`.
fn cached_head(&self) -> CachedHead<E> {
    let chain = &self.harness.chain;
    chain.canonical_head.cached_head()
}
/// Borrows the `canonical_head` field of the harness chain.
fn canonical_head(&self) -> &CanonicalHead<EphemeralHarnessType<E>> {
    let chain = &self.harness.chain;
    &chain.canonical_head
}
fn previous_forkchoice_update_params(&self) -> (ForkChoiceState, PayloadAttributes) {
@@ -354,6 +360,19 @@ impl InvalidPayloadRig {
.await
.unwrap();
}
/// Runs `get_head` under the fork choice write lock and panics unless it fails with a
/// `ForkChoiceError::ProtoArrayError` whose payload contains `s`.
fn assert_get_head_error_contains(&self, s: &str) {
    let chain = &self.harness.chain;

    // Acquire the write lock before reading the slot, matching the order in which the
    // receiver and arguments of a chained call are evaluated.
    let mut fork_choice = chain.canonical_head.fork_choice_write_lock();
    let current_slot = chain.slot().unwrap();
    let outcome = fork_choice.get_head(current_slot, &chain.spec);

    let is_expected_error =
        matches!(&outcome, Err(ForkChoiceError::ProtoArrayError(e)) if e.contains(s));
    if !is_expected_error {
        panic!("expected {} error, got {:?}", s, outcome);
    }
}
}
/// Simple test of the different import types.
@@ -1183,3 +1202,235 @@ async fn attesting_to_optimistic_head() {
get_aggregated().unwrap();
get_aggregated_by_slot_and_root().unwrap();
}
/// Helper for running tests where we generate a chain with an invalid head and then some
/// `fork_blocks` to recover it.
struct InvalidHeadSetup {
/// Rig whose current head block has been manually invalidated.
rig: InvalidPayloadRig,
/// Blocks built on top of the state three slots before the invalid head. They conflict with
/// the canonical head and have not been applied to the chain.
fork_blocks: Vec<Arc<SignedBeaconBlock<E>>>,
/// The cached head captured just before it was invalidated.
invalid_head: CachedHead<E>,
}
impl InvalidHeadSetup {
async fn new() -> InvalidHeadSetup {
let mut rig = InvalidPayloadRig::new().enable_attestations();
rig.move_to_terminal_block();
rig.import_block(Payload::Valid).await; // Import a valid transition block.
// Import blocks until the first time the chain finalizes.
while rig.cached_head().finalized_checkpoint().epoch == 0 {
rig.import_block(Payload::Syncing).await;
}
let invalid_head = rig.cached_head();
// Invalidate the head block.
rig.invalidate_manually(invalid_head.head_block_root())
.await;
assert!(rig
.canonical_head()
.head_execution_status()
.unwrap()
.is_invalid());
// Finding a new head should fail since the only possible head is not valid.
rig.assert_get_head_error_contains("InvalidBestNode");
// Build three "fork" blocks that conflict with the current canonical head. Don't apply them to
// the chain yet.
let mut fork_blocks = vec![];
let mut parent_state = rig
.harness
.chain
.state_at_slot(
invalid_head.head_slot() - 3,
StateSkipConfig::WithStateRoots,
)
.unwrap();
for _ in 0..3 {
let slot = parent_state.slot() + 1;
let (fork_block, post_state) = rig.harness.make_block(parent_state, slot).await;
parent_state = post_state;
fork_blocks.push(Arc::new(fork_block))
}
Self {
rig,
fork_blocks,
invalid_head,
}
}
}
/// Imports the three fork blocks one at a time. After each of the first two the head must stay
/// on the invalid block and `get_head` must still fail; after the third the head must move to
/// that block and `get_head` must agree.
#[tokio::test]
async fn recover_from_invalid_head_by_importing_blocks() {
    let InvalidHeadSetup {
        rig,
        fork_blocks,
        invalid_head,
    } = InvalidHeadSetup::new().await;

    let stale_root = invalid_head.head_block_root();

    // The first two blocks must not displace the invalid head.
    for (index, block) in fork_blocks[..2].iter().enumerate() {
        {
            let server = &rig.harness.mock_execution_layer.as_ref().unwrap().server;
            if index > 0 {
                // Every block after the first is answered with `SYNCING`.
                server.all_payloads_syncing_on_new_payload(true);
            } else {
                // The first block is answered with `VALID` during import.
                server.all_payloads_valid_on_new_payload();
            }
        }

        rig.harness
            .chain
            .process_block(block.clone(), CountUnrealized::True)
            .await
            .unwrap();
        rig.recompute_head().await;
        rig.assert_get_head_error_contains("InvalidBestNode");

        assert_eq!(
            rig.cached_head().head_block_root(),
            stale_root,
            "the head should not change"
        );
    }

    // The third block is expected to take over as head.
    let recovery_block = &fork_blocks[2];
    rig.harness
        .chain
        .process_block(recovery_block.clone(), CountUnrealized::True)
        .await
        .unwrap();
    rig.recompute_head().await;

    let new_head = rig.cached_head();
    assert_eq!(
        new_head.head_block_root(),
        recovery_block.canonical_root(),
        "the third block should become the head"
    );

    // Running fork choice directly must now succeed and agree with the cached head.
    let manual_get_head = rig
        .harness
        .chain
        .canonical_head
        .fork_choice_write_lock()
        .get_head(rig.harness.chain.slot().unwrap(), &rig.harness.chain.spec)
        .unwrap();
    assert_eq!(manual_get_head, new_head.head_block_root(),);
}
/// Persists the head and fork choice while the head is invalid, rebuilds a harness from the same
/// store, and checks that the resumed head is the same block but is now reported as optimistic.
#[tokio::test]
async fn recover_from_invalid_head_after_persist_and_reboot() {
    let InvalidHeadSetup {
        rig, invalid_head, ..
    } = InvalidHeadSetup::new().await;

    // Write the head and fork choice to the store explicitly.
    rig.harness.chain.persist_head_and_fork_choice().unwrap();

    let resumed = BeaconChainHarness::builder(MainnetEthSpec)
        .default_spec()
        .deterministic_keypairs(VALIDATOR_COUNT)
        .resumed_ephemeral_store(rig.harness.chain.store.clone())
        .mock_execution_layer()
        .build();

    // Drop the original rig so it cannot be used by mistake from here on.
    drop(rig);

    let resumed_head = resumed.chain.canonical_head.cached_head();
    let resumed_root = resumed_head.head_block_root();
    assert_eq!(
        resumed_root,
        invalid_head.head_block_root(),
        "the resumed harness should have the invalid block as the head"
    );

    let is_optimistic = resumed
        .chain
        .canonical_head
        .fork_choice_read_lock()
        .is_optimistic_block(&resumed_root)
        .unwrap();
    assert!(
        is_optimistic,
        "the invalid block should have become optimistic"
    );
}
/// Checks that invalidating a block and then calling `set_all_blocks_to_optimistic` leaves the
/// node weights along the head's chain unchanged, that the head weight equals one validator's
/// effective balance after the slot advances, and that the chain can still be extended.
#[tokio::test]
async fn weights_after_resetting_optimistic_status() {
    let mut rig = InvalidPayloadRig::new().enable_attestations();
    rig.move_to_terminal_block();
    // The transition block is imported with a valid payload.
    rig.import_block(Payload::Valid).await;

    let mut imported_roots = vec![];
    for _ in 0..4 {
        let root = rig.import_block(Payload::Syncing).await;
        imported_roots.push(root);
    }

    rig.recompute_head().await;

    let head = rig.cached_head();
    let head_root = head.head_block_root();

    // Snapshot of (root, weight) for every node yielded by `iter_nodes` from the head.
    let original_weights: HashMap<_, _> = rig
        .harness
        .chain
        .canonical_head
        .fork_choice_read_lock()
        .proto_array()
        .iter_nodes(&head_root)
        .map(|node| (node.root, node.weight))
        .collect();

    rig.invalidate_manually(imported_roots[1]).await;

    rig.harness
        .chain
        .canonical_head
        .fork_choice_write_lock()
        .proto_array_mut()
        .set_all_blocks_to_optimistic::<E>(&rig.harness.chain.spec)
        .unwrap();

    // Same snapshot again after the reset; it must match the original.
    let new_weights: HashMap<_, _> = rig
        .harness
        .chain
        .canonical_head
        .fork_choice_read_lock()
        .proto_array()
        .iter_nodes(&head_root)
        .map(|node| (node.root, node.weight))
        .collect();

    assert_eq!(original_weights, new_weights);

    // Advance the current slot and run fork choice to remove proposer boost.
    let next_slot = rig.harness.chain.slot().unwrap() + 1;
    rig.harness.set_current_slot(next_slot);
    rig.recompute_head().await;

    let head_weight = rig
        .harness
        .chain
        .canonical_head
        .fork_choice_read_lock()
        .get_block_weight(&head_root)
        .unwrap();
    let single_validator_balance = head.snapshot.beacon_state.validators()[0].effective_balance;
    assert_eq!(
        head_weight,
        single_validator_balance,
        "proposer boost should be removed from the head block and the vote of a single validator applied"
    );

    // Import a length of chain to ensure the chain can be built atop.
    let extra_blocks = E::slots_per_epoch() * 4;
    for _ in 0..extra_blocks {
        rig.import_block(Payload::Valid).await;
    }
}

View File

@@ -2128,7 +2128,7 @@ async fn weak_subjectivity_sync() {
.process_block(Arc::new(full_block), CountUnrealized::True)
.await
.unwrap();
beacon_chain.recompute_head_at_current_slot().await.unwrap();
beacon_chain.recompute_head_at_current_slot().await;
// Check that the new block's state can be loaded correctly.
let state_root = block.state_root();
@@ -2460,11 +2460,7 @@ async fn revert_minority_fork_on_resume() {
.build();
// Head should now be just before the fork.
resumed_harness
.chain
.recompute_head_at_current_slot()
.await
.unwrap();
resumed_harness.chain.recompute_head_at_current_slot().await;
assert_eq!(resumed_harness.head_slot(), fork_slot - 1);
// Head track should know the canonical head and the rogue head.
@@ -2482,11 +2478,7 @@ async fn revert_minority_fork_on_resume() {
.unwrap();
// The canonical head should be the block from the majority chain.
resumed_harness
.chain
.recompute_head_at_current_slot()
.await
.unwrap();
resumed_harness.chain.recompute_head_at_current_slot().await;
assert_eq!(resumed_harness.head_slot(), block.slot());
assert_eq!(resumed_harness.head_block_root(), block.canonical_root());
}

View File

@@ -693,11 +693,7 @@ async fn run_skip_slot_test(skip_slots: u64) {
harness_a.chain.head_snapshot().beacon_block_root
);
harness_b
.chain
.recompute_head_at_current_slot()
.await
.expect("should run fork choice");
harness_b.chain.recompute_head_at_current_slot().await;
assert_eq!(
harness_b.chain.head_snapshot().beacon_block.slot(),