Don't return errors when fork choice fails (#3370)

## Issue Addressed

NA

## Proposed Changes

There are scenarios where the only viable head has an invalid execution payload. In this scenario the `get_head` function on `proto_array` returns an error, and we must recover by importing blocks from the network.

This PR stops `BeaconChain::recompute_head` from returning an error, so that we cannot accidentally start down-scoring peers or aborting block import just because the current head has an invalid payload.

## Reviewer Notes

The following changes are included:

1. Allow `fork_choice.get_head` to fail gracefully in `BeaconChain::process_block` when trying to update the `early_attester_cache`; simply don't add the block to the cache rather than aborting the entire process.
2. Don't return an error from `BeaconChain::recompute_head_at_current_slot` and `BeaconChain::recompute_head`, to defensively prevent calling functions from aborting any process just because the fork choice function failed to run.
    - This should have practically no effect, since most callers were already continuing if recomputing the head failed.
    - The outlier is that the API will return a 200 rather than a 500 when fork choice fails.
3. Add the `ProtoArrayForkChoice::set_all_blocks_to_optimistic` function to recover from the scenario where we've rebooted and the persisted fork choice has an invalid head.
2026-06-01 13:47:16 +00:00 · 2022-07-28 13:57:09 +00:00
parent d04fde3ba9
commit 25f0e261cb
16 changed files with 466 additions and 147 deletions
--- a/beacon_node/beacon_chain/tests/block_verification.rs
+++ b/beacon_node/beacon_chain/tests/block_verification.rs
@@ -160,11 +160,7 @@ async fn chain_segment_full_segment() {
        .into_block_error()
        .expect("should import chain segment");

-    harness
-        .chain
-        .recompute_head_at_current_slot()
-        .await
-        .expect("should run fork choice");
+    harness.chain.recompute_head_at_current_slot().await;

    assert_eq!(
        harness.head_block_root(),
@@ -194,11 +190,7 @@ async fn chain_segment_varying_chunk_size() {
                .unwrap_or_else(|_| panic!("should import chain segment of len {}", chunk_size));
        }

-        harness
-            .chain
-            .recompute_head_at_current_slot()
-            .await
-            .expect("should run fork choice");
+        harness.chain.recompute_head_at_current_slot().await;

        assert_eq!(
            harness.head_block_root(),
@@ -729,11 +721,7 @@ async fn block_gossip_verification() {
    }

    // Recompute the head to ensure we cache the latest view of fork choice.
-    harness
-        .chain
-        .recompute_head_at_current_slot()
-        .await
-        .unwrap();
+    harness.chain.recompute_head_at_current_slot().await;

    /*
     * This test ensures that:
--- a/beacon_node/beacon_chain/tests/payload_invalidation.rs
+++ b/beacon_node/beacon_chain/tests/payload_invalidation.rs
@@ -1,6 +1,7 @@
 #![cfg(not(debug_assertions))]

 use beacon_chain::{
+    canonical_head::{CachedHead, CanonicalHead},
    test_utils::{BeaconChainHarness, EphemeralHarnessType},
    BeaconChainError, BlockError, ExecutionPayloadError, StateSkipConfig, WhenSlotSkipped,
    INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON,
@@ -14,6 +15,7 @@ use fork_choice::{
 };
 use proto_array::{Error as ProtoArrayError, ExecutionStatus};
 use slot_clock::SlotClock;
+use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;
 use task_executor::ShutdownReason;
@@ -95,11 +97,15 @@ impl InvalidPayloadRig {
    }

    async fn recompute_head(&self) {
-        self.harness
-            .chain
-            .recompute_head_at_current_slot()
-            .await
-            .unwrap();
+        self.harness.chain.recompute_head_at_current_slot().await;
+    }
+
+    fn cached_head(&self) -> CachedHead<E> {
+        self.harness.chain.canonical_head.cached_head()
+    }
+
+    fn canonical_head(&self) -> &CanonicalHead<EphemeralHarnessType<E>> {
+        &self.harness.chain.canonical_head
    }

    fn previous_forkchoice_update_params(&self) -> (ForkChoiceState, PayloadAttributes) {
@@ -354,6 +360,19 @@ impl InvalidPayloadRig {
            .await
            .unwrap();
    }
+
+    fn assert_get_head_error_contains(&self, s: &str) { // Panics unless `get_head` fails with a `ProtoArrayError` whose message contains `s`.
+        match self
+            .harness
+            .chain
+            .canonical_head
+            .fork_choice_write_lock()
+            .get_head(self.harness.chain.slot().unwrap(), &self.harness.chain.spec) // Call fork choice directly at the chain's current slot.
+        {
+            Err(ForkChoiceError::ProtoArrayError(e)) if e.contains(s) => (), // The expected failure.
+            other => panic!("expected {} error, got {:?}", s, other), // Any other result, including `Ok`, fails the test.
+        };
+    }
 }

 /// Simple test of the different import types.
@@ -1183,3 +1202,235 @@ async fn attesting_to_optimistic_head() {
    get_aggregated().unwrap();
    get_aggregated_by_slot_and_root().unwrap();
 }
+
+/// Test fixture: a finalized chain whose head block has been manually invalidated, plus three
+/// not-yet-imported `fork_blocks` (built from the state three slots before that head) to recover it.
+struct InvalidHeadSetup {
+    rig: InvalidPayloadRig, // The rig that built the chain; its head is the invalidated block.
+    fork_blocks: Vec<Arc<SignedBeaconBlock<E>>>, // Three consecutive blocks, built but not applied to the chain.
+    invalid_head: CachedHead<E>, // The cached head captured just before it was invalidated.
+}
+
+impl InvalidHeadSetup {
+    async fn new() -> InvalidHeadSetup {
+        let mut rig = InvalidPayloadRig::new().enable_attestations();
+        rig.move_to_terminal_block();
+        rig.import_block(Payload::Valid).await; // Import a valid transition block.
+
+        // Import `Syncing`-payload blocks until the finalized checkpoint epoch first becomes non-zero.
+        while rig.cached_head().finalized_checkpoint().epoch == 0 {
+            rig.import_block(Payload::Syncing).await;
+        }
+
+        let invalid_head = rig.cached_head(); // Captured before invalidation so tests can compare against it.
+
+        // Manually invalidate the head block and confirm the canonical head reports it as invalid.
+        rig.invalidate_manually(invalid_head.head_block_root())
+            .await;
+        assert!(rig
+            .canonical_head()
+            .head_execution_status()
+            .unwrap()
+            .is_invalid());
+
+        // Finding a new head should fail since the only possible head is not valid.
+        rig.assert_get_head_error_contains("InvalidBestNode");
+
+        // Build three "fork" blocks that conflict with the current canonical head, chained one slot
+        // apart from the state three slots before that head. Don't apply them to the chain yet.
+        let mut fork_blocks = vec![];
+        let mut parent_state = rig
+            .harness
+            .chain
+            .state_at_slot(
+                invalid_head.head_slot() - 3,
+                StateSkipConfig::WithStateRoots,
+            )
+            .unwrap();
+        for _ in 0..3 {
+            let slot = parent_state.slot() + 1;
+            let (fork_block, post_state) = rig.harness.make_block(parent_state, slot).await;
+            parent_state = post_state; // The next fork block builds on this one's post-state.
+            fork_blocks.push(Arc::new(fork_block))
+        }
+
+        Self {
+            rig,
+            fork_blocks,
+            invalid_head,
+        }
+    }
+}
+
+#[tokio::test]
+async fn recover_from_invalid_head_by_importing_blocks() { // An invalid head must be replaceable by importing fork blocks.
+    let InvalidHeadSetup {
+        rig,
+        fork_blocks,
+        invalid_head,
+    } = InvalidHeadSetup::new().await;
+
+    // Import the first two fork blocks; `get_head` must keep failing and the cached head must not move.
+    for i in 0..2 {
+        if i == 0 {
+            // The first block should be `VALID` during import.
+            rig.harness
+                .mock_execution_layer
+                .as_ref()
+                .unwrap()
+                .server
+                .all_payloads_valid_on_new_payload();
+        } else {
+            // All blocks after the first block should return `SYNCING`.
+            rig.harness
+                .mock_execution_layer
+                .as_ref()
+                .unwrap()
+                .server
+                .all_payloads_syncing_on_new_payload(true);
+        }
+
+        rig.harness
+            .chain
+            .process_block(fork_blocks[i].clone(), CountUnrealized::True)
+            .await
+            .unwrap(); // Import itself must succeed even though fork choice still fails.
+        rig.recompute_head().await;
+        rig.assert_get_head_error_contains("InvalidBestNode");
+        let new_head = rig.cached_head();
+        assert_eq!(
+            new_head.head_block_root(),
+            invalid_head.head_block_root(),
+            "the head should not change"
+        );
+    }
+
+    // Import the third block, it should become the head.
+    rig.harness
+        .chain
+        .process_block(fork_blocks[2].clone(), CountUnrealized::True)
+        .await
+        .unwrap();
+    rig.recompute_head().await;
+    let new_head = rig.cached_head();
+    assert_eq!(
+        new_head.head_block_root(),
+        fork_blocks[2].canonical_root(),
+        "the third block should become the head"
+    );
+
+    let manual_get_head = rig // Cross-check: a direct `get_head` call must now succeed.
+        .harness
+        .chain
+        .canonical_head
+        .fork_choice_write_lock()
+        .get_head(rig.harness.chain.slot().unwrap(), &rig.harness.chain.spec)
+        .unwrap();
+    assert_eq!(manual_get_head, new_head.head_block_root(),); // ...and agree with the cached head.
+}
+
+#[tokio::test]
+async fn recover_from_invalid_head_after_persist_and_reboot() { // A chain persisted with an invalid head must resume with that head optimistic.
+    let InvalidHeadSetup {
+        rig,
+        fork_blocks: _, // Unused: recovery here comes from the reboot, not from importing blocks.
+        invalid_head,
+    } = InvalidHeadSetup::new().await;
+
+    // Forcefully persist the head and fork choice while the head is still invalid.
+    rig.harness.chain.persist_head_and_fork_choice().unwrap();
+
+    let resumed = BeaconChainHarness::builder(MainnetEthSpec)
+        .default_spec()
+        .deterministic_keypairs(VALIDATOR_COUNT)
+        .resumed_ephemeral_store(rig.harness.chain.store.clone()) // Resume from the original rig's store.
+        .mock_execution_layer()
+        .build();
+
+    // Forget the original rig so we don't accidentally use it again.
+    drop(rig);
+
+    let resumed_head = resumed.chain.canonical_head.cached_head();
+    assert_eq!(
+        resumed_head.head_block_root(),
+        invalid_head.head_block_root(),
+        "the resumed harness should have the invalid block as the head"
+    );
+    assert!(
+        resumed
+            .chain
+            .canonical_head
+            .fork_choice_read_lock()
+            .is_optimistic_block(&resumed_head.head_block_root())
+            .unwrap(),
+        "the invalid block should have become optimistic"
+    );
+}
+
+#[tokio::test]
+async fn weights_after_resetting_optimistic_status() { // Invalidating a block and then resetting all blocks to optimistic must preserve node weights.
+    let mut rig = InvalidPayloadRig::new().enable_attestations();
+    rig.move_to_terminal_block();
+    rig.import_block(Payload::Valid).await; // Import a valid transition block.
+
+    let mut roots = vec![];
+    for _ in 0..4 {
+        roots.push(rig.import_block(Payload::Syncing).await); // Keep each `import_block` result for later invalidation.
+    }
+
+    rig.recompute_head().await;
+    let head = rig.cached_head();
+
+    let original_weights = rig // Map of node root -> weight, taken before anything is invalidated.
+        .harness
+        .chain
+        .canonical_head
+        .fork_choice_read_lock()
+        .proto_array()
+        .iter_nodes(&head.head_block_root())
+        .map(|node| (node.root, node.weight))
+        .collect::<HashMap<_, _>>();
+
+    rig.invalidate_manually(roots[1]).await; // Invalidate the second of the four `Syncing` blocks.
+
+    rig.harness
+        .chain
+        .canonical_head
+        .fork_choice_write_lock()
+        .proto_array_mut()
+        .set_all_blocks_to_optimistic::<E>(&rig.harness.chain.spec) // The recovery routine under test.
+        .unwrap();
+
+    let new_weights = rig // Same root -> weight map, taken after the reset.
+        .harness
+        .chain
+        .canonical_head
+        .fork_choice_read_lock()
+        .proto_array()
+        .iter_nodes(&head.head_block_root())
+        .map(|node| (node.root, node.weight))
+        .collect::<HashMap<_, _>>();
+
+    assert_eq!(original_weights, new_weights); // The invalidate-then-reset round trip must not change any weight.
+
+    // Advance the current slot and run fork choice to remove proposer boost.
+    rig.harness
+        .set_current_slot(rig.harness.chain.slot().unwrap() + 1);
+    rig.recompute_head().await;
+
+    assert_eq!(
+        rig.harness
+            .chain
+            .canonical_head
+            .fork_choice_read_lock()
+            .get_block_weight(&head.head_block_root())
+            .unwrap(),
+        head.snapshot.beacon_state.validators()[0].effective_balance, // Effective balance of validator 0 from the head snapshot's state.
+        "proposer boost should be removed from the head block and the vote of a single validator applied"
+    );
+
+    // Import a length of chain (four epochs of `Valid` blocks) to ensure the chain can be built atop.
+    for _ in 0..E::slots_per_epoch() * 4 {
+        rig.import_block(Payload::Valid).await;
+    }
+}
--- a/beacon_node/beacon_chain/tests/store_tests.rs
+++ b/beacon_node/beacon_chain/tests/store_tests.rs
@@ -2128,7 +2128,7 @@ async fn weak_subjectivity_sync() {
            .process_block(Arc::new(full_block), CountUnrealized::True)
            .await
            .unwrap();
-        beacon_chain.recompute_head_at_current_slot().await.unwrap();
+        beacon_chain.recompute_head_at_current_slot().await;

        // Check that the new block's state can be loaded correctly.
        let state_root = block.state_root();
@@ -2460,11 +2460,7 @@ async fn revert_minority_fork_on_resume() {
        .build();

    // Head should now be just before the fork.
-    resumed_harness
-        .chain
-        .recompute_head_at_current_slot()
-        .await
-        .unwrap();
+    resumed_harness.chain.recompute_head_at_current_slot().await;
    assert_eq!(resumed_harness.head_slot(), fork_slot - 1);

    // Head track should know the canonical head and the rogue head.
@@ -2482,11 +2478,7 @@ async fn revert_minority_fork_on_resume() {
            .unwrap();

        // The canonical head should be the block from the majority chain.
-        resumed_harness
-            .chain
-            .recompute_head_at_current_slot()
-            .await
-            .unwrap();
+        resumed_harness.chain.recompute_head_at_current_slot().await;
        assert_eq!(resumed_harness.head_slot(), block.slot());
        assert_eq!(resumed_harness.head_block_root(), block.canonical_root());
    }
--- a/beacon_node/beacon_chain/tests/tests.rs
+++ b/beacon_node/beacon_chain/tests/tests.rs
@@ -693,11 +693,7 @@ async fn run_skip_slot_test(skip_slots: u64) {
        harness_a.chain.head_snapshot().beacon_block_root
    );

-    harness_b
-        .chain
-        .recompute_head_at_current_slot()
-        .await
-        .expect("should run fork choice");
+    harness_b.chain.recompute_head_at_current_slot().await;

    assert_eq!(
        harness_b.chain.head_snapshot().beacon_block.slot(),