Delete bogus InvalidBestNode error (#9364)

On Glamsterdam devnets we started seeing Lighthouse nodes unable to start with errors like:

> May 26 04:34:01.582 CRIT  Failed to start beacon node                   reason: "Unable to load fork choice from disk: ForkChoiceError(ProtoArrayStringError(\"find_head failed: InvalidBestNode(InvalidBestNodeInfo { current_slot: Slot(23550), start_root: 0x2c70b1641c29ec46360c99f9a8512f077862cbbc603e16f4a423007d210b0c5f, justified_checkpoint: Checkpoint { epoch: Epoch(712), root: 0x2c70b1641c29ec46360c99f9a8512f077862cbbc603e16f4a423007d210b0c5f }, finalized_checkpoint: Checkpoint { epoch: Epoch(710), root: 0xede5e0b09b51bdb5445ade3398e685bd193b845e0b0ffb827f0c3fec8277ea51 }, head_root: 0x2c70b1641c29ec46360c99f9a8512f077862cbbc603e16f4a423007d210b0c5f, head_justified_checkpoint: Checkpoint { epoch: Epoch(710), root: 0xede5e0b09b51bdb5445ade3398e685bd193b845e0b0ffb827f0c3fec8277ea51 }, head_finalized_checkpoint: Checkpoint { epoch: Epoch(709), root: 0xbb243eff616ff362c52b83113e7c536d0a68cb9ca3d6a1cb1055e732219d9736 } })\"))"

This error was the result of an overly-strict sanity check, based on assumptions that are not true under extreme network conditions.


  Completely remove the `InvalidBestNode` failure path: it is not compliant with the spec, and is actively harmful when triggered (it prevents Lighthouse from starting at all). The error was reachable in any situation where all leaf nodes of fork choice were ineligible to be the head. The payload invalidation tests show some examples of cases where this would happen, and the [newly-added regression test](9a5df1d982) shows a contrived case where it can happen on a Gloas network without _any_ slashings or invalid blocks. There are probably many more cases where it can happen.

We do not lose anything by removing it. The spec's implementation of `get_head` _always_ returns something (unless it crashes), and in these cases it is correct to return the starting node of the traversal: the justified checkpoint block. This is what we now do, and what the new test verifies.

I've also added some facilities to the harness for injecting attestations with fixed `payload_present` fields. @hopinheimer found himself needing something similar when messing with reorg tests, so I think these are probably useful. It might be possible to do without them by juggling the payload reveal timing in just the right way, but I think this approach is just way simpler.


Co-Authored-By: Michael Sproul <michael@sigmaprime.io>
This commit is contained in:
Michael Sproul
2026-06-01 10:46:58 +10:00
committed by GitHub
parent 8396dc87d0
commit 74a5609ab1
6 changed files with 200 additions and 83 deletions

View File

@@ -1636,6 +1636,7 @@ async fn attestation_verification_use_head_state_fork() {
MakeAttestationOptions {
fork: capella_fork,
limit: None,
payload_present_override: None,
},
)
.0
@@ -1667,6 +1668,7 @@ async fn attestation_verification_use_head_state_fork() {
MakeAttestationOptions {
fork: bellatrix_fork,
limit: None,
payload_present_override: None,
},
)
.0
@@ -1741,6 +1743,7 @@ async fn aggregated_attestation_verification_use_head_state_fork() {
MakeAttestationOptions {
fork: capella_fork,
limit: None,
payload_present_override: None,
},
)
.0
@@ -1768,6 +1771,7 @@ async fn aggregated_attestation_verification_use_head_state_fork() {
MakeAttestationOptions {
fork: bellatrix_fork,
limit: None,
payload_present_override: None,
},
)
.0

View File

@@ -8,7 +8,8 @@ use beacon_chain::{
WhenSlotSkipped,
custody_context::NodeCustodyType,
test_utils::{
AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType, test_spec,
AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType,
MakeAttestationOptions, test_spec,
},
};
use beacon_chain::{
@@ -17,6 +18,7 @@ use beacon_chain::{
};
use bls::{AggregateSignature, Keypair, Signature};
use fixed_bytes::FixedBytesExtended;
use fork_choice::PayloadStatus;
use logging::create_test_tracing_subscriber;
use slasher::{Config as SlasherConfig, Slasher};
use state_processing::{
@@ -1909,6 +1911,153 @@ async fn add_altair_block_to_base_chain() {
));
}
// This is a regression test for the bogus `InvalidBestNode` error which was reachable in Gloas
// networks. Previously Lighthouse would return an `InvalidBestNode` error from `get_head` in
// contradiction to the spec, which states that the justified root should be returned when no leaf
// node is viable.
//
// The chain construction in this test is contrived but not impossible: the justified block's full
// branch is what contained the evidence to justify it, but the empty branch is more weighty and
// wins out.
#[tokio::test]
async fn gloas_get_head_can_return_justified_empty_payload_branch() {
let spec = test_spec::<E>();
if !spec.fork_name_at_epoch(Epoch::new(0)).gloas_enabled() {
return;
}
let harness = BeaconChainHarness::builder(MainnetEthSpec)
.spec(spec.clone().into())
.chain_config(ChainConfig {
archive: true,
..ChainConfig::default()
})
.keypairs(KEYPAIRS[0..VALIDATOR_COUNT].to_vec())
.node_custody_type(NodeCustodyType::Supernode)
.fresh_ephemeral_store()
.mock_execution_layer()
.build();
harness
.extend_slots(E::slots_per_epoch() as usize * 3)
.await;
let justified_checkpoint = harness.justified_checkpoint();
assert_ne!(justified_checkpoint.epoch, Epoch::new(0));
let justified_root = justified_checkpoint.root;
let justified_block = harness
.chain
.get_blinded_block(&justified_root)
.unwrap()
.unwrap();
let justified_slot = justified_block.message().slot();
let justified_state_root = justified_block.message().state_root();
harness.advance_slot();
harness
.extend_chain(
E::slots_per_epoch() as usize * 2,
BlockStrategy::OnCanonicalHead,
AttestationStrategy::SomeValidators(vec![]),
)
.await;
let current_slot = harness.get_current_slot();
let current_epoch = current_slot.epoch(E::slots_per_epoch());
assert_eq!(
harness
.chain
.canonical_head
.cached_head()
.head_payload_status(),
PayloadStatus::Full
);
{
let fork_choice = harness.chain.canonical_head.fork_choice_read_lock();
assert!(fork_choice.is_payload_received(&justified_root));
let justified_node = fork_choice.get_block(&justified_root).unwrap();
let voting_source = justified_node
.unrealized_justified_checkpoint
.unwrap_or(justified_node.justified_checkpoint);
assert!(
voting_source.epoch + 2 < current_epoch,
"the justified node's own voting source must be stale"
);
}
let mut attestation_state = harness
.chain
.get_state(&justified_state_root, Some(justified_slot), true)
.unwrap()
.unwrap();
assert!(
attestation_state
.validators()
.iter()
.all(|validator| !validator.slashed),
"reproducer must not rely on slashed validators"
);
let all_validators = harness.get_all_validators();
let mut validators_with_empty_vote = [false; VALIDATOR_COUNT];
let attestation_start_slot = (current_epoch - 1).start_slot(E::slots_per_epoch());
let attestation_slot = current_slot - 1;
assert_eq!(
attestation_start_slot + E::slots_per_epoch() - 1,
attestation_slot
);
// Create two epochs worth of attestations with `payload_present=false`, all pointing at the
// justified block. This ensures it's very much the canonical head, instead of the justifying
// chain built off its `Full` branch.
for slot in (attestation_start_slot.as_u64()..current_slot.as_u64()).map(Slot::new) {
while attestation_state.slot() < slot {
per_slot_processing(&mut attestation_state, None, &spec).unwrap();
}
attestation_state.build_caches(&spec).unwrap();
let attestation_state_root = attestation_state.update_tree_hash_cache().unwrap();
assert_eq!(
attestation_state.get_latest_block_root(attestation_state_root),
justified_root
);
let fork = spec.fork_at_epoch(slot.epoch(E::slots_per_epoch()));
let (attestations, attesters) = harness.make_attestations_with_opts(
&all_validators,
&attestation_state,
attestation_state_root,
justified_root.into(),
slot,
MakeAttestationOptions {
limit: None,
fork,
payload_present_override: Some(false),
},
);
for validator_index in attesters {
validators_with_empty_vote[validator_index] = true;
}
harness.process_attestations(attestations, &attestation_state);
}
assert!(
validators_with_empty_vote.iter().all(|attested| *attested),
"all validators should have a latest regular attestation to the justified root"
);
let (head_root, payload_status) = harness
.chain
.canonical_head
.fork_choice_write_lock()
.get_head(current_slot, &spec)
.expect("fork choice should return the justified root on the empty payload branch");
assert_eq!(head_root, justified_root);
assert_eq!(payload_status, PayloadStatus::Empty);
}
// This is a regression test for this bug:
// https://github.com/sigp/lighthouse/issues/4332#issuecomment-1565092279
#[tokio::test]

View File

@@ -6,7 +6,7 @@ use beacon_chain::{
BeaconChainError, BlockError, ChainConfig, ExecutionPayloadError,
INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON, NotifyExecutionLayer, StateSkipConfig,
WhenSlotSkipped,
canonical_head::{CachedHead, CanonicalHead},
canonical_head::CachedHead,
test_utils::{BeaconChainHarness, EphemeralHarnessType, fork_name_from_env, test_spec},
};
use execution_layer::{
@@ -108,10 +108,6 @@ impl InvalidPayloadRig {
self.harness.chain.canonical_head.cached_head()
}
fn canonical_head(&self) -> &CanonicalHead<EphemeralHarnessType<E>> {
&self.harness.chain.canonical_head
}
fn previous_forkchoice_update_params(&self) -> (ForkchoiceState, PayloadAttributes) {
let mock_execution_layer = self.harness.mock_execution_layer.as_ref().unwrap();
let json = mock_execution_layer
@@ -353,19 +349,6 @@ impl InvalidPayloadRig {
.await
.unwrap();
}
fn assert_get_head_error_contains(&self, s: &str) {
match self
.harness
.chain
.canonical_head
.fork_choice_write_lock()
.get_head(self.harness.chain.slot().unwrap(), &self.harness.chain.spec)
{
Err(ForkChoiceError::ProtoArrayStringError(e)) if e.contains(s) => (),
other => panic!("expected {} error, got {:?}", s, other),
};
}
}
/// Simple test of the different import types.
@@ -1297,21 +1280,14 @@ impl InvalidHeadSetup {
rig.invalidate_manually(invalid_head.head_block_root())
.await;
// Since our setup ensures that there is only a single, invalid block
// that's viable for head (according to FFG filtering), setting the
// head block as invalid should not result in another head being chosen.
// Rather, it should fail to run fork choice and leave the invalid block as
// the head.
assert!(
rig.canonical_head()
.head_execution_status()
.unwrap()
.is_invalid()
);
// Ensure that we're getting the correct error when trying to find a new
// head.
rig.assert_get_head_error_contains("InvalidBestNode");
// Ensure the justified root is the head. This is the spec-correct choice of head when
// all leaves are ineligible.
let mut fork_choice = rig.harness.chain.canonical_head.fork_choice_write_lock();
let head = fork_choice
.get_head(rig.harness.chain.slot().unwrap(), &rig.harness.chain.spec)
.unwrap();
assert_eq!(head.0, fork_choice.justified_checkpoint().root);
drop(fork_choice);
Self {
rig,