From 90dd5bb5d7b83aaf886bc94dcb41b9df43f3736a Mon Sep 17 00:00:00 2001 From: chonghe <44791194+chong-he@users.noreply.github.com> Date: Mon, 1 Dec 2025 13:56:50 +0800 Subject: [PATCH] Refactor get_validator_blocks_v3 fallback (#8186) #7727 introduced a bug in the logging: whenever a node failed the SSZ `get_validator_blocks_v3` endpoint, the failure was logged as `Beacon node does not support...`. However, the failure can be due to other reasons, such as a timeout error, as found by @jimmygchen: `WARN Beacon node does not support SSZ in block production, falling back to JSON slot: 5283379, error: HttpClient(url: https://ho-h-bn-cowl.spesi.io:15052/, kind: timeout, detail: operation timed out` This PR makes the error log more generic, so there is less confusion. Additionally, as suggested by @michaelsproul, this PR refactors the `get_validator_blocks_v3` calls to try all beacon nodes using the SSZ endpoint first; only if all beacon nodes fail the SSZ endpoint does it fall back to JSON. It changes the logic from: "SSZ -> JSON for primary beacon node, followed by SSZ -> JSON for second beacon node and so on" to "SSZ for all beacon nodes -> JSON for all beacon nodes" This has the advantage that if the primary beacon node is having issues and fails the SSZ request, we avoid immediately retrying the primary beacon node with JSON (as the primary beacon node could well fail again); instead, we move on to the second beacon node. 
Co-Authored-By: Tan Chee Keong Co-Authored-By: chonghe <44791194+chong-he@users.noreply.github.com> --- .../validator_services/src/block_service.rs | 155 ++++++++---------- 1 file changed, 68 insertions(+), 87 deletions(-) diff --git a/validator_client/validator_services/src/block_service.rs b/validator_client/validator_services/src/block_service.rs index 5ffabd22ec..8ec53d3f40 100644 --- a/validator_client/validator_services/src/block_service.rs +++ b/validator_client/validator_services/src/block_service.rs @@ -1,5 +1,4 @@ use beacon_node_fallback::{ApiTopic, BeaconNodeFallback, Error as FallbackError, Errors}; -use bls::SignatureBytes; use eth2::{BeaconNodeHttpClient, StatusCode}; use graffiti_file::{GraffitiFile, determine_graffiti}; use logging::crit; @@ -298,7 +297,7 @@ impl BlockService { self.inner.executor.spawn( async move { let result = service - .publish_block(slot, validator_pubkey, builder_boost_factor) + .get_validator_block_and_publish_block(slot, validator_pubkey, builder_boost_factor) .await; match result { @@ -396,7 +395,7 @@ impl BlockService { skip_all, fields(%slot, ?validator_pubkey) )] - async fn publish_block( + async fn get_validator_block_and_publish_block( self, slot: Slot, validator_pubkey: PublicKeyBytes, @@ -449,33 +448,80 @@ impl BlockService { info!(slot = slot.as_u64(), "Requesting unsigned block"); - // Request block from first responsive beacon node. + // Request an SSZ block from all beacon nodes in order, returning on the first successful response. + // If all nodes fail, run a second pass falling back to JSON. // - // Try the proposer nodes last, since it's likely that they don't have a + // Proposer nodes will always be tried last during each pass since it's likely that they don't have a // great view of attestations on the network. 
- let unsigned_block = proposer_fallback + let ssz_block_response = proposer_fallback .request_proposers_last(|beacon_node| async move { let _get_timer = validator_metrics::start_timer_vec( &validator_metrics::BLOCK_SERVICE_TIMES, &[validator_metrics::BEACON_BLOCK_HTTP_GET], ); - Self::get_validator_block( - &beacon_node, - slot, - randao_reveal_ref, - graffiti, - proposer_index, - builder_boost_factor, - ) - .await - .map_err(|e| { - BlockError::Recoverable(format!( - "Error from beacon node when producing block: {:?}", - e - )) - }) + beacon_node + .get_validator_blocks_v3_ssz::( + slot, + randao_reveal_ref, + graffiti.as_ref(), + builder_boost_factor, + ) + .await }) - .await?; + .await; + + let block_response = match ssz_block_response { + Ok((ssz_block_response, _metadata)) => ssz_block_response, + Err(e) => { + warn!( + slot = slot.as_u64(), + error = %e, + "SSZ block production failed, falling back to JSON" + ); + + proposer_fallback + .request_proposers_last(|beacon_node| async move { + let _get_timer = validator_metrics::start_timer_vec( + &validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::BEACON_BLOCK_HTTP_GET], + ); + let (json_block_response, _metadata) = beacon_node + .get_validator_blocks_v3::( + slot, + randao_reveal_ref, + graffiti.as_ref(), + builder_boost_factor, + ) + .await + .map_err(|e| { + BlockError::Recoverable(format!( + "Error from beacon node when producing block: {:?}", + e + )) + })?; + + Ok(json_block_response.data) + }) + .await + .map_err(BlockError::from)? 
+ } + }; + + let (block_proposer, unsigned_block) = match block_response { + eth2::types::ProduceBlockV3Response::Full(block) => { + (block.block().proposer_index(), UnsignedBlock::Full(block)) + } + eth2::types::ProduceBlockV3Response::Blinded(block) => { + (block.proposer_index(), UnsignedBlock::Blinded(block)) + } + }; + + info!(slot = slot.as_u64(), "Received unsigned block"); + if proposer_index != Some(block_proposer) { + return Err(BlockError::Recoverable( + "Proposer index does not match block proposer. Beacon chain re-orged".to_string(), + )); + } self_ref .sign_and_publish_block( @@ -525,71 +571,6 @@ impl BlockService { } Ok::<_, BlockError>(()) } - - #[instrument(skip_all, fields(%slot))] - async fn get_validator_block( - beacon_node: &BeaconNodeHttpClient, - slot: Slot, - randao_reveal_ref: &SignatureBytes, - graffiti: Option, - proposer_index: Option, - builder_boost_factor: Option, - ) -> Result, BlockError> { - let block_response = match beacon_node - .get_validator_blocks_v3_ssz::( - slot, - randao_reveal_ref, - graffiti.as_ref(), - builder_boost_factor, - ) - .await - { - Ok((ssz_block_response, _)) => ssz_block_response, - Err(e) => { - warn!( - slot = slot.as_u64(), - error = %e, - "Beacon node does not support SSZ in block production, falling back to JSON" - ); - - let (json_block_response, _) = beacon_node - .get_validator_blocks_v3::( - slot, - randao_reveal_ref, - graffiti.as_ref(), - builder_boost_factor, - ) - .await - .map_err(|e| { - BlockError::Recoverable(format!( - "Error from beacon node when producing block: {:?}", - e - )) - })?; - - // Extract ProduceBlockV3Response (data field of the struct ForkVersionedResponse) - json_block_response.data - } - }; - - let (block_proposer, unsigned_block) = match block_response { - eth2::types::ProduceBlockV3Response::Full(block) => { - (block.block().proposer_index(), UnsignedBlock::Full(block)) - } - eth2::types::ProduceBlockV3Response::Blinded(block) => { - (block.proposer_index(), 
UnsignedBlock::Blinded(block)) - } - }; - - info!(slot = slot.as_u64(), "Received unsigned block"); - if proposer_index != Some(block_proposer) { - return Err(BlockError::Recoverable( - "Proposer index does not match block proposer. Beacon chain re-orged".to_string(), - )); - } - - Ok::<_, BlockError>(unsigned_block) - } } /// Wrapper for values we want to log about a block we signed, for easy extraction from the possible