mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-02 16:21:42 +00:00
Refactor get_validator_blocks_v3 fallback (#8186)
#7727 introduced a bug in the logging, where as long as the node failed the SSZ `get_validator_blocks_v3` endpoint, it would log as `Beacon node does not support...`. However, the failure can be due to other reasons, such as a timed out error as found by @jimmygchen: `WARN Beacon node does not support SSZ in block production, falling back to JSON slot: 5283379, error: HttpClient(url: https://ho-h-bn-cowl.spesi.io:15052/, kind: timeout, detail: operation timed out` This PR made the error log more generic, so there is less confusion. Additionally, suggested by @michaelsproul, this PR refactors the `get_validator_blocks_v3` calls by trying all beacon nodes using the SSZ endpoint first, and if all beacon node fails the SSZ endpoint, only then fallback to JSON. It changes the logic from: "SSZ -> JSON for primary beacon node, followed by SSZ -> JSON for second beacon node and so on" to "SSZ for all beacon nodes -> JSON for all beacon nodes" This has the advantage that if the primary beacon node is having issues and failed the SSZ, we avoid retrying the primary beacon node again on JSON (as it could be that the primary beacon node fail again); rather, we switch to the second beacon node. Co-Authored-By: Tan Chee Keong <tanck@sigmaprime.io> Co-Authored-By: chonghe <44791194+chong-he@users.noreply.github.com>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback, Error as FallbackError, Errors};
|
||||
use bls::SignatureBytes;
|
||||
use eth2::{BeaconNodeHttpClient, StatusCode};
|
||||
use graffiti_file::{GraffitiFile, determine_graffiti};
|
||||
use logging::crit;
|
||||
@@ -298,7 +297,7 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
|
||||
self.inner.executor.spawn(
|
||||
async move {
|
||||
let result = service
|
||||
.publish_block(slot, validator_pubkey, builder_boost_factor)
|
||||
.get_validator_block_and_publish_block(slot, validator_pubkey, builder_boost_factor)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
@@ -396,7 +395,7 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
|
||||
skip_all,
|
||||
fields(%slot, ?validator_pubkey)
|
||||
)]
|
||||
async fn publish_block(
|
||||
async fn get_validator_block_and_publish_block(
|
||||
self,
|
||||
slot: Slot,
|
||||
validator_pubkey: PublicKeyBytes,
|
||||
@@ -449,33 +448,80 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
|
||||
|
||||
info!(slot = slot.as_u64(), "Requesting unsigned block");
|
||||
|
||||
// Request block from first responsive beacon node.
|
||||
// Request an SSZ block from all beacon nodes in order, returning on the first successful response.
|
||||
// If all nodes fail, run a second pass falling back to JSON.
|
||||
//
|
||||
// Try the proposer nodes last, since it's likely that they don't have a
|
||||
// Proposer nodes will always be tried last during each pass since it's likely that they don't have a
|
||||
// great view of attestations on the network.
|
||||
let unsigned_block = proposer_fallback
|
||||
let ssz_block_response = proposer_fallback
|
||||
.request_proposers_last(|beacon_node| async move {
|
||||
let _get_timer = validator_metrics::start_timer_vec(
|
||||
&validator_metrics::BLOCK_SERVICE_TIMES,
|
||||
&[validator_metrics::BEACON_BLOCK_HTTP_GET],
|
||||
);
|
||||
Self::get_validator_block(
|
||||
&beacon_node,
|
||||
slot,
|
||||
randao_reveal_ref,
|
||||
graffiti,
|
||||
proposer_index,
|
||||
builder_boost_factor,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
BlockError::Recoverable(format!(
|
||||
"Error from beacon node when producing block: {:?}",
|
||||
e
|
||||
))
|
||||
})
|
||||
beacon_node
|
||||
.get_validator_blocks_v3_ssz::<S::E>(
|
||||
slot,
|
||||
randao_reveal_ref,
|
||||
graffiti.as_ref(),
|
||||
builder_boost_factor,
|
||||
)
|
||||
.await
|
||||
})
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let block_response = match ssz_block_response {
|
||||
Ok((ssz_block_response, _metadata)) => ssz_block_response,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
slot = slot.as_u64(),
|
||||
error = %e,
|
||||
"SSZ block production failed, falling back to JSON"
|
||||
);
|
||||
|
||||
proposer_fallback
|
||||
.request_proposers_last(|beacon_node| async move {
|
||||
let _get_timer = validator_metrics::start_timer_vec(
|
||||
&validator_metrics::BLOCK_SERVICE_TIMES,
|
||||
&[validator_metrics::BEACON_BLOCK_HTTP_GET],
|
||||
);
|
||||
let (json_block_response, _metadata) = beacon_node
|
||||
.get_validator_blocks_v3::<S::E>(
|
||||
slot,
|
||||
randao_reveal_ref,
|
||||
graffiti.as_ref(),
|
||||
builder_boost_factor,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
BlockError::Recoverable(format!(
|
||||
"Error from beacon node when producing block: {:?}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(json_block_response.data)
|
||||
})
|
||||
.await
|
||||
.map_err(BlockError::from)?
|
||||
}
|
||||
};
|
||||
|
||||
let (block_proposer, unsigned_block) = match block_response {
|
||||
eth2::types::ProduceBlockV3Response::Full(block) => {
|
||||
(block.block().proposer_index(), UnsignedBlock::Full(block))
|
||||
}
|
||||
eth2::types::ProduceBlockV3Response::Blinded(block) => {
|
||||
(block.proposer_index(), UnsignedBlock::Blinded(block))
|
||||
}
|
||||
};
|
||||
|
||||
info!(slot = slot.as_u64(), "Received unsigned block");
|
||||
if proposer_index != Some(block_proposer) {
|
||||
return Err(BlockError::Recoverable(
|
||||
"Proposer index does not match block proposer. Beacon chain re-orged".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
self_ref
|
||||
.sign_and_publish_block(
|
||||
@@ -525,71 +571,6 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
|
||||
}
|
||||
Ok::<_, BlockError>(())
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(%slot))]
|
||||
async fn get_validator_block(
|
||||
beacon_node: &BeaconNodeHttpClient,
|
||||
slot: Slot,
|
||||
randao_reveal_ref: &SignatureBytes,
|
||||
graffiti: Option<Graffiti>,
|
||||
proposer_index: Option<u64>,
|
||||
builder_boost_factor: Option<u64>,
|
||||
) -> Result<UnsignedBlock<S::E>, BlockError> {
|
||||
let block_response = match beacon_node
|
||||
.get_validator_blocks_v3_ssz::<S::E>(
|
||||
slot,
|
||||
randao_reveal_ref,
|
||||
graffiti.as_ref(),
|
||||
builder_boost_factor,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((ssz_block_response, _)) => ssz_block_response,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
slot = slot.as_u64(),
|
||||
error = %e,
|
||||
"Beacon node does not support SSZ in block production, falling back to JSON"
|
||||
);
|
||||
|
||||
let (json_block_response, _) = beacon_node
|
||||
.get_validator_blocks_v3::<S::E>(
|
||||
slot,
|
||||
randao_reveal_ref,
|
||||
graffiti.as_ref(),
|
||||
builder_boost_factor,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
BlockError::Recoverable(format!(
|
||||
"Error from beacon node when producing block: {:?}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
// Extract ProduceBlockV3Response (data field of the struct ForkVersionedResponse)
|
||||
json_block_response.data
|
||||
}
|
||||
};
|
||||
|
||||
let (block_proposer, unsigned_block) = match block_response {
|
||||
eth2::types::ProduceBlockV3Response::Full(block) => {
|
||||
(block.block().proposer_index(), UnsignedBlock::Full(block))
|
||||
}
|
||||
eth2::types::ProduceBlockV3Response::Blinded(block) => {
|
||||
(block.proposer_index(), UnsignedBlock::Blinded(block))
|
||||
}
|
||||
};
|
||||
|
||||
info!(slot = slot.as_u64(), "Received unsigned block");
|
||||
if proposer_index != Some(block_proposer) {
|
||||
return Err(BlockError::Recoverable(
|
||||
"Proposer index does not match block proposer. Beacon chain re-orged".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok::<_, BlockError>(unsigned_block)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for values we want to log about a block we signed, for easy extraction from the possible
|
||||
|
||||
Reference in New Issue
Block a user