Instrument tracing on block production code path (#8017)

Partially #7814. Instrument block production code path.

New root spans:
* `produce_block_v3`
* `produce_block_v2`

Example traces:

<img width="518" height="432" alt="image" src="https://github.com/user-attachments/assets/a9413d25-501c-49dc-95cc-623db5988981" />


  


Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
This commit is contained in:
Jimmy Chen
2025-09-10 13:30:51 +10:00
committed by GitHub
parent 2b22903fba
commit 8a4f6cf0d5
7 changed files with 101 additions and 50 deletions

View File

@@ -1437,6 +1437,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
///
/// Returns `None` when the state is not found in the database or there is an error skipping
/// to a future state.
#[instrument(level = "debug", skip_all)]
pub fn state_at_slot(
&self,
slot: Slot,
@@ -4466,6 +4467,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
}
/// If configured, wait for the fork choice run at the start of the slot to complete.
#[instrument(level = "debug", skip_all)]
fn wait_for_fork_choice_before_block_production(
self: &Arc<Self>,
slot: Slot,
@@ -4528,10 +4530,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
//
// Load the parent state from disk.
let chain = self.clone();
let span = Span::current();
let (state, state_root_opt) = self
.task_executor
.spawn_blocking_handle(
move || chain.load_state_for_block_production(slot),
move || {
let _guard =
debug_span!(parent: span, "load_state_for_block_production").entered();
chain.load_state_for_block_production(slot)
},
"load_state_for_block_production",
)
.ok_or(BlockProductionError::ShuttingDown)?
@@ -4618,6 +4625,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable.
///
/// This function will return `None` if proposer re-orgs are disabled.
#[instrument(skip_all, level = "debug")]
fn get_state_for_re_org(
&self,
slot: Slot,
@@ -5072,6 +5080,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// equal to the root of `state`. Providing this value will serve as an optimization to avoid
/// performing a tree hash in some scenarios.
#[allow(clippy::too_many_arguments)]
#[instrument(level = "debug", skip_all)]
pub async fn produce_block_on_state(
self: &Arc<Self>,
state: BeaconState<T::EthSpec>,
@@ -5091,10 +5100,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
.graffiti_calculator
.get_graffiti(validator_graffiti)
.await;
let span = Span::current();
let mut partial_beacon_block = self
.task_executor
.spawn_blocking_handle(
move || {
let _guard =
debug_span!(parent: span, "produce_partial_beacon_block").entered();
chain.produce_partial_beacon_block(
state,
state_root_opt,
@@ -5130,10 +5142,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
match block_contents_type {
BlockProposalContentsType::Full(block_contents) => {
let chain = self.clone();
let span = Span::current();
let beacon_block_response = self
.task_executor
.spawn_blocking_handle(
move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block")
.entered();
chain.complete_partial_beacon_block(
partial_beacon_block,
Some(block_contents),
@@ -5150,10 +5166,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
}
BlockProposalContentsType::Blinded(block_contents) => {
let chain = self.clone();
let span = Span::current();
let beacon_block_response = self
.task_executor
.spawn_blocking_handle(
move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block")
.entered();
chain.complete_partial_beacon_block(
partial_beacon_block,
Some(block_contents),
@@ -5171,10 +5191,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
}
} else {
let chain = self.clone();
let span = Span::current();
let beacon_block_response = self
.task_executor
.spawn_blocking_handle(
move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block").entered();
chain.complete_partial_beacon_block(
partial_beacon_block,
None,
@@ -5276,51 +5299,54 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
// Iterate through the naive aggregation pool and ensure all the attestations from there
// are included in the operation pool.
let unagg_import_timer =
metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
for attestation in self.naive_aggregation_pool.read().iter() {
let import = |attestation: &Attestation<T::EthSpec>| {
let attesting_indices =
get_attesting_indices_from_state(&state, attestation.to_ref())?;
self.op_pool
.insert_attestation(attestation.clone(), attesting_indices)
};
if let Err(e) = import(attestation) {
// Don't stop block production if there's an error, just create a log.
error!(
reason = ?e,
"Attestation did not transfer to op pool"
);
{
let _guard = debug_span!("import_naive_aggregation_pool").entered();
let _unagg_import_timer =
metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
for attestation in self.naive_aggregation_pool.read().iter() {
let import = |attestation: &Attestation<T::EthSpec>| {
let attesting_indices =
get_attesting_indices_from_state(&state, attestation.to_ref())?;
self.op_pool
.insert_attestation(attestation.clone(), attesting_indices)
};
if let Err(e) = import(attestation) {
// Don't stop block production if there's an error, just create a log.
error!(
reason = ?e,
"Attestation did not transfer to op pool"
);
}
}
}
drop(unagg_import_timer);
let attestation_packing_timer =
metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
// Epoch cache and total balance cache are required for op pool packing.
state.build_total_active_balance_cache(&self.spec)?;
initialize_epoch_cache(&mut state, &self.spec)?;
let mut prev_filter_cache = HashMap::new();
let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
};
let mut curr_filter_cache = HashMap::new();
let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
};
let mut attestations = self
.op_pool
.get_attestations(
&state,
prev_attestation_filter,
curr_attestation_filter,
&self.spec,
)
.map_err(BlockProductionError::OpPoolError)?;
drop(attestation_packing_timer);
let mut attestations = {
let _guard = debug_span!("pack_attestations").entered();
let _attestation_packing_timer =
metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
// Epoch cache and total balance cache are required for op pool packing.
state.build_total_active_balance_cache(&self.spec)?;
initialize_epoch_cache(&mut state, &self.spec)?;
let mut prev_filter_cache = HashMap::new();
let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
};
let mut curr_filter_cache = HashMap::new();
let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
};
self.op_pool
.get_attestations(
&state,
prev_attestation_filter,
curr_attestation_filter,
&self.spec,
)
.map_err(BlockProductionError::OpPoolError)?
};
// If paranoid mode is enabled re-check the signatures of every included message.
// This will be a lot slower but guards against bugs in block production and can be

View File

@@ -24,7 +24,7 @@ use state_processing::per_block_processing::{
};
use std::sync::Arc;
use tokio::task::JoinHandle;
use tracing::{debug, warn};
use tracing::{Instrument, debug, debug_span, warn};
use tree_hash::TreeHash;
use types::payload::BlockProductionVersion;
use types::*;
@@ -403,8 +403,9 @@ pub fn get_execution_payload<T: BeaconChainTypes>(
block_production_version,
)
.await
},
"get_execution_payload",
}
.instrument(debug_span!("prepare_execution_payload")),
"prepare_execution_payload",
)
.ok_or(BlockProductionError::ShuttingDown)?;
@@ -503,6 +504,7 @@ where
},
"prepare_execution_payload_forkchoice_update_params",
)
.instrument(debug_span!("forkchoice_update_params"))
.await
.map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?;

View File

@@ -43,7 +43,7 @@ use tokio::{
time::sleep,
};
use tokio_stream::wrappers::WatchStream;
use tracing::{debug, error, info, warn};
use tracing::{Instrument, debug, debug_span, error, info, instrument, warn};
use tree_hash::TreeHash;
use types::beacon_block_body::KzgCommitments;
use types::builder_bid::BuilderBid;
@@ -851,6 +851,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
}
/// Returns the fee-recipient address that should be used to build a block
#[instrument(level = "debug", skip_all)]
pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address {
if let Some(preparation_data_entry) =
self.proposer_preparation_data().await.get(&proposer_index)
@@ -875,6 +876,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
}
}
#[instrument(level = "debug", skip_all)]
pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option<u64> {
self.proposer_preparation_data()
.await
@@ -891,6 +893,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
///
/// The result will be returned from the first node that returns successfully. No more nodes
/// will be contacted.
#[instrument(level = "debug", skip_all)]
pub async fn get_payload(
&self,
payload_parameters: PayloadParameters<'_>,
@@ -996,6 +999,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async {
builder
.get_builder_header::<E>(slot, parent_hash, pubkey)
.instrument(debug_span!("get_builder_header"))
.await
}),
timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async {
@@ -1237,6 +1241,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
.await
}
#[instrument(level = "debug", skip_all)]
async fn get_full_payload_with(
&self,
payload_parameters: PayloadParameters<'_>,

View File

@@ -10,8 +10,10 @@ use beacon_chain::{
BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification,
};
use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification};
use lighthouse_tracing::{SPAN_PRODUCE_BLOCK_V2, SPAN_PRODUCE_BLOCK_V3};
use ssz::Encode;
use std::sync::Arc;
use tracing::instrument;
use types::{payload::BlockProductionVersion, *};
use warp::{
Reply,
@@ -40,6 +42,11 @@ pub fn get_randao_verification(
Ok(randao_verification)
}
#[instrument(
name = SPAN_PRODUCE_BLOCK_V3,
skip_all,
fields(%slot)
)]
pub async fn produce_block_v3<T: BeaconChainTypes>(
accept_header: Option<api_types::Accept>,
chain: Arc<BeaconChain<T>>,
@@ -155,6 +162,11 @@ pub async fn produce_blinded_block_v2<T: BeaconChainTypes>(
build_response_v2(chain, block_response_type, accept_header)
}
#[instrument(
name = SPAN_PRODUCE_BLOCK_V2,
skip_all,
fields(%slot)
)]
pub async fn produce_block_v2<T: BeaconChainTypes>(
accept_header: Option<api_types::Accept>,
chain: Arc<BeaconChain<T>>,

View File

@@ -3,7 +3,9 @@
//! TODO: These span identifiers will be used to implement selective tracing export (to be implemented),
//! where only the listed root spans and their descendants will be exported to the tracing backend.
/// Root span name for publish_block
/// Root span names for block production and publishing
pub const SPAN_PRODUCE_BLOCK_V2: &str = "produce_block_v2";
pub const SPAN_PRODUCE_BLOCK_V3: &str = "produce_block_v3";
pub const SPAN_PUBLISH_BLOCK: &str = "publish_block";
/// Data Availability checker span identifiers
@@ -42,11 +44,14 @@ pub const SPAN_HANDLE_LIGHT_CLIENT_FINALITY_UPDATE: &str = "handle_light_client_
/// Only these spans and their descendants will be processed to reduce noise from
/// uninstrumented code paths. New root spans must be added to this list to be traced.
pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[
SPAN_SYNCING_CHAIN,
SPAN_PRODUCE_BLOCK_V2,
SPAN_PRODUCE_BLOCK_V3,
SPAN_PUBLISH_BLOCK,
SPAN_PENDING_COMPONENTS,
SPAN_PROCESS_GOSSIP_DATA_COLUMN,
SPAN_PROCESS_GOSSIP_BLOB,
SPAN_PROCESS_GOSSIP_BLOCK,
SPAN_SYNCING_CHAIN,
SPAN_OUTGOING_RANGE_REQUEST,
SPAN_SINGLE_BLOCK_LOOKUP,
SPAN_PROCESS_RPC_BLOCK,