Instrument tracing on block production code path (#8017)

Partially #7814. Instrument block production code path.

New root spans:
* `produce_block_v3`
* `produce_block_v2`

Example traces:

<img width="518" height="432" alt="image" src="https://github.com/user-attachments/assets/a9413d25-501c-49dc-95cc-623db5988981" />


  


Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
This commit is contained in:
Jimmy Chen
2025-09-10 13:30:51 +10:00
committed by GitHub
parent 2b22903fba
commit 8a4f6cf0d5
7 changed files with 101 additions and 50 deletions

View File

@@ -1437,6 +1437,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// ///
/// Returns `None` when the state is not found in the database or there is an error skipping /// Returns `None` when the state is not found in the database or there is an error skipping
/// to a future state. /// to a future state.
#[instrument(level = "debug", skip_all)]
pub fn state_at_slot( pub fn state_at_slot(
&self, &self,
slot: Slot, slot: Slot,
@@ -4466,6 +4467,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
} }
/// If configured, wait for the fork choice run at the start of the slot to complete. /// If configured, wait for the fork choice run at the start of the slot to complete.
#[instrument(level = "debug", skip_all)]
fn wait_for_fork_choice_before_block_production( fn wait_for_fork_choice_before_block_production(
self: &Arc<Self>, self: &Arc<Self>,
slot: Slot, slot: Slot,
@@ -4528,10 +4530,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
// //
// Load the parent state from disk. // Load the parent state from disk.
let chain = self.clone(); let chain = self.clone();
let span = Span::current();
let (state, state_root_opt) = self let (state, state_root_opt) = self
.task_executor .task_executor
.spawn_blocking_handle( .spawn_blocking_handle(
move || chain.load_state_for_block_production(slot), move || {
let _guard =
debug_span!(parent: span, "load_state_for_block_production").entered();
chain.load_state_for_block_production(slot)
},
"load_state_for_block_production", "load_state_for_block_production",
) )
.ok_or(BlockProductionError::ShuttingDown)? .ok_or(BlockProductionError::ShuttingDown)?
@@ -4618,6 +4625,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable. /// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable.
/// ///
/// This function will return `None` if proposer re-orgs are disabled. /// This function will return `None` if proposer re-orgs are disabled.
#[instrument(skip_all, level = "debug")]
fn get_state_for_re_org( fn get_state_for_re_org(
&self, &self,
slot: Slot, slot: Slot,
@@ -5072,6 +5080,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// equal to the root of `state`. Providing this value will serve as an optimization to avoid /// equal to the root of `state`. Providing this value will serve as an optimization to avoid
/// performing a tree hash in some scenarios. /// performing a tree hash in some scenarios.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[instrument(level = "debug", skip_all)]
pub async fn produce_block_on_state( pub async fn produce_block_on_state(
self: &Arc<Self>, self: &Arc<Self>,
state: BeaconState<T::EthSpec>, state: BeaconState<T::EthSpec>,
@@ -5091,10 +5100,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
.graffiti_calculator .graffiti_calculator
.get_graffiti(validator_graffiti) .get_graffiti(validator_graffiti)
.await; .await;
let span = Span::current();
let mut partial_beacon_block = self let mut partial_beacon_block = self
.task_executor .task_executor
.spawn_blocking_handle( .spawn_blocking_handle(
move || { move || {
let _guard =
debug_span!(parent: span, "produce_partial_beacon_block").entered();
chain.produce_partial_beacon_block( chain.produce_partial_beacon_block(
state, state,
state_root_opt, state_root_opt,
@@ -5130,10 +5142,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
match block_contents_type { match block_contents_type {
BlockProposalContentsType::Full(block_contents) => { BlockProposalContentsType::Full(block_contents) => {
let chain = self.clone(); let chain = self.clone();
let span = Span::current();
let beacon_block_response = self let beacon_block_response = self
.task_executor .task_executor
.spawn_blocking_handle( .spawn_blocking_handle(
move || { move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block")
.entered();
chain.complete_partial_beacon_block( chain.complete_partial_beacon_block(
partial_beacon_block, partial_beacon_block,
Some(block_contents), Some(block_contents),
@@ -5150,10 +5166,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
} }
BlockProposalContentsType::Blinded(block_contents) => { BlockProposalContentsType::Blinded(block_contents) => {
let chain = self.clone(); let chain = self.clone();
let span = Span::current();
let beacon_block_response = self let beacon_block_response = self
.task_executor .task_executor
.spawn_blocking_handle( .spawn_blocking_handle(
move || { move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block")
.entered();
chain.complete_partial_beacon_block( chain.complete_partial_beacon_block(
partial_beacon_block, partial_beacon_block,
Some(block_contents), Some(block_contents),
@@ -5171,10 +5191,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
} }
} else { } else {
let chain = self.clone(); let chain = self.clone();
let span = Span::current();
let beacon_block_response = self let beacon_block_response = self
.task_executor .task_executor
.spawn_blocking_handle( .spawn_blocking_handle(
move || { move || {
let _guard =
debug_span!(parent: span, "complete_partial_beacon_block").entered();
chain.complete_partial_beacon_block( chain.complete_partial_beacon_block(
partial_beacon_block, partial_beacon_block,
None, None,
@@ -5276,51 +5299,54 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
// Iterate through the naive aggregation pool and ensure all the attestations from there // Iterate through the naive aggregation pool and ensure all the attestations from there
// are included in the operation pool. // are included in the operation pool.
let unagg_import_timer = {
metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES); let _guard = debug_span!("import_naive_aggregation_pool").entered();
for attestation in self.naive_aggregation_pool.read().iter() { let _unagg_import_timer =
let import = |attestation: &Attestation<T::EthSpec>| { metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
let attesting_indices = for attestation in self.naive_aggregation_pool.read().iter() {
get_attesting_indices_from_state(&state, attestation.to_ref())?; let import = |attestation: &Attestation<T::EthSpec>| {
self.op_pool let attesting_indices =
.insert_attestation(attestation.clone(), attesting_indices) get_attesting_indices_from_state(&state, attestation.to_ref())?;
}; self.op_pool
if let Err(e) = import(attestation) { .insert_attestation(attestation.clone(), attesting_indices)
// Don't stop block production if there's an error, just create a log. };
error!( if let Err(e) = import(attestation) {
reason = ?e, // Don't stop block production if there's an error, just create a log.
"Attestation did not transfer to op pool" error!(
); reason = ?e,
"Attestation did not transfer to op pool"
);
}
} }
}
drop(unagg_import_timer);
let attestation_packing_timer =
metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
// Epoch cache and total balance cache are required for op pool packing.
state.build_total_active_balance_cache(&self.spec)?;
initialize_epoch_cache(&mut state, &self.spec)?;
let mut prev_filter_cache = HashMap::new();
let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
};
let mut curr_filter_cache = HashMap::new();
let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
}; };
let mut attestations = self let mut attestations = {
.op_pool let _guard = debug_span!("pack_attestations").entered();
.get_attestations( let _attestation_packing_timer =
&state, metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
prev_attestation_filter,
curr_attestation_filter, // Epoch cache and total balance cache are required for op pool packing.
&self.spec, state.build_total_active_balance_cache(&self.spec)?;
) initialize_epoch_cache(&mut state, &self.spec)?;
.map_err(BlockProductionError::OpPoolError)?;
drop(attestation_packing_timer); let mut prev_filter_cache = HashMap::new();
let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
};
let mut curr_filter_cache = HashMap::new();
let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
};
self.op_pool
.get_attestations(
&state,
prev_attestation_filter,
curr_attestation_filter,
&self.spec,
)
.map_err(BlockProductionError::OpPoolError)?
};
// If paranoid mode is enabled re-check the signatures of every included message. // If paranoid mode is enabled re-check the signatures of every included message.
// This will be a lot slower but guards against bugs in block production and can be // This will be a lot slower but guards against bugs in block production and can be

View File

@@ -24,7 +24,7 @@ use state_processing::per_block_processing::{
}; };
use std::sync::Arc; use std::sync::Arc;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tracing::{debug, warn}; use tracing::{Instrument, debug, debug_span, warn};
use tree_hash::TreeHash; use tree_hash::TreeHash;
use types::payload::BlockProductionVersion; use types::payload::BlockProductionVersion;
use types::*; use types::*;
@@ -403,8 +403,9 @@ pub fn get_execution_payload<T: BeaconChainTypes>(
block_production_version, block_production_version,
) )
.await .await
}, }
"get_execution_payload", .instrument(debug_span!("prepare_execution_payload")),
"prepare_execution_payload",
) )
.ok_or(BlockProductionError::ShuttingDown)?; .ok_or(BlockProductionError::ShuttingDown)?;
@@ -503,6 +504,7 @@ where
}, },
"prepare_execution_payload_forkchoice_update_params", "prepare_execution_payload_forkchoice_update_params",
) )
.instrument(debug_span!("forkchoice_update_params"))
.await .await
.map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?; .map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?;

View File

@@ -43,7 +43,7 @@ use tokio::{
time::sleep, time::sleep,
}; };
use tokio_stream::wrappers::WatchStream; use tokio_stream::wrappers::WatchStream;
use tracing::{debug, error, info, warn}; use tracing::{Instrument, debug, debug_span, error, info, instrument, warn};
use tree_hash::TreeHash; use tree_hash::TreeHash;
use types::beacon_block_body::KzgCommitments; use types::beacon_block_body::KzgCommitments;
use types::builder_bid::BuilderBid; use types::builder_bid::BuilderBid;
@@ -851,6 +851,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
} }
/// Returns the fee-recipient address that should be used to build a block /// Returns the fee-recipient address that should be used to build a block
#[instrument(level = "debug", skip_all)]
pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address { pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address {
if let Some(preparation_data_entry) = if let Some(preparation_data_entry) =
self.proposer_preparation_data().await.get(&proposer_index) self.proposer_preparation_data().await.get(&proposer_index)
@@ -875,6 +876,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
} }
} }
#[instrument(level = "debug", skip_all)]
pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option<u64> { pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option<u64> {
self.proposer_preparation_data() self.proposer_preparation_data()
.await .await
@@ -891,6 +893,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
/// ///
/// The result will be returned from the first node that returns successfully. No more nodes /// The result will be returned from the first node that returns successfully. No more nodes
/// will be contacted. /// will be contacted.
#[instrument(level = "debug", skip_all)]
pub async fn get_payload( pub async fn get_payload(
&self, &self,
payload_parameters: PayloadParameters<'_>, payload_parameters: PayloadParameters<'_>,
@@ -996,6 +999,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async { timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async {
builder builder
.get_builder_header::<E>(slot, parent_hash, pubkey) .get_builder_header::<E>(slot, parent_hash, pubkey)
.instrument(debug_span!("get_builder_header"))
.await .await
}), }),
timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async { timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async {
@@ -1237,6 +1241,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
.await .await
} }
#[instrument(level = "debug", skip_all)]
async fn get_full_payload_with( async fn get_full_payload_with(
&self, &self,
payload_parameters: PayloadParameters<'_>, payload_parameters: PayloadParameters<'_>,

View File

@@ -10,8 +10,10 @@ use beacon_chain::{
BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification, BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification,
}; };
use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification}; use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification};
use lighthouse_tracing::{SPAN_PRODUCE_BLOCK_V2, SPAN_PRODUCE_BLOCK_V3};
use ssz::Encode; use ssz::Encode;
use std::sync::Arc; use std::sync::Arc;
use tracing::instrument;
use types::{payload::BlockProductionVersion, *}; use types::{payload::BlockProductionVersion, *};
use warp::{ use warp::{
Reply, Reply,
@@ -40,6 +42,11 @@ pub fn get_randao_verification(
Ok(randao_verification) Ok(randao_verification)
} }
#[instrument(
name = SPAN_PRODUCE_BLOCK_V3,
skip_all,
fields(%slot)
)]
pub async fn produce_block_v3<T: BeaconChainTypes>( pub async fn produce_block_v3<T: BeaconChainTypes>(
accept_header: Option<api_types::Accept>, accept_header: Option<api_types::Accept>,
chain: Arc<BeaconChain<T>>, chain: Arc<BeaconChain<T>>,
@@ -155,6 +162,11 @@ pub async fn produce_blinded_block_v2<T: BeaconChainTypes>(
build_response_v2(chain, block_response_type, accept_header) build_response_v2(chain, block_response_type, accept_header)
} }
#[instrument(
name = SPAN_PRODUCE_BLOCK_V2,
skip_all,
fields(%slot)
)]
pub async fn produce_block_v2<T: BeaconChainTypes>( pub async fn produce_block_v2<T: BeaconChainTypes>(
accept_header: Option<api_types::Accept>, accept_header: Option<api_types::Accept>,
chain: Arc<BeaconChain<T>>, chain: Arc<BeaconChain<T>>,

View File

@@ -3,7 +3,9 @@
//! TODO: These span identifiers will be used to implement selective tracing export (to be implemented), //! TODO: These span identifiers will be used to implement selective tracing export (to be implemented),
//! where only the listed root spans and their descendants will be exported to the tracing backend. //! where only the listed root spans and their descendants will be exported to the tracing backend.
/// Root span name for publish_block /// Root span names for block production and publishing
pub const SPAN_PRODUCE_BLOCK_V2: &str = "produce_block_v2";
pub const SPAN_PRODUCE_BLOCK_V3: &str = "produce_block_v3";
pub const SPAN_PUBLISH_BLOCK: &str = "publish_block"; pub const SPAN_PUBLISH_BLOCK: &str = "publish_block";
/// Data Availability checker span identifiers /// Data Availability checker span identifiers
@@ -42,11 +44,14 @@ pub const SPAN_HANDLE_LIGHT_CLIENT_FINALITY_UPDATE: &str = "handle_light_client_
/// Only these spans and their descendants will be processed to reduce noise from /// Only these spans and their descendants will be processed to reduce noise from
/// uninstrumented code paths. New root spans must be added to this list to be traced. /// uninstrumented code paths. New root spans must be added to this list to be traced.
pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[ pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[
SPAN_SYNCING_CHAIN, SPAN_PRODUCE_BLOCK_V2,
SPAN_PRODUCE_BLOCK_V3,
SPAN_PUBLISH_BLOCK,
SPAN_PENDING_COMPONENTS, SPAN_PENDING_COMPONENTS,
SPAN_PROCESS_GOSSIP_DATA_COLUMN, SPAN_PROCESS_GOSSIP_DATA_COLUMN,
SPAN_PROCESS_GOSSIP_BLOB, SPAN_PROCESS_GOSSIP_BLOB,
SPAN_PROCESS_GOSSIP_BLOCK, SPAN_PROCESS_GOSSIP_BLOCK,
SPAN_SYNCING_CHAIN,
SPAN_OUTGOING_RANGE_REQUEST, SPAN_OUTGOING_RANGE_REQUEST,
SPAN_SINGLE_BLOCK_LOOKUP, SPAN_SINGLE_BLOCK_LOOKUP,
SPAN_PROCESS_RPC_BLOCK, SPAN_PROCESS_RPC_BLOCK,

View File

@@ -523,6 +523,7 @@ where
/// ///
/// You *must* call `get_head` for the proposal slot prior to calling this function and pass /// You *must* call `get_head` for the proposal slot prior to calling this function and pass
/// in the result of `get_head` as `canonical_head`. /// in the result of `get_head` as `canonical_head`.
#[instrument(level = "debug", skip_all)]
pub fn get_proposer_head( pub fn get_proposer_head(
&self, &self,
current_slot: Slot, current_slot: Slot,

View File

@@ -26,7 +26,7 @@ impl From<ArithError> for Error {
/// If the root of the supplied `state` is known, then it can be passed as `state_root`. If /// If the root of the supplied `state` is known, then it can be passed as `state_root`. If
/// `state_root` is `None`, the root of `state` will be computed using a cached tree hash. /// `state_root` is `None`, the root of `state` will be computed using a cached tree hash.
/// Providing the `state_root` makes this function several orders of magnitude faster. /// Providing the `state_root` makes this function several orders of magnitude faster.
#[instrument(skip_all)] #[instrument(level = "debug", skip_all)]
pub fn per_slot_processing<E: EthSpec>( pub fn per_slot_processing<E: EthSpec>(
state: &mut BeaconState<E>, state: &mut BeaconState<E>,
state_root: Option<Hash256>, state_root: Option<Hash256>,