From 8a4f6cf0d5b6b261b2c3439ce7c05383a53d30c5 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 10 Sep 2025 13:30:51 +1000 Subject: [PATCH] Instrument tracing on block production code path (#8017) Partially #7814. Instrument block production code path. New root spans: * `produce_block_v3` * `produce_block_v2` Example traces: image Co-Authored-By: Jimmy Chen --- beacon_node/beacon_chain/src/beacon_chain.rs | 112 +++++++++++------- .../beacon_chain/src/execution_payload.rs | 8 +- beacon_node/execution_layer/src/lib.rs | 7 +- beacon_node/http_api/src/produce_block.rs | 12 ++ beacon_node/lighthouse_tracing/src/lib.rs | 9 +- consensus/fork_choice/src/fork_choice.rs | 1 + .../src/per_slot_processing.rs | 2 +- 7 files changed, 101 insertions(+), 50 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index b8a6529653..6e11b66610 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1437,6 +1437,7 @@ impl BeaconChain { /// /// Returns `None` when the state is not found in the database or there is an error skipping /// to a future state. + #[instrument(level = "debug", skip_all)] pub fn state_at_slot( &self, slot: Slot, @@ -4466,6 +4467,7 @@ impl BeaconChain { } /// If configured, wait for the fork choice run at the start of the slot to complete. + #[instrument(level = "debug", skip_all)] fn wait_for_fork_choice_before_block_production( self: &Arc, slot: Slot, @@ -4528,10 +4530,15 @@ impl BeaconChain { // // Load the parent state from disk. let chain = self.clone(); + let span = Span::current(); let (state, state_root_opt) = self .task_executor .spawn_blocking_handle( - move || chain.load_state_for_block_production(slot), + move || { + let _guard = + debug_span!(parent: span, "load_state_for_block_production").entered(); + chain.load_state_for_block_production(slot) + }, "load_state_for_block_production", ) .ok_or(BlockProductionError::ShuttingDown)? @@ -4618,6 +4625,7 @@ impl BeaconChain { /// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable. /// /// This function will return `None` if proposer re-orgs are disabled. + #[instrument(skip_all, level = "debug")] fn get_state_for_re_org( &self, slot: Slot, @@ -5072,6 +5080,7 @@ impl BeaconChain { /// equal to the root of `state`. Providing this value will serve as an optimization to avoid /// performing a tree hash in some scenarios. #[allow(clippy::too_many_arguments)] + #[instrument(level = "debug", skip_all)] pub async fn produce_block_on_state( self: &Arc, state: BeaconState, @@ -5091,10 +5100,13 @@ impl BeaconChain { .graffiti_calculator .get_graffiti(validator_graffiti) .await; + let span = Span::current(); let mut partial_beacon_block = self .task_executor .spawn_blocking_handle( move || { + let _guard = + debug_span!(parent: span, "produce_partial_beacon_block").entered(); chain.produce_partial_beacon_block( state, state_root_opt, @@ -5130,10 +5142,14 @@ impl BeaconChain { match block_contents_type { BlockProposalContentsType::Full(block_contents) => { let chain = self.clone(); + let span = Span::current(); let beacon_block_response = self .task_executor .spawn_blocking_handle( move || { + let _guard = + debug_span!(parent: span, "complete_partial_beacon_block") + .entered(); chain.complete_partial_beacon_block( partial_beacon_block, Some(block_contents), @@ -5150,10 +5166,14 @@ impl BeaconChain { } BlockProposalContentsType::Blinded(block_contents) => { let chain = self.clone(); + let span = Span::current(); let beacon_block_response = self .task_executor .spawn_blocking_handle( move || { + let _guard = + debug_span!(parent: span, "complete_partial_beacon_block") + .entered(); chain.complete_partial_beacon_block( partial_beacon_block, Some(block_contents), @@ -5171,10 +5191,13 @@ impl BeaconChain { } } else { let chain = self.clone(); + let span = Span::current(); let beacon_block_response = self .task_executor .spawn_blocking_handle( move || { + let _guard = + debug_span!(parent: span, "complete_partial_beacon_block").entered(); chain.complete_partial_beacon_block( partial_beacon_block, None, @@ -5276,51 +5299,54 @@ impl BeaconChain { // Iterate through the naive aggregation pool and ensure all the attestations from there // are included in the operation pool. - let unagg_import_timer = - metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES); - for attestation in self.naive_aggregation_pool.read().iter() { - let import = |attestation: &Attestation| { - let attesting_indices = - get_attesting_indices_from_state(&state, attestation.to_ref())?; - self.op_pool - .insert_attestation(attestation.clone(), attesting_indices) - }; - if let Err(e) = import(attestation) { - // Don't stop block production if there's an error, just create a log. - error!( - reason = ?e, - "Attestation did not transfer to op pool" - ); + { + let _guard = debug_span!("import_naive_aggregation_pool").entered(); + let _unagg_import_timer = + metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES); + for attestation in self.naive_aggregation_pool.read().iter() { + let import = |attestation: &Attestation| { + let attesting_indices = + get_attesting_indices_from_state(&state, attestation.to_ref())?; + self.op_pool + .insert_attestation(attestation.clone(), attesting_indices) + }; + if let Err(e) = import(attestation) { + // Don't stop block production if there's an error, just create a log. + error!( + reason = ?e, + "Attestation did not transfer to op pool" + ); + } } - } - drop(unagg_import_timer); - - let attestation_packing_timer = - metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES); - - // Epoch cache and total balance cache are required for op pool packing. - state.build_total_active_balance_cache(&self.spec)?; - initialize_epoch_cache(&mut state, &self.spec)?; - - let mut prev_filter_cache = HashMap::new(); - let prev_attestation_filter = |att: &CompactAttestationRef| { - self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state) - }; - let mut curr_filter_cache = HashMap::new(); - let curr_attestation_filter = |att: &CompactAttestationRef| { - self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state) }; - let mut attestations = self - .op_pool - .get_attestations( - &state, - prev_attestation_filter, - curr_attestation_filter, - &self.spec, - ) - .map_err(BlockProductionError::OpPoolError)?; - drop(attestation_packing_timer); + let mut attestations = { + let _guard = debug_span!("pack_attestations").entered(); + let _attestation_packing_timer = + metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES); + + // Epoch cache and total balance cache are required for op pool packing. + state.build_total_active_balance_cache(&self.spec)?; + initialize_epoch_cache(&mut state, &self.spec)?; + + let mut prev_filter_cache = HashMap::new(); + let prev_attestation_filter = |att: &CompactAttestationRef| { + self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state) + }; + let mut curr_filter_cache = HashMap::new(); + let curr_attestation_filter = |att: &CompactAttestationRef| { + self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state) + }; + + self.op_pool + .get_attestations( + &state, + prev_attestation_filter, + curr_attestation_filter, + &self.spec, + ) + .map_err(BlockProductionError::OpPoolError)? + }; // If paranoid mode is enabled re-check the signatures of every included message. // This will be a lot slower but guards against bugs in block production and can be diff --git a/beacon_node/beacon_chain/src/execution_payload.rs b/beacon_node/beacon_chain/src/execution_payload.rs index 697fee351e..f0cab06ca3 100644 --- a/beacon_node/beacon_chain/src/execution_payload.rs +++ b/beacon_node/beacon_chain/src/execution_payload.rs @@ -24,7 +24,7 @@ use state_processing::per_block_processing::{ }; use std::sync::Arc; use tokio::task::JoinHandle; -use tracing::{debug, warn}; +use tracing::{Instrument, debug, debug_span, warn}; use tree_hash::TreeHash; use types::payload::BlockProductionVersion; use types::*; @@ -403,8 +403,9 @@ pub fn get_execution_payload( block_production_version, ) .await - }, - "get_execution_payload", + } + .instrument(debug_span!("prepare_execution_payload")), + "prepare_execution_payload", ) .ok_or(BlockProductionError::ShuttingDown)?; @@ -503,6 +504,7 @@ where }, "prepare_execution_payload_forkchoice_update_params", ) + .instrument(debug_span!("forkchoice_update_params")) .await .map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?; diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs index b53c4cde4e..5b48b81aa6 100644 --- a/beacon_node/execution_layer/src/lib.rs +++ b/beacon_node/execution_layer/src/lib.rs @@ -43,7 +43,7 @@ use tokio::{ time::sleep, }; use tokio_stream::wrappers::WatchStream; -use tracing::{debug, error, info, warn}; +use tracing::{Instrument, debug, debug_span, error, info, instrument, warn}; use tree_hash::TreeHash; use types::beacon_block_body::KzgCommitments; use types::builder_bid::BuilderBid; @@ -851,6 +851,7 @@ impl ExecutionLayer { } /// Returns the fee-recipient address that should be used to build a block + #[instrument(level = "debug", skip_all)] pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address { if let Some(preparation_data_entry) = self.proposer_preparation_data().await.get(&proposer_index) @@ -875,6 +876,7 @@ impl ExecutionLayer { } } + #[instrument(level = "debug", skip_all)] pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option { self.proposer_preparation_data() .await @@ -891,6 +893,7 @@ impl ExecutionLayer { /// /// The result will be returned from the first node that returns successfully. No more nodes /// will be contacted. + #[instrument(level = "debug", skip_all)] pub async fn get_payload( &self, payload_parameters: PayloadParameters<'_>, @@ -996,6 +999,7 @@ impl ExecutionLayer { timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async { builder .get_builder_header::(slot, parent_hash, pubkey) + .instrument(debug_span!("get_builder_header")) .await }), timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async { @@ -1237,6 +1241,7 @@ impl ExecutionLayer { .await } + #[instrument(level = "debug", skip_all)] async fn get_full_payload_with( &self, payload_parameters: PayloadParameters<'_>, diff --git a/beacon_node/http_api/src/produce_block.rs b/beacon_node/http_api/src/produce_block.rs index 932fb00179..367e09969b 100644 --- a/beacon_node/http_api/src/produce_block.rs +++ b/beacon_node/http_api/src/produce_block.rs @@ -10,8 +10,10 @@ use beacon_chain::{ BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification, }; use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification}; +use lighthouse_tracing::{SPAN_PRODUCE_BLOCK_V2, SPAN_PRODUCE_BLOCK_V3}; use ssz::Encode; use std::sync::Arc; +use tracing::instrument; use types::{payload::BlockProductionVersion, *}; use warp::{ Reply, @@ -40,6 +42,11 @@ pub fn get_randao_verification( Ok(randao_verification) } +#[instrument( + name = SPAN_PRODUCE_BLOCK_V3, + skip_all, + fields(%slot) +)] pub async fn produce_block_v3( accept_header: Option, chain: Arc>, @@ -155,6 +162,11 @@ pub async fn produce_blinded_block_v2( build_response_v2(chain, block_response_type, accept_header) } +#[instrument( + name = SPAN_PRODUCE_BLOCK_V2, + skip_all, + fields(%slot) +)] pub async fn produce_block_v2( accept_header: Option, chain: Arc>, diff --git a/beacon_node/lighthouse_tracing/src/lib.rs b/beacon_node/lighthouse_tracing/src/lib.rs index d31df4e3dd..1787399761 100644 --- a/beacon_node/lighthouse_tracing/src/lib.rs +++ b/beacon_node/lighthouse_tracing/src/lib.rs @@ -3,7 +3,9 @@ //! TODO: These span identifiers will be used to implement selective tracing export (to be implemented), //! where only the listed root spans and their descendants will be exported to the tracing backend. -/// Root span name for publish_block +/// Root span names for block production and publishing +pub const SPAN_PRODUCE_BLOCK_V2: &str = "produce_block_v2"; +pub const SPAN_PRODUCE_BLOCK_V3: &str = "produce_block_v3"; pub const SPAN_PUBLISH_BLOCK: &str = "publish_block"; /// Data Availability checker span identifiers @@ -42,11 +44,14 @@ pub const SPAN_HANDLE_LIGHT_CLIENT_FINALITY_UPDATE: &str = "handle_light_client_ /// Only these spans and their descendants will be processed to reduce noise from /// uninstrumented code paths. New root spans must be added to this list to be traced. pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[ - SPAN_SYNCING_CHAIN, + SPAN_PRODUCE_BLOCK_V2, + SPAN_PRODUCE_BLOCK_V3, + SPAN_PUBLISH_BLOCK, SPAN_PENDING_COMPONENTS, SPAN_PROCESS_GOSSIP_DATA_COLUMN, SPAN_PROCESS_GOSSIP_BLOB, SPAN_PROCESS_GOSSIP_BLOCK, + SPAN_SYNCING_CHAIN, SPAN_OUTGOING_RANGE_REQUEST, SPAN_SINGLE_BLOCK_LOOKUP, SPAN_PROCESS_RPC_BLOCK, diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 19f294d439..fe1f5fba9e 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -523,6 +523,7 @@ where /// /// You *must* call `get_head` for the proposal slot prior to calling this function and pass /// in the result of `get_head` as `canonical_head`. + #[instrument(level = "debug", skip_all)] pub fn get_proposer_head( &self, current_slot: Slot, diff --git a/consensus/state_processing/src/per_slot_processing.rs b/consensus/state_processing/src/per_slot_processing.rs index 04b1e8148f..8695054e1e 100644 --- a/consensus/state_processing/src/per_slot_processing.rs +++ b/consensus/state_processing/src/per_slot_processing.rs @@ -26,7 +26,7 @@ impl From for Error { /// If the root of the supplied `state` is known, then it can be passed as `state_root`. If /// `state_root` is `None`, the root of `state` will be computed using a cached tree hash. /// Providing the `state_root` makes this function several orders of magnitude faster. -#[instrument(skip_all)] +#[instrument(level = "debug", skip_all)] pub fn per_slot_processing( state: &mut BeaconState, state_root: Option,