Instrument tracing on block production code path (#8017)

Partially addresses #7814. Instrument the block production code path with tracing spans.

New root spans:
* `produce_block_v3`
* `produce_block_v2`

Example traces:
<img width="518" height="432" alt="image" src="https://github.com/user-attachments/assets/a9413d25-501c-49dc-95cc-623db5988981" />

Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
2026-04-26 17:23:41 +00:00 · 2025-09-10 13:30:51 +10:00
parent 2b22903fba
commit 8a4f6cf0d5
7 changed files with 101 additions and 50 deletions
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -1437,6 +1437,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    ///
    /// Returns `None` when the state is not found in the database or there is an error skipping
    /// to a future state.
+    #[instrument(level = "debug", skip_all)]
    pub fn state_at_slot(
        &self,
        slot: Slot,
@@ -4466,6 +4467,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    }

    /// If configured, wait for the fork choice run at the start of the slot to complete.
+    #[instrument(level = "debug", skip_all)]
    fn wait_for_fork_choice_before_block_production(
        self: &Arc<Self>,
        slot: Slot,
@@ -4528,10 +4530,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
        //
        // Load the parent state from disk.
        let chain = self.clone();
+        let span = Span::current();
        let (state, state_root_opt) = self
            .task_executor
            .spawn_blocking_handle(
-                move || chain.load_state_for_block_production(slot),
+                move || {
+                    let _guard =
+                        debug_span!(parent: span, "load_state_for_block_production").entered();
+                    chain.load_state_for_block_production(slot)
+                },
                "load_state_for_block_production",
            )
            .ok_or(BlockProductionError::ShuttingDown)?
@@ -4618,6 +4625,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    /// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable.
    ///
    /// This function will return `None` if proposer re-orgs are disabled.
+    #[instrument(skip_all, level = "debug")]
    fn get_state_for_re_org(
        &self,
        slot: Slot,
@@ -5072,6 +5080,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    /// equal to the root of `state`. Providing this value will serve as an optimization to avoid
    /// performing a tree hash in some scenarios.
    #[allow(clippy::too_many_arguments)]
+    #[instrument(level = "debug", skip_all)]
    pub async fn produce_block_on_state(
        self: &Arc<Self>,
        state: BeaconState<T::EthSpec>,
@@ -5091,10 +5100,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            .graffiti_calculator
            .get_graffiti(validator_graffiti)
            .await;
+        let span = Span::current();
        let mut partial_beacon_block = self
            .task_executor
            .spawn_blocking_handle(
                move || {
+                    let _guard =
+                        debug_span!(parent: span, "produce_partial_beacon_block").entered();
                    chain.produce_partial_beacon_block(
                        state,
                        state_root_opt,
@@ -5130,10 +5142,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            match block_contents_type {
                BlockProposalContentsType::Full(block_contents) => {
                    let chain = self.clone();
+                    let span = Span::current();
                    let beacon_block_response = self
                        .task_executor
                        .spawn_blocking_handle(
                            move || {
+                                let _guard =
+                                    debug_span!(parent: span, "complete_partial_beacon_block")
+                                        .entered();
                                chain.complete_partial_beacon_block(
                                    partial_beacon_block,
                                    Some(block_contents),
@@ -5150,10 +5166,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                }
                BlockProposalContentsType::Blinded(block_contents) => {
                    let chain = self.clone();
+                    let span = Span::current();
                    let beacon_block_response = self
                        .task_executor
                        .spawn_blocking_handle(
                            move || {
+                                let _guard =
+                                    debug_span!(parent: span, "complete_partial_beacon_block")
+                                        .entered();
                                chain.complete_partial_beacon_block(
                                    partial_beacon_block,
                                    Some(block_contents),
@@ -5171,10 +5191,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            }
        } else {
            let chain = self.clone();
+            let span = Span::current();
            let beacon_block_response = self
                .task_executor
                .spawn_blocking_handle(
                    move || {
+                        let _guard =
+                            debug_span!(parent: span, "complete_partial_beacon_block").entered();
                        chain.complete_partial_beacon_block(
                            partial_beacon_block,
                            None,
@@ -5276,51 +5299,54 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

        // Iterate through the naive aggregation pool and ensure all the attestations from there
        // are included in the operation pool.
-        let unagg_import_timer =
-            metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
-        for attestation in self.naive_aggregation_pool.read().iter() {
-            let import = |attestation: &Attestation<T::EthSpec>| {
-                let attesting_indices =
-                    get_attesting_indices_from_state(&state, attestation.to_ref())?;
-                self.op_pool
-                    .insert_attestation(attestation.clone(), attesting_indices)
-            };
-            if let Err(e) = import(attestation) {
-                // Don't stop block production if there's an error, just create a log.
-                error!(
-                    reason = ?e,
-                    "Attestation did not transfer to op pool"
-                );
+        {
+            let _guard = debug_span!("import_naive_aggregation_pool").entered();
+            let _unagg_import_timer =
+                metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
+            for attestation in self.naive_aggregation_pool.read().iter() {
+                let import = |attestation: &Attestation<T::EthSpec>| {
+                    let attesting_indices =
+                        get_attesting_indices_from_state(&state, attestation.to_ref())?;
+                    self.op_pool
+                        .insert_attestation(attestation.clone(), attesting_indices)
+                };
+                if let Err(e) = import(attestation) {
+                    // Don't stop block production if there's an error, just create a log.
+                    error!(
+                        reason = ?e,
+                        "Attestation did not transfer to op pool"
+                    );
+                }
            }
-        }
-        drop(unagg_import_timer);
-
-        let attestation_packing_timer =
-            metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
-
-        // Epoch cache and total balance cache are required for op pool packing.
-        state.build_total_active_balance_cache(&self.spec)?;
-        initialize_epoch_cache(&mut state, &self.spec)?;
-
-        let mut prev_filter_cache = HashMap::new();
-        let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
-            self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
-        };
-        let mut curr_filter_cache = HashMap::new();
-        let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
-            self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
        };

-        let mut attestations = self
-            .op_pool
-            .get_attestations(
-                &state,
-                prev_attestation_filter,
-                curr_attestation_filter,
-                &self.spec,
-            )
-            .map_err(BlockProductionError::OpPoolError)?;
-        drop(attestation_packing_timer);
+        let mut attestations = {
+            let _guard = debug_span!("pack_attestations").entered();
+            let _attestation_packing_timer =
+                metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
+
+            // Epoch cache and total balance cache are required for op pool packing.
+            state.build_total_active_balance_cache(&self.spec)?;
+            initialize_epoch_cache(&mut state, &self.spec)?;
+
+            let mut prev_filter_cache = HashMap::new();
+            let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
+                self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
+            };
+            let mut curr_filter_cache = HashMap::new();
+            let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
+                self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
+            };
+
+            self.op_pool
+                .get_attestations(
+                    &state,
+                    prev_attestation_filter,
+                    curr_attestation_filter,
+                    &self.spec,
+                )
+                .map_err(BlockProductionError::OpPoolError)?
+        };

        // If paranoid mode is enabled re-check the signatures of every included message.
        // This will be a lot slower but guards against bugs in block production and can be
--- a/beacon_node/beacon_chain/src/execution_payload.rs
+++ b/beacon_node/beacon_chain/src/execution_payload.rs
@@ -24,7 +24,7 @@ use state_processing::per_block_processing::{
 };
 use std::sync::Arc;
 use tokio::task::JoinHandle;
-use tracing::{debug, warn};
+use tracing::{Instrument, debug, debug_span, warn};
 use tree_hash::TreeHash;
 use types::payload::BlockProductionVersion;
 use types::*;
@@ -403,8 +403,9 @@ pub fn get_execution_payload<T: BeaconChainTypes>(
                    block_production_version,
                )
                .await
-            },
-            "get_execution_payload",
+            }
+            .instrument(debug_span!("prepare_execution_payload")),
+            "prepare_execution_payload",
        )
        .ok_or(BlockProductionError::ShuttingDown)?;

@@ -503,6 +504,7 @@ where
            },
            "prepare_execution_payload_forkchoice_update_params",
        )
+        .instrument(debug_span!("forkchoice_update_params"))
        .await
        .map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?;

--- a/beacon_node/execution_layer/src/lib.rs
+++ b/beacon_node/execution_layer/src/lib.rs
@@ -43,7 +43,7 @@ use tokio::{
    time::sleep,
 };
 use tokio_stream::wrappers::WatchStream;
-use tracing::{debug, error, info, warn};
+use tracing::{Instrument, debug, debug_span, error, info, instrument, warn};
 use tree_hash::TreeHash;
 use types::beacon_block_body::KzgCommitments;
 use types::builder_bid::BuilderBid;
@@ -851,6 +851,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
    }

    /// Returns the fee-recipient address that should be used to build a block
+    #[instrument(level = "debug", skip_all)]
    pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address {
        if let Some(preparation_data_entry) =
            self.proposer_preparation_data().await.get(&proposer_index)
@@ -875,6 +876,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
        }
    }

+    #[instrument(level = "debug", skip_all)]
    pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option<u64> {
        self.proposer_preparation_data()
            .await
@@ -891,6 +893,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
    ///
    /// The result will be returned from the first node that returns successfully. No more nodes
    /// will be contacted.
+    #[instrument(level = "debug", skip_all)]
    pub async fn get_payload(
        &self,
        payload_parameters: PayloadParameters<'_>,
@@ -996,6 +999,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
            timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async {
                builder
                    .get_builder_header::<E>(slot, parent_hash, pubkey)
+                    .instrument(debug_span!("get_builder_header"))
                    .await
            }),
            timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async {
@@ -1237,6 +1241,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
            .await
    }

+    #[instrument(level = "debug", skip_all)]
    async fn get_full_payload_with(
        &self,
        payload_parameters: PayloadParameters<'_>,
--- a/beacon_node/http_api/src/produce_block.rs
+++ b/beacon_node/http_api/src/produce_block.rs
@@ -10,8 +10,10 @@ use beacon_chain::{
    BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification,
 };
 use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification};
+use lighthouse_tracing::{SPAN_PRODUCE_BLOCK_V2, SPAN_PRODUCE_BLOCK_V3};
 use ssz::Encode;
 use std::sync::Arc;
+use tracing::instrument;
 use types::{payload::BlockProductionVersion, *};
 use warp::{
    Reply,
@@ -40,6 +42,11 @@ pub fn get_randao_verification(
    Ok(randao_verification)
 }

+#[instrument(
+    name = SPAN_PRODUCE_BLOCK_V3,
+    skip_all,
+    fields(%slot)
+)]
 pub async fn produce_block_v3<T: BeaconChainTypes>(
    accept_header: Option<api_types::Accept>,
    chain: Arc<BeaconChain<T>>,
@@ -155,6 +162,11 @@ pub async fn produce_blinded_block_v2<T: BeaconChainTypes>(
    build_response_v2(chain, block_response_type, accept_header)
 }

+#[instrument(
+    name = SPAN_PRODUCE_BLOCK_V2,
+    skip_all,
+    fields(%slot)
+)]
 pub async fn produce_block_v2<T: BeaconChainTypes>(
    accept_header: Option<api_types::Accept>,
    chain: Arc<BeaconChain<T>>,
--- a/beacon_node/lighthouse_tracing/src/lib.rs
+++ b/beacon_node/lighthouse_tracing/src/lib.rs
@@ -3,7 +3,9 @@
 //! TODO: These span identifiers will be used to implement selective tracing export (to be implemented),
 //! where only the listed root spans and their descendants will be exported to the tracing backend.

-/// Root span name for publish_block
+/// Root span names for block production and publishing
+pub const SPAN_PRODUCE_BLOCK_V2: &str = "produce_block_v2";
+pub const SPAN_PRODUCE_BLOCK_V3: &str = "produce_block_v3";
 pub const SPAN_PUBLISH_BLOCK: &str = "publish_block";

 /// Data Availability checker span identifiers
@@ -42,11 +44,14 @@ pub const SPAN_HANDLE_LIGHT_CLIENT_FINALITY_UPDATE: &str = "handle_light_client_
 /// Only these spans and their descendants will be processed to reduce noise from
 /// uninstrumented code paths. New root spans must be added to this list to be traced.
 pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[
-    SPAN_SYNCING_CHAIN,
+    SPAN_PRODUCE_BLOCK_V2,
+    SPAN_PRODUCE_BLOCK_V3,
+    SPAN_PUBLISH_BLOCK,
    SPAN_PENDING_COMPONENTS,
    SPAN_PROCESS_GOSSIP_DATA_COLUMN,
    SPAN_PROCESS_GOSSIP_BLOB,
    SPAN_PROCESS_GOSSIP_BLOCK,
+    SPAN_SYNCING_CHAIN,
    SPAN_OUTGOING_RANGE_REQUEST,
    SPAN_SINGLE_BLOCK_LOOKUP,
    SPAN_PROCESS_RPC_BLOCK,