From 8a4f6cf0d5b6b261b2c3439ce7c05383a53d30c5 Mon Sep 17 00:00:00 2001
From: Jimmy Chen <jimmy@sigmaprime.io>
Date: Wed, 10 Sep 2025 13:30:51 +1000
Subject: [PATCH] Instrument tracing on block production code path (#8017)

Partially #7814. Instrument block production code path.

New root spans:
* `produce_block_v3`
* `produce_block_v2`

Example traces:

<img width="518" height="432" alt="image" src="https://github.com/user-attachments/assets/a9413d25-501c-49dc-95cc-623db5988981" />





Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
---
 beacon_node/beacon_chain/src/beacon_chain.rs  | 112 +++++++++++-------
 .../beacon_chain/src/execution_payload.rs     |   8 +-
 beacon_node/execution_layer/src/lib.rs        |   7 +-
 beacon_node/http_api/src/produce_block.rs     |  12 ++
 beacon_node/lighthouse_tracing/src/lib.rs     |   9 +-
 consensus/fork_choice/src/fork_choice.rs      |   1 +
 .../src/per_slot_processing.rs                |   2 +-
 7 files changed, 101 insertions(+), 50 deletions(-)
diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs
index b8a6529653..6e11b66610 100644
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -1437,6 +1437,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     ///
     /// Returns `None` when the state is not found in the database or there is an error skipping
     /// to a future state.
+    #[instrument(level = "debug", skip_all)]
     pub fn state_at_slot(
         &self,
         slot: Slot,
@@ -4466,6 +4467,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     }
 
     /// If configured, wait for the fork choice run at the start of the slot to complete.
+    #[instrument(level = "debug", skip_all)]
     fn wait_for_fork_choice_before_block_production(
         self: &Arc<Self>,
         slot: Slot,
@@ -4528,10 +4530,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         //
         // Load the parent state from disk.
         let chain = self.clone();
+        let span = Span::current();
         let (state, state_root_opt) = self
             .task_executor
             .spawn_blocking_handle(
-                move || chain.load_state_for_block_production(slot),
+                move || {
+                    let _guard =
+                        debug_span!(parent: span, "load_state_for_block_production").entered();
+                    chain.load_state_for_block_production(slot)
+                },
                 "load_state_for_block_production",
             )
             .ok_or(BlockProductionError::ShuttingDown)?
@@ -4618,6 +4625,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     /// Fetch the beacon state to use for producing a block if a 1-slot proposer re-org is viable.
     ///
     /// This function will return `None` if proposer re-orgs are disabled.
+    #[instrument(skip_all, level = "debug")]
     fn get_state_for_re_org(
         &self,
         slot: Slot,
@@ -5072,6 +5080,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     /// equal to the root of `state`. Providing this value will serve as an optimization to avoid
     /// performing a tree hash in some scenarios.
     #[allow(clippy::too_many_arguments)]
+    #[instrument(level = "debug", skip_all)]
     pub async fn produce_block_on_state(
         self: &Arc<Self>,
         state: BeaconState<T::EthSpec>,
@@ -5091,10 +5100,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
             .graffiti_calculator
             .get_graffiti(validator_graffiti)
             .await;
+        let span = Span::current();
         let mut partial_beacon_block = self
             .task_executor
             .spawn_blocking_handle(
                 move || {
+                    let _guard =
+                        debug_span!(parent: span, "produce_partial_beacon_block").entered();
                     chain.produce_partial_beacon_block(
                         state,
                         state_root_opt,
@@ -5130,10 +5142,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
             match block_contents_type {
                 BlockProposalContentsType::Full(block_contents) => {
                     let chain = self.clone();
+                    let span = Span::current();
                     let beacon_block_response = self
                         .task_executor
                         .spawn_blocking_handle(
                             move || {
+                                let _guard =
+                                    debug_span!(parent: span, "complete_partial_beacon_block")
+                                        .entered();
                                 chain.complete_partial_beacon_block(
                                     partial_beacon_block,
                                     Some(block_contents),
@@ -5150,10 +5166,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                 }
                 BlockProposalContentsType::Blinded(block_contents) => {
                     let chain = self.clone();
+                    let span = Span::current();
                     let beacon_block_response = self
                         .task_executor
                         .spawn_blocking_handle(
                             move || {
+                                let _guard =
+                                    debug_span!(parent: span, "complete_partial_beacon_block")
+                                        .entered();
                                 chain.complete_partial_beacon_block(
                                     partial_beacon_block,
                                     Some(block_contents),
@@ -5171,10 +5191,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
             }
         } else {
             let chain = self.clone();
+            let span = Span::current();
             let beacon_block_response = self
                 .task_executor
                 .spawn_blocking_handle(
                     move || {
+                        let _guard =
+                            debug_span!(parent: span, "complete_partial_beacon_block").entered();
                         chain.complete_partial_beacon_block(
                             partial_beacon_block,
                             None,
@@ -5276,51 +5299,54 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
 
         // Iterate through the naive aggregation pool and ensure all the attestations from there
         // are included in the operation pool.
-        let unagg_import_timer =
-            metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
-        for attestation in self.naive_aggregation_pool.read().iter() {
-            let import = |attestation: &Attestation<T::EthSpec>| {
-                let attesting_indices =
-                    get_attesting_indices_from_state(&state, attestation.to_ref())?;
-                self.op_pool
-                    .insert_attestation(attestation.clone(), attesting_indices)
-            };
-            if let Err(e) = import(attestation) {
-                // Don't stop block production if there's an error, just create a log.
-                error!(
-                    reason = ?e,
-                    "Attestation did not transfer to op pool"
-                );
+        {
+            let _guard = debug_span!("import_naive_aggregation_pool").entered();
+            let _unagg_import_timer =
+                metrics::start_timer(&metrics::BLOCK_PRODUCTION_UNAGGREGATED_TIMES);
+            for attestation in self.naive_aggregation_pool.read().iter() {
+                let import = |attestation: &Attestation<T::EthSpec>| {
+                    let attesting_indices =
+                        get_attesting_indices_from_state(&state, attestation.to_ref())?;
+                    self.op_pool
+                        .insert_attestation(attestation.clone(), attesting_indices)
+                };
+                if let Err(e) = import(attestation) {
+                    // Don't stop block production if there's an error, just create a log.
+                    error!(
+                        reason = ?e,
+                        "Attestation did not transfer to op pool"
+                    );
+                }
             }
-        }
-        drop(unagg_import_timer);
-
-        let attestation_packing_timer =
-            metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
-
-        // Epoch cache and total balance cache are required for op pool packing.
-        state.build_total_active_balance_cache(&self.spec)?;
-        initialize_epoch_cache(&mut state, &self.spec)?;
-
-        let mut prev_filter_cache = HashMap::new();
-        let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
-            self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
-        };
-        let mut curr_filter_cache = HashMap::new();
-        let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
-            self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
         };
 
-        let mut attestations = self
-            .op_pool
-            .get_attestations(
-                &state,
-                prev_attestation_filter,
-                curr_attestation_filter,
-                &self.spec,
-            )
-            .map_err(BlockProductionError::OpPoolError)?;
-        drop(attestation_packing_timer);
+        let mut attestations = {
+            let _guard = debug_span!("pack_attestations").entered();
+            let _attestation_packing_timer =
+                metrics::start_timer(&metrics::BLOCK_PRODUCTION_ATTESTATION_TIMES);
+
+            // Epoch cache and total balance cache are required for op pool packing.
+            state.build_total_active_balance_cache(&self.spec)?;
+            initialize_epoch_cache(&mut state, &self.spec)?;
+
+            let mut prev_filter_cache = HashMap::new();
+            let prev_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
+                self.filter_op_pool_attestation(&mut prev_filter_cache, att, &state)
+            };
+            let mut curr_filter_cache = HashMap::new();
+            let curr_attestation_filter = |att: &CompactAttestationRef<T::EthSpec>| {
+                self.filter_op_pool_attestation(&mut curr_filter_cache, att, &state)
+            };
+
+            self.op_pool
+                .get_attestations(
+                    &state,
+                    prev_attestation_filter,
+                    curr_attestation_filter,
+                    &self.spec,
+                )
+                .map_err(BlockProductionError::OpPoolError)?
+        };
 
         // If paranoid mode is enabled re-check the signatures of every included message.
         // This will be a lot slower but guards against bugs in block production and can be
diff --git a/beacon_node/beacon_chain/src/execution_payload.rs b/beacon_node/beacon_chain/src/execution_payload.rs
index 697fee351e..f0cab06ca3 100644
--- a/beacon_node/beacon_chain/src/execution_payload.rs
+++ b/beacon_node/beacon_chain/src/execution_payload.rs
@@ -24,7 +24,7 @@ use state_processing::per_block_processing::{
 };
 use std::sync::Arc;
 use tokio::task::JoinHandle;
-use tracing::{debug, warn};
+use tracing::{Instrument, debug, debug_span, warn};
 use tree_hash::TreeHash;
 use types::payload::BlockProductionVersion;
 use types::*;
@@ -403,8 +403,9 @@ pub fn get_execution_payload<T: BeaconChainTypes>(
                     block_production_version,
                 )
                 .await
-            },
-            "get_execution_payload",
+            }
+            .instrument(debug_span!("prepare_execution_payload")),
+            "prepare_execution_payload",
         )
         .ok_or(BlockProductionError::ShuttingDown)?;
 
@@ -503,6 +504,7 @@ where
             },
             "prepare_execution_payload_forkchoice_update_params",
         )
+        .instrument(debug_span!("forkchoice_update_params"))
         .await
         .map_err(|e| BlockProductionError::BeaconChain(Box::new(e)))?;
 
diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs
index b53c4cde4e..5b48b81aa6 100644
--- a/beacon_node/execution_layer/src/lib.rs
+++ b/beacon_node/execution_layer/src/lib.rs
@@ -43,7 +43,7 @@ use tokio::{
     time::sleep,
 };
 use tokio_stream::wrappers::WatchStream;
-use tracing::{debug, error, info, warn};
+use tracing::{Instrument, debug, debug_span, error, info, instrument, warn};
 use tree_hash::TreeHash;
 use types::beacon_block_body::KzgCommitments;
 use types::builder_bid::BuilderBid;
@@ -851,6 +851,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
     }
 
     /// Returns the fee-recipient address that should be used to build a block
+    #[instrument(level = "debug", skip_all)]
     pub async fn get_suggested_fee_recipient(&self, proposer_index: u64) -> Address {
         if let Some(preparation_data_entry) =
             self.proposer_preparation_data().await.get(&proposer_index)
@@ -875,6 +876,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
         }
     }
 
+    #[instrument(level = "debug", skip_all)]
     pub async fn get_proposer_gas_limit(&self, proposer_index: u64) -> Option<u64> {
         self.proposer_preparation_data()
             .await
@@ -891,6 +893,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
     ///
     /// The result will be returned from the first node that returns successfully. No more nodes
     /// will be contacted.
+    #[instrument(level = "debug", skip_all)]
     pub async fn get_payload(
         &self,
         payload_parameters: PayloadParameters<'_>,
@@ -996,6 +999,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
             timed_future(metrics::GET_BLINDED_PAYLOAD_BUILDER, async {
                 builder
                     .get_builder_header::<E>(slot, parent_hash, pubkey)
+                    .instrument(debug_span!("get_builder_header"))
                     .await
             }),
             timed_future(metrics::GET_BLINDED_PAYLOAD_LOCAL, async {
@@ -1237,6 +1241,7 @@ impl<E: EthSpec> ExecutionLayer<E> {
             .await
     }
 
+    #[instrument(level = "debug", skip_all)]
     async fn get_full_payload_with(
         &self,
         payload_parameters: PayloadParameters<'_>,
diff --git a/beacon_node/http_api/src/produce_block.rs b/beacon_node/http_api/src/produce_block.rs
index 932fb00179..367e09969b 100644
--- a/beacon_node/http_api/src/produce_block.rs
+++ b/beacon_node/http_api/src/produce_block.rs
@@ -10,8 +10,10 @@ use beacon_chain::{
     BeaconBlockResponseWrapper, BeaconChain, BeaconChainTypes, ProduceBlockVerification,
 };
 use eth2::types::{self as api_types, ProduceBlockV3Metadata, SkipRandaoVerification};
+use lighthouse_tracing::{SPAN_PRODUCE_BLOCK_V2, SPAN_PRODUCE_BLOCK_V3};
 use ssz::Encode;
 use std::sync::Arc;
+use tracing::instrument;
 use types::{payload::BlockProductionVersion, *};
 use warp::{
     Reply,
@@ -40,6 +42,11 @@ pub fn get_randao_verification(
     Ok(randao_verification)
 }
 
+#[instrument(
+    name = SPAN_PRODUCE_BLOCK_V3,
+    skip_all,
+    fields(%slot)
+)]
 pub async fn produce_block_v3<T: BeaconChainTypes>(
     accept_header: Option<api_types::Accept>,
     chain: Arc<BeaconChain<T>>,
@@ -155,6 +162,11 @@ pub async fn produce_blinded_block_v2<T: BeaconChainTypes>(
     build_response_v2(chain, block_response_type, accept_header)
 }
 
+#[instrument(
+    name = SPAN_PRODUCE_BLOCK_V2,
+    skip_all,
+    fields(%slot)
+)]
 pub async fn produce_block_v2<T: BeaconChainTypes>(
     accept_header: Option<api_types::Accept>,
     chain: Arc<BeaconChain<T>>,
diff --git a/beacon_node/lighthouse_tracing/src/lib.rs b/beacon_node/lighthouse_tracing/src/lib.rs
index d31df4e3dd..1787399761 100644
--- a/beacon_node/lighthouse_tracing/src/lib.rs
+++ b/beacon_node/lighthouse_tracing/src/lib.rs
@@ -3,7 +3,9 @@
 //! TODO: These span identifiers will be used to implement selective tracing export (to be implemented),
 //! where only the listed root spans and their descendants will be exported to the tracing backend.
 
-/// Root span name for publish_block
+/// Root span names for block production and publishing
+pub const SPAN_PRODUCE_BLOCK_V2: &str = "produce_block_v2";
+pub const SPAN_PRODUCE_BLOCK_V3: &str = "produce_block_v3";
 pub const SPAN_PUBLISH_BLOCK: &str = "publish_block";
 
 /// Data Availability checker span identifiers
@@ -42,11 +44,14 @@ pub const SPAN_HANDLE_LIGHT_CLIENT_FINALITY_UPDATE: &str = "handle_light_client_
 /// Only these spans and their descendants will be processed to reduce noise from
 /// uninstrumented code paths. New root spans must be added to this list to be traced.
 pub const LH_BN_ROOT_SPAN_NAMES: &[&str] = &[
-    SPAN_SYNCING_CHAIN,
+    SPAN_PRODUCE_BLOCK_V2,
+    SPAN_PRODUCE_BLOCK_V3,
+    SPAN_PUBLISH_BLOCK,
     SPAN_PENDING_COMPONENTS,
     SPAN_PROCESS_GOSSIP_DATA_COLUMN,
     SPAN_PROCESS_GOSSIP_BLOB,
     SPAN_PROCESS_GOSSIP_BLOCK,
+    SPAN_SYNCING_CHAIN,
     SPAN_OUTGOING_RANGE_REQUEST,
     SPAN_SINGLE_BLOCK_LOOKUP,
     SPAN_PROCESS_RPC_BLOCK,
diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs
index 19f294d439..fe1f5fba9e 100644
--- a/consensus/fork_choice/src/fork_choice.rs
+++ b/consensus/fork_choice/src/fork_choice.rs
@@ -523,6 +523,7 @@ where
     ///
     /// You *must* call `get_head` for the proposal slot prior to calling this function and pass
     /// in the result of `get_head` as `canonical_head`.
+    #[instrument(level = "debug", skip_all)]
     pub fn get_proposer_head(
         &self,
         current_slot: Slot,
diff --git a/consensus/state_processing/src/per_slot_processing.rs b/consensus/state_processing/src/per_slot_processing.rs
index 04b1e8148f..8695054e1e 100644
--- a/consensus/state_processing/src/per_slot_processing.rs
+++ b/consensus/state_processing/src/per_slot_processing.rs
@@ -26,7 +26,7 @@ impl From<ArithError> for Error {
 /// If the root of the supplied `state` is known, then it can be passed as `state_root`. If
 /// `state_root` is `None`, the root of `state` will be computed using a cached tree hash.
 /// Providing the `state_root` makes this function several orders of magnitude faster.
-#[instrument(skip_all)]
+#[instrument(level = "debug", skip_all)]
 pub fn per_slot_processing<E: EthSpec>(
     state: &mut BeaconState<E>,
     state_root: Option<Hash256>,