Instrument tracing spans for block processing and import (#7816)

#7815

- removes all existing spans, so some span fields that appear in logs like `service_name` may be lost.
- instruments a few key code paths in the beacon node, starting from **root spans** named below:

* Gossip block and blobs
* `process_gossip_data_column_sidecar`
* `process_gossip_blob`
* `process_gossip_block`
* Rpc block and blobs
* `process_rpc_block`
* `process_rpc_blobs`
* `process_rpc_custody_columns`
* Rpc blocks (range and backfill)
* `process_chain_segment`
* `PendingComponents` lifecycle
* `pending_components`

To test locally:
* Run Grafana and Tempo with https://github.com/sigp/lighthouse-metrics/pull/57
* Run Lighthouse BN with `--telemetry-collector-url http://localhost:4317`

Some captured traces can be found here: https://hackmd.io/@jimmygchen/r1sLOxPPeg

Removing the old spans seem to have reduced the memory usage quite a lot - i think we were using them on long running tasks and too excessively:
<img width="910" height="495" alt="image" src="https://github.com/user-attachments/assets/5208bbe4-53b2-4ead-bc71-0b782c788669" />
This commit is contained in:
Jimmy Chen
2025-08-08 15:32:22 +10:00
committed by GitHub
parent 6dfab22267
commit 40c2fd5ff4
52 changed files with 633 additions and 1164 deletions

View File

@@ -32,7 +32,7 @@ use mockall_double::double;
use ssz_types::FixedVector;
use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash;
use std::sync::Arc;
use tracing::{debug, warn};
use tracing::{debug, instrument, warn, Span};
use types::blob_sidecar::BlobSidecarError;
use types::data_column_sidecar::DataColumnSidecarError;
use types::{
@@ -68,6 +68,7 @@ pub enum FetchEngineBlobError {
/// Fetches blobs from the EL mempool and processes them. It also broadcasts unseen blobs or
/// data columns (PeerDAS onwards) to the network, using the supplied `publish_fn`.
#[instrument(skip_all)]
pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
chain: Arc<BeaconChain<T>>,
block_root: Hash256,
@@ -140,6 +141,7 @@ async fn fetch_and_process_engine_blobs_inner<T: BeaconChainTypes>(
}
}
#[instrument(skip_all, level = "debug")]
async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
chain_adapter: FetchBlobsBeaconAdapter<T>,
block_root: Hash256,
@@ -232,6 +234,7 @@ async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
Ok(Some(availability_processing_status))
}
#[instrument(skip_all, level = "debug")]
async fn fetch_and_process_blobs_v2<T: BeaconChainTypes>(
chain_adapter: FetchBlobsBeaconAdapter<T>,
block_root: Hash256,
@@ -342,10 +345,12 @@ async fn compute_custody_columns_to_import<T: BeaconChainTypes>(
let spec = chain_adapter.spec().clone();
let chain_adapter_cloned = chain_adapter.clone();
let custody_columns_indices = custody_columns_indices.to_vec();
let current_span = Span::current();
chain_adapter
.executor()
.spawn_blocking_handle(
move || {
let _guard = current_span.enter();
let mut timer = metrics::start_timer_vec(
&metrics::DATA_COLUMN_SIDECAR_COMPUTATION,
&[&blobs.len().to_string()],