Instrument tracing spans for block processing and import (#7816)

#7815

- removes all existing spans, so some span fields that appear in logs like `service_name` may be lost.
- instruments a few key code paths in the beacon node, starting from **root spans** named below:

* Gossip block and blobs
* `process_gossip_data_column_sidecar`
* `process_gossip_blob`
* `process_gossip_block`
* Rpc block and blobs
* `process_rpc_block`
* `process_rpc_blobs`
* `process_rpc_custody_columns`
* Rpc blocks (range and backfill)
* `process_chain_segment`
* `PendingComponents` lifecycle
* `pending_components`

To test locally:
* Run Grafana and Tempo with https://github.com/sigp/lighthouse-metrics/pull/57
* Run Lighthouse BN with `--telemetry-collector-url http://localhost:4317`

Some captured traces can be found here: https://hackmd.io/@jimmygchen/r1sLOxPPeg

Removing the old spans seems to have reduced memory usage quite a lot - I think we were using them on long-running tasks and too excessively:
<img width="910" height="495" alt="image" src="https://github.com/user-attachments/assets/5208bbe4-53b2-4ead-bc71-0b782c788669" />
This commit is contained in:
Jimmy Chen
2025-08-08 15:32:22 +10:00
committed by GitHub
parent 6dfab22267
commit 40c2fd5ff4
52 changed files with 633 additions and 1164 deletions

View File

@@ -13,7 +13,7 @@ use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::marker::PhantomData;
use std::time::Duration;
use tracing::{debug, warn};
use tracing::{debug, instrument, warn};
use types::{
consts::bellatrix::INTERVALS_PER_SLOT, AbstractExecPayload, AttestationShufflingId,
AttesterSlashingRef, BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, Checkpoint,
@@ -472,6 +472,7 @@ where
/// Is equivalent to:
///
/// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/fork-choice.md#get_head
#[instrument(skip_all, level = "debug")]
pub fn get_head(
&mut self,
system_time_current_slot: Slot,
@@ -646,6 +647,12 @@ where
/// The supplied block **must** pass the `state_transition` function as it will not be run
/// here.
#[allow(clippy::too_many_arguments)]
#[instrument(
name = "fork_choice_on_block",
skip_all,
fields(
fork_choice_block_delay = ?block_delay
))]
pub fn on_block<Payload: AbstractExecPayload<E>>(
&mut self,
system_time_current_slot: Slot,

View File

@@ -35,6 +35,7 @@ safe_arith = { workspace = true }
smallvec = { workspace = true }
ssz_types = { workspace = true }
test_random_derive = { path = "../../common/test_random_derive" }
tracing = { workspace = true }
tree_hash = { workspace = true }
types = { workspace = true }

View File

@@ -5,12 +5,14 @@ use crate::metrics::{
};
use crate::{BlockProcessingError, EpochProcessingError};
use metrics::set_gauge;
use tracing::instrument;
use types::{
is_progressive_balances_enabled, BeaconState, BeaconStateError, ChainSpec, Epoch,
EpochTotalBalances, EthSpec, ParticipationFlags, ProgressiveBalancesCache, Validator,
};
/// Initializes the `ProgressiveBalancesCache` if it is unbuilt.
#[instrument(skip_all, level = "debug")]
pub fn initialize_progressive_balances_cache<E: EthSpec>(
state: &mut BeaconState<E>,
spec: &ChainSpec,

View File

@@ -3,6 +3,7 @@ use crate::common::base::SqrtTotalActiveBalance;
use crate::common::{altair, base};
use crate::metrics;
use safe_arith::SafeArith;
use tracing::instrument;
use types::epoch_cache::{EpochCache, EpochCacheError, EpochCacheKey};
use types::{
ActivationQueue, BeaconState, ChainSpec, EthSpec, FixedBytesExtended, ForkName, Hash256,
@@ -130,6 +131,7 @@ pub fn is_epoch_cache_initialized<E: EthSpec>(
.is_ok())
}
#[instrument(skip_all, level = "debug")]
pub fn initialize_epoch_cache<E: EthSpec>(
state: &mut BeaconState<E>,
spec: &ChainSpec,

View File

@@ -47,6 +47,7 @@ use crate::common::update_progressive_balances_cache::{
use crate::epoch_cache::initialize_epoch_cache;
#[cfg(feature = "arbitrary-fuzz")]
use arbitrary::Arbitrary;
use tracing::instrument;
/// The strategy to be used when validating the block's signatures.
#[cfg_attr(feature = "arbitrary-fuzz", derive(Arbitrary))]
@@ -97,6 +98,7 @@ pub enum VerifyBlockRoot {
/// re-calculating the root when it is already known. Note `block_root` should be equal to the
/// tree hash root of the block, NOT the signing root of the block. This function takes
/// care of mixing in the domain.
#[instrument(skip_all)]
pub fn per_block_processing<E: EthSpec, Payload: AbstractExecPayload<E>>(
state: &mut BeaconState<E>,
signed_block: &SignedBeaconBlock<E, Payload>,

View File

@@ -5,6 +5,7 @@ pub use epoch_processing_summary::{EpochProcessingSummary, ParticipationEpochSum
use errors::EpochProcessingError as Error;
pub use justification_and_finalization_state::JustificationAndFinalizationState;
use safe_arith::SafeArith;
use tracing::instrument;
use types::{BeaconState, ChainSpec, EthSpec};
pub use registry_updates::{process_registry_updates, process_registry_updates_slow};
@@ -30,6 +31,7 @@ pub mod weigh_justification_and_finalization;
///
/// Mutates the given `BeaconState`, returning early if an error is encountered. If an error is
/// returned, a state might be "half-processed" and therefore in an invalid state.
#[instrument(skip_all)]
pub fn process_epoch<E: EthSpec>(
state: &mut BeaconState<E>,
spec: &ChainSpec,

View File

@@ -11,6 +11,7 @@ use itertools::izip;
use safe_arith::{SafeArith, SafeArithIter};
use std::cmp::{max, min};
use std::collections::{BTreeSet, HashMap};
use tracing::instrument;
use types::{
consts::altair::{
NUM_FLAG_INDICES, PARTICIPATION_FLAG_WEIGHTS, TIMELY_HEAD_FLAG_INDEX,
@@ -134,6 +135,7 @@ impl ValidatorInfo {
}
}
#[instrument(skip_all)]
pub fn process_epoch_single_pass<E: EthSpec>(
state: &mut BeaconState<E>,
spec: &ChainSpec,

View File

@@ -4,6 +4,7 @@ use crate::upgrade::{
};
use crate::{per_epoch_processing::EpochProcessingSummary, *};
use safe_arith::{ArithError, SafeArith};
use tracing::instrument;
use types::*;
#[derive(Debug, PartialEq)]
@@ -25,6 +26,7 @@ impl From<ArithError> for Error {
/// If the root of the supplied `state` is known, then it can be passed as `state_root`. If
/// `state_root` is `None`, the root of `state` will be computed using a cached tree hash.
/// Providing the `state_root` makes this function several orders of magnitude faster.
#[instrument(skip_all)]
pub fn per_slot_processing<E: EthSpec>(
state: &mut BeaconState<E>,
state_root: Option<Hash256>,
@@ -85,6 +87,7 @@ pub fn per_slot_processing<E: EthSpec>(
Ok(summary)
}
#[instrument(skip_all)]
fn cache_state<E: EthSpec>(
state: &mut BeaconState<E>,
state_root: Option<Hash256>,

View File

@@ -34,6 +34,7 @@ pub use crate::beacon_state::slashings_cache::SlashingsCache;
pub use eth_spec::*;
pub use iter::BlockRootsIter;
pub use milhouse::{interface::Interface, List, Vector};
use tracing::instrument;
#[macro_use]
mod committee_cache;
@@ -1873,6 +1874,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build the total active balance cache for the current epoch if it is not already built.
#[instrument(skip_all, level = "debug")]
pub fn build_total_active_balance_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> {
if self
.get_total_active_balance_at_epoch(self.current_epoch())
@@ -1931,6 +1933,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build all caches (except the tree hash cache), if they need to be built.
#[instrument(skip_all, level = "debug")]
pub fn build_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> {
self.build_all_committee_caches(spec)?;
self.update_pubkey_cache()?;
@@ -1941,6 +1944,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build all committee caches, if they need to be built.
#[instrument(skip_all, level = "debug")]
pub fn build_all_committee_caches(&mut self, spec: &ChainSpec) -> Result<(), Error> {
self.build_committee_cache(RelativeEpoch::Previous, spec)?;
self.build_committee_cache(RelativeEpoch::Current, spec)?;
@@ -1949,6 +1953,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build the exit cache, if it needs to be built.
#[instrument(skip_all, level = "debug")]
pub fn build_exit_cache(&mut self, spec: &ChainSpec) -> Result<(), Error> {
if self.exit_cache().check_initialized().is_err() {
*self.exit_cache_mut() = ExitCache::new(self.validators(), spec)?;
@@ -1957,6 +1962,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build the slashings cache if it needs to be built.
#[instrument(skip_all, level = "debug")]
pub fn build_slashings_cache(&mut self) -> Result<(), Error> {
let latest_block_slot = self.latest_block_header().slot;
if !self.slashings_cache().is_initialized(latest_block_slot) {
@@ -1994,6 +2000,7 @@ impl<E: EthSpec> BeaconState<E> {
}
/// Build a committee cache, unless it has already been built.
#[instrument(skip_all, level = "debug")]
pub fn build_committee_cache(
&mut self,
relative_epoch: RelativeEpoch,
@@ -2114,6 +2121,7 @@ impl<E: EthSpec> BeaconState<E> {
///
/// Adds all `pubkeys` from the `validators` which are not already in the cache. Will
/// never re-add a pubkey.
#[instrument(skip_all, level = "debug")]
pub fn update_pubkey_cache(&mut self) -> Result<(), Error> {
let mut pubkey_cache = mem::take(self.pubkey_cache_mut());
let start_index = pubkey_cache.len();
@@ -2194,6 +2202,7 @@ impl<E: EthSpec> BeaconState<E> {
/// Compute the tree hash root of the state using the tree hash cache.
///
/// Initialize the tree hash cache if it isn't already initialized.
#[instrument(skip_all, level = "debug")]
pub fn update_tree_hash_cache<'a>(&'a mut self) -> Result<Hash256, Error> {
self.apply_pending_mutations()?;
map_beacon_state_ref!(&'a _, self.to_ref(), |inner, cons| {