Instrument tracing spans for block processing and import (#7816)

#7815

- removes all existing spans, so some span fields that appear in logs like `service_name` may be lost.
- instruments a few key code paths in the beacon node, starting from **root spans** named below:

* Gossip block and blobs
* `process_gossip_data_column_sidecar`
* `process_gossip_blob`
* `process_gossip_block`
* Rpc block and blobs
* `process_rpc_block`
* `process_rpc_blobs`
* `process_rpc_custody_columns`
* Rpc blocks (range and backfill)
* `process_chain_segment`
* `PendingComponents` lifecycle
* `pending_components`

To test locally:
* Run Grafana and Tempo with https://github.com/sigp/lighthouse-metrics/pull/57
* Run Lighthouse BN with `--telemetry-collector-url http://localhost:4317`

Some captured traces can be found here: https://hackmd.io/@jimmygchen/r1sLOxPPeg

Removing the old spans seems to have reduced memory usage quite a lot - I think we were using them on long-running tasks, and too excessively:
<img width="910" height="495" alt="image" src="https://github.com/user-attachments/assets/5208bbe4-53b2-4ead-bc71-0b782c788669" />
This commit is contained in:
Jimmy Chen
2025-08-08 15:32:22 +10:00
committed by GitHub
parent 6dfab22267
commit 40c2fd5ff4
52 changed files with 633 additions and 1164 deletions

View File

@@ -17,7 +17,7 @@ use std::marker::PhantomData;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration;
use tracing::{debug, instrument, trace};
use tracing::{debug, trace};
use types::{EthSpec, ForkContext};
pub(crate) use handler::{HandlerErr, HandlerEvent};
@@ -169,12 +169,6 @@ pub struct RPC<Id: ReqId, E: EthSpec> {
}
impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn new(
fork_context: Arc<ForkContext>,
enable_light_client_server: bool,
@@ -207,12 +201,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Sends an RPC response.
/// Returns an `Err` if the request does not exist in the active inbound requests list.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn send_response(
&mut self,
request_id: InboundRequestId,
@@ -282,12 +270,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Submits an RPC request.
///
/// The peer must be connected for this to succeed.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn send_request(&mut self, peer_id: PeerId, request_id: Id, req: RequestType<E>) {
match self
.outbound_request_limiter
@@ -306,12 +288,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Lighthouse wishes to disconnect from this peer by sending a Goodbye message. This
/// gracefully terminates the RPC behaviour with a goodbye message.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn shutdown(&mut self, peer_id: PeerId, id: Id, reason: GoodbyeReason) {
self.events.push(ToSwarm::NotifyHandler {
peer_id,
@@ -320,23 +296,11 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
});
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn update_seq_number(&mut self, seq_number: u64) {
self.seq_number = seq_number
}
/// Send a Ping request to the destination `PeerId` via `ConnectionId`.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn ping(&mut self, peer_id: PeerId, id: Id) {
let ping = Ping {
data: self.seq_number,