Instrument tracing spans for block processing and import (#7816)

#7815

- removes all existing spans, so some span fields that appear in logs like `service_name` may be lost.
- instruments a few key code paths in the beacon node, starting from **root spans** named below:

* Gossip block and blobs
  * `process_gossip_data_column_sidecar`
  * `process_gossip_blob`
  * `process_gossip_block`
* Rpc block and blobs
  * `process_rpc_block`
  * `process_rpc_blobs`
  * `process_rpc_custody_columns`
* Rpc blocks (range and backfill)
  * `process_chain_segment`
* `PendingComponents` lifecycle
  * `pending_components`

To test locally:
* Run Grafana and Tempo with https://github.com/sigp/lighthouse-metrics/pull/57
* Run Lighthouse BN with `--telemetry-collector-url http://localhost:4317`

Some captured traces can be found here: https://hackmd.io/@jimmygchen/r1sLOxPPeg

Removing the old spans seems to have reduced the memory usage quite a lot - I think we were using them on long-running tasks and too excessively:
<img width="910" height="495" alt="image" src="https://github.com/user-attachments/assets/5208bbe4-53b2-4ead-bc71-0b782c788669" />
This commit is contained in:
Jimmy Chen
2025-08-08 15:32:22 +10:00
committed by GitHub
parent 6dfab22267
commit 40c2fd5ff4
52 changed files with 633 additions and 1164 deletions

View File

@@ -17,7 +17,7 @@ use std::marker::PhantomData;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration;
use tracing::{debug, instrument, trace};
use tracing::{debug, trace};
use types::{EthSpec, ForkContext};
pub(crate) use handler::{HandlerErr, HandlerEvent};
@@ -169,12 +169,6 @@ pub struct RPC<Id: ReqId, E: EthSpec> {
}
impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn new(
fork_context: Arc<ForkContext>,
enable_light_client_server: bool,
@@ -207,12 +201,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Sends an RPC response.
/// Returns an `Err` if the request does not exist in the active inbound requests list.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn send_response(
&mut self,
request_id: InboundRequestId,
@@ -282,12 +270,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Submits an RPC request.
///
/// The peer must be connected for this to succeed.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn send_request(&mut self, peer_id: PeerId, request_id: Id, req: RequestType<E>) {
match self
.outbound_request_limiter
@@ -306,12 +288,6 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
/// Lighthouse wishes to disconnect from this peer by sending a Goodbye message. This
/// gracefully terminates the RPC behaviour with a goodbye message.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn shutdown(&mut self, peer_id: PeerId, id: Id, reason: GoodbyeReason) {
self.events.push(ToSwarm::NotifyHandler {
peer_id,
@@ -320,23 +296,11 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
});
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn update_seq_number(&mut self, seq_number: u64) {
self.seq_number = seq_number
}
/// Send a Ping request to the destination `PeerId` via `ConnectionId`.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p_rpc"),
name = "libp2p_rpc",
skip_all
)]
pub fn ping(&mut self, peer_id: PeerId, id: Id) {
let ping = Ping {
data: self.seq_number,

View File

@@ -38,7 +38,7 @@ use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, error, info, instrument, trace, warn};
use tracing::{debug, error, info, trace, warn};
use types::{
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, EnrForkId, EthSpec, ForkContext, Slot, SubnetId,
};
@@ -167,12 +167,6 @@ pub struct Network<E: EthSpec> {
/// Implements the combined behaviour for the libp2p service.
impl<E: EthSpec> Network<E> {
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub async fn new(
executor: task_executor::TaskExecutor,
mut ctx: ServiceContext<'_>,
@@ -535,12 +529,6 @@ impl<E: EthSpec> Network<E> {
/// - Starts listening in the given ports.
/// - Dials boot-nodes and libp2p peers.
/// - Subscribes to starting gossipsub topics.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
async fn start(&mut self, config: &crate::NetworkConfig) -> Result<(), String> {
let enr = self.network_globals.local_enr();
info!(
@@ -664,114 +652,48 @@ impl<E: EthSpec> Network<E> {
/* Public Accessible Functions to interact with the behaviour */
/// The routing pub-sub mechanism for eth2.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn gossipsub_mut(&mut self) -> &mut Gossipsub {
&mut self.swarm.behaviour_mut().gossipsub
}
/// The Eth2 RPC specified in the wire-0 protocol.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn eth2_rpc_mut(&mut self) -> &mut RPC<AppRequestId, E> {
&mut self.swarm.behaviour_mut().eth2_rpc
}
/// Discv5 Discovery protocol.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn discovery_mut(&mut self) -> &mut Discovery<E> {
&mut self.swarm.behaviour_mut().discovery
}
/// Provides IP addresses and peer information.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn identify_mut(&mut self) -> &mut identify::Behaviour {
&mut self.swarm.behaviour_mut().identify
}
/// The peer manager that keeps track of peer's reputation and status.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn peer_manager_mut(&mut self) -> &mut PeerManager<E> {
&mut self.swarm.behaviour_mut().peer_manager
}
/// The routing pub-sub mechanism for eth2.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn gossipsub(&self) -> &Gossipsub {
&self.swarm.behaviour().gossipsub
}
/// The Eth2 RPC specified in the wire-0 protocol.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn eth2_rpc(&self) -> &RPC<AppRequestId, E> {
&self.swarm.behaviour().eth2_rpc
}
/// Discv5 Discovery protocol.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn discovery(&self) -> &Discovery<E> {
&self.swarm.behaviour().discovery
}
/// Provides IP addresses and peer information.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn identify(&self) -> &identify::Behaviour {
&self.swarm.behaviour().identify
}
/// The peer manager that keeps track of peer's reputation and status.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn peer_manager(&self) -> &PeerManager<E> {
&self.swarm.behaviour().peer_manager
}
/// Returns the local ENR of the node.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn local_enr(&self) -> Enr {
self.network_globals.local_enr()
}
@@ -780,12 +702,6 @@ impl<E: EthSpec> Network<E> {
/// Subscribes to a gossipsub topic kind, letting the network service determine the
/// encoding and fork version.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn subscribe_kind(&mut self, kind: GossipKind) -> bool {
let gossip_topic = GossipTopic::new(
kind,
@@ -798,12 +714,6 @@ impl<E: EthSpec> Network<E> {
/// Unsubscribes from a gossipsub topic kind, letting the network service determine the
/// encoding and fork version.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn unsubscribe_kind(&mut self, kind: GossipKind) -> bool {
let gossip_topic = GossipTopic::new(
kind,
@@ -814,12 +724,6 @@ impl<E: EthSpec> Network<E> {
}
/// Subscribe to all required topics for the `new_fork` with the given `new_fork_digest`.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn subscribe_new_fork_topics(&mut self, new_fork: ForkName, new_fork_digest: [u8; 4]) {
// Re-subscribe to non-core topics with the new fork digest
let subscriptions = self.network_globals.gossipsub_subscriptions.read().clone();
@@ -844,12 +748,6 @@ impl<E: EthSpec> Network<E> {
}
/// Unsubscribe from all topics that don't have the given fork_digest
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn unsubscribe_from_fork_topics_except(&mut self, except: [u8; 4]) {
let subscriptions = self.network_globals.gossipsub_subscriptions.read().clone();
for topic in subscriptions
@@ -862,12 +760,6 @@ impl<E: EthSpec> Network<E> {
}
/// Remove topic weight from all topics that don't have the given fork digest.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn remove_topic_weight_except(&mut self, except: [u8; 4]) {
let new_param = TopicScoreParams {
topic_weight: 0.0,
@@ -892,12 +784,6 @@ impl<E: EthSpec> Network<E> {
}
/// Subscribe to all data columns determined by the cgc.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn subscribe_new_data_column_subnets(&mut self, sampling_column_count: u64) {
self.network_globals
.update_data_column_subnets(sampling_column_count);
@@ -909,12 +795,6 @@ impl<E: EthSpec> Network<E> {
}
/// Returns the scoring parameters for a topic if set.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn get_topic_params(&self, topic: GossipTopic) -> Option<&TopicScoreParams> {
self.swarm
.behaviour()
@@ -925,12 +805,6 @@ impl<E: EthSpec> Network<E> {
/// Subscribes to a gossipsub topic.
///
/// Returns `true` if the subscription was successful and `false` otherwise.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn subscribe(&mut self, topic: GossipTopic) -> bool {
// update the network globals
self.network_globals
@@ -953,12 +827,6 @@ impl<E: EthSpec> Network<E> {
}
/// Unsubscribe from a gossipsub topic.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn unsubscribe(&mut self, topic: GossipTopic) -> bool {
// update the network globals
self.network_globals
@@ -974,12 +842,6 @@ impl<E: EthSpec> Network<E> {
}
/// Publishes a list of messages on the pubsub (gossipsub) behaviour, choosing the encoding.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn publish(&mut self, messages: Vec<PubsubMessage<E>>) {
for message in messages {
for topic in message.topics(GossipEncoding::default(), self.enr_fork_id.fork_digest) {
@@ -1034,12 +896,6 @@ impl<E: EthSpec> Network<E> {
/// Informs the gossipsub about the result of a message validation.
/// If the message is valid it will get propagated by gossipsub.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn report_message_validation_result(
&mut self,
propagation_source: &PeerId,
@@ -1074,12 +930,6 @@ impl<E: EthSpec> Network<E> {
/// Updates the current gossipsub scoring parameters based on the validator count and current
/// slot.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn update_gossipsub_parameters(
&mut self,
active_validators: usize,
@@ -1123,12 +973,7 @@ impl<E: EthSpec> Network<E> {
/* Eth2 RPC behaviour functions */
/// Send a request to a peer over RPC.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
#[allow(clippy::result_large_err)]
pub fn send_request(
&mut self,
peer_id: PeerId,
@@ -1146,12 +991,6 @@ impl<E: EthSpec> Network<E> {
}
/// Send a successful response to a peer over RPC.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn send_response<T: Into<RpcResponse<E>>>(
&mut self,
peer_id: PeerId,
@@ -1171,22 +1010,10 @@ impl<E: EthSpec> Network<E> {
}
/* Peer management functions */
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn testing_dial(&mut self, addr: Multiaddr) -> Result<(), libp2p::swarm::DialError> {
self.swarm.dial(addr)
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn report_peer(
&mut self,
peer_id: &PeerId,
@@ -1202,12 +1029,6 @@ impl<E: EthSpec> Network<E> {
///
/// This will send a goodbye, disconnect and then ban the peer.
/// This is fatal for a peer, and should be used in unrecoverable circumstances.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn goodbye_peer(&mut self, peer_id: &PeerId, reason: GoodbyeReason, source: ReportSource) {
self.peer_manager_mut()
.goodbye_peer(peer_id, reason, source);
@@ -1215,34 +1036,16 @@ impl<E: EthSpec> Network<E> {
/// Hard (ungraceful) disconnect for testing purposes only
/// Use goodbye_peer for disconnections, do not use this function.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn __hard_disconnect_testing_only(&mut self, peer_id: PeerId) {
let _ = self.swarm.disconnect_peer_id(peer_id);
}
/// Returns an iterator over all enr entries in the DHT.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn enr_entries(&self) -> Vec<Enr> {
self.discovery().table_entries_enr()
}
/// Add an ENR to the routing table of the discovery mechanism.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn add_enr(&mut self, enr: Enr) {
self.discovery_mut().add_enr(enr);
}
@@ -1250,12 +1053,6 @@ impl<E: EthSpec> Network<E> {
/// Updates a subnet value to the ENR attnets/syncnets bitfield.
///
/// The `value` is `true` if a subnet is being added and false otherwise.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn update_enr_subnet(&mut self, subnet_id: Subnet, value: bool) {
if let Err(e) = self.discovery_mut().update_enr_bitfield(subnet_id, value) {
crit!(error = e, "Could not update ENR bitfield");
@@ -1265,12 +1062,6 @@ impl<E: EthSpec> Network<E> {
}
/// Updates the cgc value in the ENR.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn update_enr_cgc(&mut self, new_custody_group_count: u64) {
if let Err(e) = self.discovery_mut().update_enr_cgc(new_custody_group_count) {
crit!(error = e, "Could not update cgc in ENR");
@@ -1281,12 +1072,6 @@ impl<E: EthSpec> Network<E> {
/// Attempts to discover new peers for a given subnet. The `min_ttl` gives the time until which we
/// would like to retain the peers.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn discover_subnet_peers(&mut self, subnets_to_discover: Vec<SubnetDiscovery>) {
// If discovery is not started or disabled, ignore the request
if !self.discovery().started {
@@ -1341,12 +1126,6 @@ impl<E: EthSpec> Network<E> {
}
/// Updates the local ENR's "eth2" field with the latest EnrForkId.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub fn update_fork_version(&mut self, enr_fork_id: EnrForkId) {
self.discovery_mut().update_eth2_enr(enr_fork_id.clone());
@@ -1363,12 +1142,6 @@ impl<E: EthSpec> Network<E> {
/* Private internal functions */
/// Updates the current meta data of the node to match the local ENR.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn update_metadata_bitfields(&mut self) {
let local_attnets = self
.discovery_mut()
@@ -1399,12 +1172,6 @@ impl<E: EthSpec> Network<E> {
utils::save_metadata_to_disk(&self.network_dir, meta_data);
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn update_metadata_cgc(&mut self, custody_group_count: u64) {
let mut meta_data_w = self.network_globals.local_metadata.write();
@@ -1422,23 +1189,11 @@ impl<E: EthSpec> Network<E> {
}
/// Sends a Ping request to the peer.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn ping(&mut self, peer_id: PeerId) {
self.eth2_rpc_mut().ping(peer_id, AppRequestId::Internal);
}
/// Sends a METADATA request to a peer.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn send_meta_data_request(&mut self, peer_id: PeerId) {
let event = if self.fork_context.spec.is_peer_das_scheduled() {
// Nodes with higher custody will probably start advertising it
@@ -1453,21 +1208,9 @@ impl<E: EthSpec> Network<E> {
}
/// Sends a METADATA response to a peer.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
// RPC Propagation methods
/// Queues the response to be sent upwards as long as it was requested outside the Behaviour.
#[must_use = "return the response"]
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn build_response(
&mut self,
app_request_id: AppRequestId,
@@ -1486,12 +1229,6 @@ impl<E: EthSpec> Network<E> {
/// Dial cached Enrs in discovery service that are in the given `subnet_id` and aren't
/// in Connected, Dialing or Banned state.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn dial_cached_enrs_in_subnet(&mut self, subnet: Subnet, spec: Arc<ChainSpec>) {
let predicate = subnet_predicate::<E>(vec![subnet], spec);
let peers_to_dial: Vec<Enr> = self
@@ -1534,12 +1271,6 @@ impl<E: EthSpec> Network<E> {
/* Sub-behaviour event handling functions */
/// Handle a gossipsub event.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn inject_gs_event(&mut self, event: gossipsub::Event) -> Option<NetworkEvent<E>> {
match event {
gossipsub::Event::Message {
@@ -1678,12 +1409,6 @@ impl<E: EthSpec> Network<E> {
}
/// Handle an RPC event.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn inject_rpc_event(&mut self, event: RPCMessage<AppRequestId, E>) -> Option<NetworkEvent<E>> {
let peer_id = event.peer_id;
@@ -1974,12 +1699,6 @@ impl<E: EthSpec> Network<E> {
}
/// Handle an identify event.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn inject_identify_event(&mut self, event: identify::Event) -> Option<NetworkEvent<E>> {
match event {
identify::Event::Received {
@@ -2002,12 +1721,6 @@ impl<E: EthSpec> Network<E> {
}
/// Handle a peer manager event.
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn inject_pm_event(&mut self, event: PeerManagerEvent) -> Option<NetworkEvent<E>> {
match event {
PeerManagerEvent::PeerConnectedIncoming(peer_id) => {
@@ -2061,12 +1774,6 @@ impl<E: EthSpec> Network<E> {
}
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn inject_upnp_event(&mut self, event: libp2p::upnp::Event) {
match event {
libp2p::upnp::Event::NewExternalAddr(addr) => {
@@ -2110,12 +1817,6 @@ impl<E: EthSpec> Network<E> {
}
/* Networking polling */
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
pub async fn next_event(&mut self) -> NetworkEvent<E> {
loop {
tokio::select! {
@@ -2149,12 +1850,6 @@ impl<E: EthSpec> Network<E> {
}
}
#[instrument(parent = None,
level = "trace",
fields(service = "libp2p"),
name = "libp2p",
skip_all
)]
fn parse_swarm_event(
&mut self,
event: SwarmEvent<BehaviourEvent<E>>,