Implement tracing spans for data columm RPC requests and responses (#7831)

#7830
This commit is contained in:
Jimmy Chen
2025-08-21 09:35:51 +10:00
committed by GitHub
parent 2d223575d6
commit f19d4f6af1
18 changed files with 491 additions and 66 deletions

View File

@@ -29,6 +29,7 @@ use lighthouse_network::service::api_types::{
DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
};
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource};
use lighthouse_tracing::SPAN_OUTGOING_RANGE_REQUEST;
use parking_lot::RwLock;
pub use requests::LookupVerifyError;
use requests::{
@@ -45,7 +46,7 @@ use std::time::Duration;
#[cfg(test)]
use task_executor::TaskExecutor;
use tokio::sync::mpsc;
use tracing::{debug, error, warn};
use tracing::{Span, debug, debug_span, error, warn};
use types::blob_sidecar::FixedBlobSidecarList;
use types::{
BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext,
@@ -55,6 +56,18 @@ use types::{
pub mod custody;
mod requests;
macro_rules! new_range_request_span {
($self:expr, $name:literal, $parent:expr, $peer_id:expr) => {{
let client = $self.client_type(&$peer_id).kind;
debug_span!(
parent: $parent,
$name,
peer_id = %$peer_id,
client = %client
)
}};
}
/// Max retries for block components after which we fail the batch.
pub const MAX_COLUMN_RETRIES: usize = 3;
@@ -444,10 +457,16 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request: BlocksByRangeRequest,
failed_columns: &HashSet<ColumnIndex>,
) -> Result<(), String> {
let Some(requester) = self
let Some((requester, parent_request_span)) = self
.components_by_range_requests
.keys()
.find_map(|r| if r.id == id { Some(r.requester) } else { None })
.iter()
.find_map(|(key, value)| {
if key.id == id {
Some((key.requester, value.request_span.clone()))
} else {
None
}
})
else {
return Err("request id not present".to_string());
};
@@ -485,6 +504,12 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
columns,
},
id,
new_range_request_span!(
self,
"outgoing_columns_by_range_retry",
parent_request_span.clone(),
peer_id
),
)
})
.collect::<Result<Vec<_>, _>>()
@@ -511,6 +536,13 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
peers: &HashSet<PeerId>,
peers_to_deprioritize: &HashSet<PeerId>,
) -> Result<Id, RpcRequestSendError> {
let range_request_span = debug_span!(
parent: None,
SPAN_OUTGOING_RANGE_REQUEST,
range_req_id = %requester,
peers = peers.len()
);
let _guard = range_request_span.clone().entered();
let active_request_count_by_peer = self.active_request_count_by_peer();
let Some(block_peer) = peers
@@ -561,7 +593,17 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
requester,
};
let blocks_req_id = self.send_blocks_by_range_request(block_peer, request.clone(), id)?;
let blocks_req_id = self.send_blocks_by_range_request(
block_peer,
request.clone(),
id,
new_range_request_span!(
self,
"outgoing_blocks_by_range",
range_request_span.clone(),
block_peer
),
)?;
let blobs_req_id = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) {
Some(self.send_blobs_by_range_request(
@@ -571,6 +613,12 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
count: *request.count(),
},
id,
new_range_request_span!(
self,
"outgoing_blobs_by_range",
range_request_span.clone(),
block_peer
),
)?)
} else {
None
@@ -589,6 +637,12 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
columns,
},
id,
new_range_request_span!(
self,
"outgoing_columns_by_range",
range_request_span.clone(),
peer_id
),
)
})
.collect::<Result<Vec<_>, _>>()
@@ -605,6 +659,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
self.chain.sampling_columns_for_epoch(epoch).to_vec(),
)
}),
range_request_span,
);
self.components_by_range_requests.insert(id, info);
@@ -833,6 +888,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// block and the peer must have it.
true,
BlocksByRootRequestItems::new(request),
// Not implemented
Span::none(),
);
Ok(LookupRequestResult::RequestSent(id.req_id))
@@ -927,6 +984,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// have imported the block+blobs.
true,
BlobsByRootRequestItems::new(request),
// Not implemented
Span::none(),
);
Ok(LookupRequestResult::RequestSent(id.req_id))
@@ -970,6 +1029,9 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
peer_id,
expect_max_responses,
DataColumnsByRootRequestItems::new(request),
// Span is tracked in `self.custody_columns_by_root_requests` in the
// `ActiveCustodyRequest` struct.
Span::none(),
);
Ok(LookupRequestResult::RequestSent(id))
@@ -1065,6 +1127,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
peer_id: PeerId,
request: BlocksByRangeRequest,
parent_request_id: ComponentsByRangeRequestId,
request_span: Span,
) -> Result<BlocksByRangeRequestId, RpcRequestSendError> {
let id = BlocksByRangeRequestId {
id: self.next_id(),
@@ -1094,6 +1157,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// know if there are missed blocks.
false,
BlocksByRangeRequestItems::new(request),
request_span,
);
Ok(id)
}
@@ -1103,6 +1167,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
peer_id: PeerId,
request: BlobsByRangeRequest,
parent_request_id: ComponentsByRangeRequestId,
request_span: Span,
) -> Result<BlobsByRangeRequestId, RpcRequestSendError> {
let id = BlobsByRangeRequestId {
id: self.next_id(),
@@ -1136,6 +1201,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// know if there are missed blocks.
false,
BlobsByRangeRequestItems::new(request, max_blobs_per_block),
request_span,
);
Ok(id)
}
@@ -1145,6 +1211,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
peer_id: PeerId,
request: DataColumnsByRangeRequest,
parent_request_id: ComponentsByRangeRequestId,
request_span: Span,
) -> Result<(DataColumnsByRangeRequestId, Vec<u64>), RpcRequestSendError> {
let requested_columns = request.columns.clone();
let id = DataColumnsByRangeRequestId {
@@ -1177,6 +1244,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// know if there are missed blocks.
false,
DataColumnsByRangeRequestItems::new(request),
request_span,
);
Ok((id, requested_columns))
}