mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-11 04:31:51 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -15,8 +15,9 @@ use libp2p::swarm::handler::{
|
||||
ConnectionEvent, ConnectionHandler, ConnectionHandlerEvent, DialUpgradeError,
|
||||
FullyNegotiatedInbound, FullyNegotiatedOutbound, StreamUpgradeError, SubstreamProtocol,
|
||||
};
|
||||
use libp2p::swarm::Stream;
|
||||
use slog::{crit, debug, trace};
|
||||
use libp2p::swarm::{ConnectionId, Stream};
|
||||
use libp2p::PeerId;
|
||||
use logging::crit;
|
||||
use smallvec::SmallVec;
|
||||
use std::{
|
||||
collections::{hash_map::Entry, VecDeque},
|
||||
@@ -27,6 +28,7 @@ use std::{
|
||||
};
|
||||
use tokio::time::{sleep, Sleep};
|
||||
use tokio_util::time::{delay_queue, DelayQueue};
|
||||
use tracing::{debug, trace};
|
||||
use types::{EthSpec, ForkContext};
|
||||
|
||||
/// The number of times to retry an outbound upgrade in the case of IO errors.
|
||||
@@ -135,11 +137,11 @@ where
|
||||
/// Waker, to be sure the handler gets polled when needed.
|
||||
waker: Option<std::task::Waker>,
|
||||
|
||||
/// Logger for handling RPC streams
|
||||
log: slog::Logger,
|
||||
|
||||
/// Timeout that will me used for inbound and outbound responses.
|
||||
resp_timeout: Duration,
|
||||
|
||||
/// Information about this handler for logging purposes.
|
||||
log_info: (PeerId, ConnectionId),
|
||||
}
|
||||
|
||||
enum HandlerState {
|
||||
@@ -221,8 +223,9 @@ where
|
||||
pub fn new(
|
||||
listen_protocol: SubstreamProtocol<RPCProtocol<E>, ()>,
|
||||
fork_context: Arc<ForkContext>,
|
||||
log: &slog::Logger,
|
||||
resp_timeout: Duration,
|
||||
peer_id: PeerId,
|
||||
connection_id: ConnectionId,
|
||||
) -> Self {
|
||||
RPCHandler {
|
||||
listen_protocol,
|
||||
@@ -240,8 +243,8 @@ where
|
||||
outbound_io_error_retries: 0,
|
||||
fork_context,
|
||||
waker: None,
|
||||
log: log.clone(),
|
||||
resp_timeout,
|
||||
log_info: (peer_id, connection_id),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,7 +253,12 @@ where
|
||||
fn shutdown(&mut self, goodbye_reason: Option<(Id, GoodbyeReason)>) {
|
||||
if matches!(self.state, HandlerState::Active) {
|
||||
if !self.dial_queue.is_empty() {
|
||||
debug!(self.log, "Starting handler shutdown"; "unsent_queued_requests" => self.dial_queue.len());
|
||||
debug!(
|
||||
unsent_queued_requests = self.dial_queue.len(),
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Starting handler shutdown"
|
||||
);
|
||||
}
|
||||
// We now drive to completion communications already dialed/established
|
||||
while let Some((id, req)) = self.dial_queue.pop() {
|
||||
@@ -297,8 +305,10 @@ where
|
||||
let Some(inbound_info) = self.inbound_substreams.get_mut(&inbound_id) else {
|
||||
if !matches!(response, RpcResponse::StreamTermination(..)) {
|
||||
// the stream is closed after sending the expected number of responses
|
||||
trace!(self.log, "Inbound stream has expired. Response not sent";
|
||||
"response" => %response, "id" => inbound_id);
|
||||
trace!(%response, id = ?inbound_id,
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Inbound stream has expired. Response not sent");
|
||||
}
|
||||
return;
|
||||
};
|
||||
@@ -313,8 +323,10 @@ where
|
||||
|
||||
if matches!(self.state, HandlerState::Deactivated) {
|
||||
// we no longer send responses after the handler is deactivated
|
||||
debug!(self.log, "Response not sent. Deactivated handler";
|
||||
"response" => %response, "id" => inbound_id);
|
||||
debug!(%response, id = ?inbound_id,
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Response not sent. Deactivated handler");
|
||||
return;
|
||||
}
|
||||
inbound_info.pending_items.push_back(response);
|
||||
@@ -381,7 +393,11 @@ where
|
||||
match delay.as_mut().poll(cx) {
|
||||
Poll::Ready(_) => {
|
||||
self.state = HandlerState::Deactivated;
|
||||
debug!(self.log, "Shutdown timeout elapsed, Handler deactivated");
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Shutdown timeout elapsed, Handler deactivated"
|
||||
);
|
||||
return Poll::Ready(ConnectionHandlerEvent::NotifyBehaviour(
|
||||
HandlerEvent::Close(RPCError::Disconnected),
|
||||
));
|
||||
@@ -428,7 +444,10 @@ where
|
||||
outbound_err,
|
||||
)));
|
||||
} else {
|
||||
crit!(self.log, "timed out substream not in the books"; "stream_id" => outbound_id.get_ref());
|
||||
crit!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
stream_id = ?outbound_id.get_ref(), "timed out substream not in the books");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -557,10 +576,24 @@ where
|
||||
// BlocksByRange is the one that typically consumes the most time.
|
||||
// Its useful to log when the request was completed.
|
||||
if matches!(info.protocol, Protocol::BlocksByRange) {
|
||||
debug!(self.log, "BlocksByRange Response sent"; "duration" => Instant::now().duration_since(info.request_start_time).as_secs());
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
duration = Instant::now()
|
||||
.duration_since(info.request_start_time)
|
||||
.as_secs(),
|
||||
"BlocksByRange Response sent"
|
||||
);
|
||||
}
|
||||
if matches!(info.protocol, Protocol::BlobsByRange) {
|
||||
debug!(self.log, "BlobsByRange Response sent"; "duration" => Instant::now().duration_since(info.request_start_time).as_secs());
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
duration = Instant::now()
|
||||
.duration_since(info.request_start_time)
|
||||
.as_secs(),
|
||||
"BlobsByRange Response sent"
|
||||
);
|
||||
}
|
||||
|
||||
// There is nothing more to process on this substream as it has
|
||||
@@ -583,10 +616,20 @@ where
|
||||
}));
|
||||
|
||||
if matches!(info.protocol, Protocol::BlocksByRange) {
|
||||
debug!(self.log, "BlocksByRange Response failed"; "duration" => info.request_start_time.elapsed().as_secs());
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
duration = info.request_start_time.elapsed().as_secs(),
|
||||
"BlocksByRange Response failed"
|
||||
);
|
||||
}
|
||||
if matches!(info.protocol, Protocol::BlobsByRange) {
|
||||
debug!(self.log, "BlobsByRange Response failed"; "duration" => info.request_start_time.elapsed().as_secs());
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
duration = info.request_start_time.elapsed().as_secs(),
|
||||
"BlobsByRange Response failed"
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -695,7 +738,7 @@ where
|
||||
// stream closed
|
||||
// if we expected multiple streams send a stream termination,
|
||||
// else report the stream terminating only.
|
||||
//trace!(self.log, "RPC Response - stream closed by remote");
|
||||
//"RPC Response - stream closed by remote");
|
||||
// drop the stream
|
||||
let delay_key = &entry.get().delay_key;
|
||||
let request_id = entry.get().req_id;
|
||||
@@ -772,7 +815,11 @@ where
|
||||
}
|
||||
}
|
||||
OutboundSubstreamState::Poisoned => {
|
||||
crit!(self.log, "Poisoned outbound substream");
|
||||
crit!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Poisoned outbound substream"
|
||||
);
|
||||
unreachable!("Coding Error: Outbound substream is poisoned")
|
||||
}
|
||||
}
|
||||
@@ -804,7 +851,11 @@ where
|
||||
&& self.events_out.is_empty()
|
||||
&& self.dial_negotiated == 0
|
||||
{
|
||||
debug!(self.log, "Goodbye sent, Handler deactivated");
|
||||
debug!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
"Goodbye sent, Handler deactivated"
|
||||
);
|
||||
self.state = HandlerState::Deactivated;
|
||||
return Poll::Ready(ConnectionHandlerEvent::NotifyBehaviour(
|
||||
HandlerEvent::Close(RPCError::Disconnected),
|
||||
@@ -997,7 +1048,11 @@ where
|
||||
)
|
||||
.is_some()
|
||||
{
|
||||
crit!(self.log, "Duplicate outbound substream id"; "id" => self.current_outbound_substream_id);
|
||||
crit!(
|
||||
peer_id = %self.log_info.0,
|
||||
connection_id = %self.log_info.1,
|
||||
|
||||
id = ?self.current_outbound_substream_id, "Duplicate outbound substream id");
|
||||
}
|
||||
self.current_outbound_substream_id.0 += 1;
|
||||
}
|
||||
@@ -1045,17 +1100,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl slog::Value for SubstreamId {
|
||||
fn serialize(
|
||||
&self,
|
||||
record: &slog::Record,
|
||||
key: slog::Key,
|
||||
serializer: &mut dyn slog::Serializer,
|
||||
) -> slog::Result {
|
||||
slog::Value::serialize(&self.0, record, key, serializer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a future that can be polled that will send any queued message to the peer.
|
||||
///
|
||||
/// This function returns the given substream, along with whether it has been closed or not. Any
|
||||
|
||||
Reference in New Issue
Block a user