Integrate tracing (#6339)

Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))


  - [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components

cc: @macladson @eserilev
This commit is contained in:
ThreeHrSleep
2025-03-13 04:01:05 +05:30
committed by GitHub
parent f23f984f85
commit d60c24ef1c
241 changed files with 9485 additions and 9328 deletions

View File

@@ -51,10 +51,11 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
use lighthouse_network::rpc::GoodbyeReason;
use lighthouse_network::service::api_types::Id;
use lighthouse_network::{PeerId, SyncInfo};
use logging::crit;
use lru_cache::LRUTimeCache;
use slog::{crit, debug, trace, warn};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, instrument, trace, warn};
use types::{Epoch, EthSpec, Hash256};
/// For how long we store failed finalized chains to prevent retries.
@@ -74,23 +75,26 @@ pub struct RangeSync<T: BeaconChainTypes> {
chains: ChainCollection<T>,
/// Chains that have failed and are stored to prevent being retried.
failed_chains: LRUTimeCache<Hash256>,
/// The syncing logger.
log: slog::Logger,
}
impl<T: BeaconChainTypes> RangeSync<T>
where
T: BeaconChainTypes,
{
pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: slog::Logger) -> Self {
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
RangeSync {
beacon_chain: beacon_chain.clone(),
chains: ChainCollection::new(beacon_chain, log.clone()),
chains: ChainCollection::new(beacon_chain),
failed_chains: LRUTimeCache::new(std::time::Duration::from_secs(
FAILED_CHAINS_EXPIRY_SECONDS,
)),
awaiting_head_peers: HashMap::new(),
log,
}
}
@@ -99,6 +103,12 @@ where
self.failed_chains.keys().copied().collect()
}
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn state(&self) -> SyncChainStatus {
self.chains.state()
}
@@ -108,6 +118,12 @@ where
/// may need to be synced as a result. A new peer, may increase the peer pool of a finalized
/// chain, this may result in a different finalized chain from syncing as finalized chains are
/// prioritised by peer-pool size.
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn add_peer(
&mut self,
network: &mut SyncNetworkContext<T>,
@@ -133,14 +149,13 @@ where
RangeSyncType::Finalized => {
// Make sure we have not recently tried this chain
if self.failed_chains.contains(&remote_info.finalized_root) {
debug!(self.log, "Disconnecting peer that belongs to previously failed chain";
"failed_root" => %remote_info.finalized_root, "peer_id" => %peer_id);
debug!(failed_root = ?remote_info.finalized_root, %peer_id,"Disconnecting peer that belongs to previously failed chain");
network.goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork);
return;
}
// Finalized chain search
debug!(self.log, "Finalization sync peer joined"; "peer_id" => %peer_id);
debug!(%peer_id, "Finalization sync peer joined");
self.awaiting_head_peers.remove(&peer_id);
// Because of our change in finalized sync batch size from 2 to 1 and our transition
@@ -171,8 +186,7 @@ where
if self.chains.is_finalizing_sync() {
// If there are finalized chains to sync, finish these first, before syncing head
// chains.
trace!(self.log, "Waiting for finalized sync to complete";
"peer_id" => %peer_id, "awaiting_head_peers" => &self.awaiting_head_peers.len());
trace!(%peer_id, awaiting_head_peers = &self.awaiting_head_peers.len(),"Waiting for finalized sync to complete");
self.awaiting_head_peers.insert(peer_id, remote_info);
return;
}
@@ -204,6 +218,12 @@ where
///
/// This function finds the chain that made this request. Once found, processes the result.
/// This request could complete a chain or simply add to its progress.
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn blocks_by_range_response(
&mut self,
network: &mut SyncNetworkContext<T>,
@@ -229,11 +249,17 @@ where
}
}
Err(_) => {
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
trace!(%chain_id, "BlocksByRange response for removed chain")
}
}
}
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn handle_block_process_result(
&mut self,
network: &mut SyncNetworkContext<T>,
@@ -259,13 +285,19 @@ where
}
Err(_) => {
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
trace!(%chain_id, "BlocksByRange response for removed chain")
}
}
}
/// A peer has disconnected. This removes the peer from any ongoing chains and mappings. A
/// disconnected peer could remove a chain
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn peer_disconnect(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
// if the peer is in the awaiting head mapping, remove it
self.awaiting_head_peers.remove(peer_id);
@@ -278,6 +310,12 @@ where
/// which pool the peer is in. The chain may also have a batch or batches awaiting
/// for this peer. If so we mark the batch as failed. The batch may then hit it's maximum
/// retries. In this case, we need to remove the chain.
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
fn remove_peer(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
for (removed_chain, sync_type, remove_reason) in self
.chains
@@ -297,6 +335,12 @@ where
///
/// Check to see if the request corresponds to a pending batch. If so, re-request it if possible, if there have
/// been too many failed attempts for the batch, remove the chain.
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn inject_error(
&mut self,
network: &mut SyncNetworkContext<T>,
@@ -321,11 +365,17 @@ where
}
}
Err(_) => {
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
trace!(%chain_id, "BlocksByRange response for removed chain")
}
}
}
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
fn on_chain_removed(
&mut self,
chain: SyncingChain<T>,
@@ -335,14 +385,18 @@ where
op: &'static str,
) {
if remove_reason.is_critical() {
crit!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
crit!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
} else {
debug!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
debug!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
}
if let RemoveChain::ChainFailed { blacklist, .. } = remove_reason {
if RangeSyncType::Finalized == sync_type && blacklist {
warn!(self.log, "Chain failed! Syncing to its head won't be retried for at least the next {} seconds", FAILED_CHAINS_EXPIRY_SECONDS; &chain);
warn!(
%chain,
"Chain failed! Syncing to its head won't be retried for at least the next {} seconds",
FAILED_CHAINS_EXPIRY_SECONDS
);
self.failed_chains.insert(chain.target_head_root);
}
}
@@ -369,6 +423,12 @@ where
}
/// Kickstarts sync.
#[instrument(parent = None,
level = "info",
fields(component = "range_sync"),
name = "range_sync",
skip_all
)]
pub fn resume(&mut self, network: &mut SyncNetworkContext<T>) {
for (removed_chain, sync_type, remove_reason) in
self.chains.call_all(|chain| chain.resume(network))