mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-09 19:51:47 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -51,10 +51,11 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use lighthouse_network::rpc::GoodbyeReason;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::{PeerId, SyncInfo};
|
||||
use logging::crit;
|
||||
use lru_cache::LRUTimeCache;
|
||||
use slog::{crit, debug, trace, warn};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, instrument, trace, warn};
|
||||
use types::{Epoch, EthSpec, Hash256};
|
||||
|
||||
/// For how long we store failed finalized chains to prevent retries.
|
||||
@@ -74,23 +75,26 @@ pub struct RangeSync<T: BeaconChainTypes> {
|
||||
chains: ChainCollection<T>,
|
||||
/// Chains that have failed and are stored to prevent being retried.
|
||||
failed_chains: LRUTimeCache<Hash256>,
|
||||
/// The syncing logger.
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> RangeSync<T>
|
||||
where
|
||||
T: BeaconChainTypes,
|
||||
{
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: slog::Logger) -> Self {
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||
RangeSync {
|
||||
beacon_chain: beacon_chain.clone(),
|
||||
chains: ChainCollection::new(beacon_chain, log.clone()),
|
||||
chains: ChainCollection::new(beacon_chain),
|
||||
failed_chains: LRUTimeCache::new(std::time::Duration::from_secs(
|
||||
FAILED_CHAINS_EXPIRY_SECONDS,
|
||||
)),
|
||||
awaiting_head_peers: HashMap::new(),
|
||||
log,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +103,12 @@ where
|
||||
self.failed_chains.keys().copied().collect()
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn state(&self) -> SyncChainStatus {
|
||||
self.chains.state()
|
||||
}
|
||||
@@ -108,6 +118,12 @@ where
|
||||
/// may need to be synced as a result. A new peer, may increase the peer pool of a finalized
|
||||
/// chain, this may result in a different finalized chain from syncing as finalized chains are
|
||||
/// prioritised by peer-pool size.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn add_peer(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -133,14 +149,13 @@ where
|
||||
RangeSyncType::Finalized => {
|
||||
// Make sure we have not recently tried this chain
|
||||
if self.failed_chains.contains(&remote_info.finalized_root) {
|
||||
debug!(self.log, "Disconnecting peer that belongs to previously failed chain";
|
||||
"failed_root" => %remote_info.finalized_root, "peer_id" => %peer_id);
|
||||
debug!(failed_root = ?remote_info.finalized_root, %peer_id,"Disconnecting peer that belongs to previously failed chain");
|
||||
network.goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork);
|
||||
return;
|
||||
}
|
||||
|
||||
// Finalized chain search
|
||||
debug!(self.log, "Finalization sync peer joined"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Finalization sync peer joined");
|
||||
self.awaiting_head_peers.remove(&peer_id);
|
||||
|
||||
// Because of our change in finalized sync batch size from 2 to 1 and our transition
|
||||
@@ -171,8 +186,7 @@ where
|
||||
if self.chains.is_finalizing_sync() {
|
||||
// If there are finalized chains to sync, finish these first, before syncing head
|
||||
// chains.
|
||||
trace!(self.log, "Waiting for finalized sync to complete";
|
||||
"peer_id" => %peer_id, "awaiting_head_peers" => &self.awaiting_head_peers.len());
|
||||
trace!(%peer_id, awaiting_head_peers = &self.awaiting_head_peers.len(),"Waiting for finalized sync to complete");
|
||||
self.awaiting_head_peers.insert(peer_id, remote_info);
|
||||
return;
|
||||
}
|
||||
@@ -204,6 +218,12 @@ where
|
||||
///
|
||||
/// This function finds the chain that made this request. Once found, processes the result.
|
||||
/// This request could complete a chain or simply add to its progress.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn blocks_by_range_response(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -229,11 +249,17 @@ where
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn handle_block_process_result(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -259,13 +285,19 @@ where
|
||||
}
|
||||
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A peer has disconnected. This removes the peer from any ongoing chains and mappings. A
|
||||
/// disconnected peer could remove a chain
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn peer_disconnect(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
|
||||
// if the peer is in the awaiting head mapping, remove it
|
||||
self.awaiting_head_peers.remove(peer_id);
|
||||
@@ -278,6 +310,12 @@ where
|
||||
/// which pool the peer is in. The chain may also have a batch or batches awaiting
|
||||
/// for this peer. If so we mark the batch as failed. The batch may then hit it's maximum
|
||||
/// retries. In this case, we need to remove the chain.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn remove_peer(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
|
||||
for (removed_chain, sync_type, remove_reason) in self
|
||||
.chains
|
||||
@@ -297,6 +335,12 @@ where
|
||||
///
|
||||
/// Check to see if the request corresponds to a pending batch. If so, re-request it if possible, if there have
|
||||
/// been too many failed attempts for the batch, remove the chain.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -321,11 +365,17 @@ where
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn on_chain_removed(
|
||||
&mut self,
|
||||
chain: SyncingChain<T>,
|
||||
@@ -335,14 +385,18 @@ where
|
||||
op: &'static str,
|
||||
) {
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
crit!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
|
||||
} else {
|
||||
debug!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
debug!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
|
||||
}
|
||||
|
||||
if let RemoveChain::ChainFailed { blacklist, .. } = remove_reason {
|
||||
if RangeSyncType::Finalized == sync_type && blacklist {
|
||||
warn!(self.log, "Chain failed! Syncing to its head won't be retried for at least the next {} seconds", FAILED_CHAINS_EXPIRY_SECONDS; &chain);
|
||||
warn!(
|
||||
%chain,
|
||||
"Chain failed! Syncing to its head won't be retried for at least the next {} seconds",
|
||||
FAILED_CHAINS_EXPIRY_SECONDS
|
||||
);
|
||||
self.failed_chains.insert(chain.target_head_root);
|
||||
}
|
||||
}
|
||||
@@ -369,6 +423,12 @@ where
|
||||
}
|
||||
|
||||
/// Kickstarts sync.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn resume(&mut self, network: &mut SyncNetworkContext<T>) {
|
||||
for (removed_chain, sync_type, remove_reason) in
|
||||
self.chains.call_all(|chain| chain.resume(network))
|
||||
|
||||
Reference in New Issue
Block a user