Integrate tracing (#6339)

Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md) (L297)

- [x] replace slog & log with tracing throughout the codebase (see the sketch after this list)
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] rewrite the SSE logging components
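
Most of the diff below is one mechanical rewrite: slog macros take an explicit `Logger` argument plus `"key" => value` pairs after the message, while tracing macros take `key = value` fields before the message and resolve the subscriber globally. A minimal before/after sketch of that conversion (illustrative only, not a line from this commit):

```rust
use tracing::error;

fn report(e: &std::io::Error) {
    // Before (slog): error!(log, "State reconstruction failed"; "error" => ?e);
    // After (tracing): fields first, message last, no logger threaded through.
    error!(error = ?e, "State reconstruction failed");
}
```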

cc: @macladson @eserilev
Author: ThreeHrSleep
Date: 2025-03-13 04:01:05 +05:30
Committed by: GitHub
Parent: f23f984f85
Commit: d60c24ef1c

241 changed files with 9485 additions and 9328 deletions


@@ -3,7 +3,6 @@ use crate::errors::BeaconChainError;
use crate::head_tracker::{HeadTracker, SszHeadTracker};
use crate::persisted_beacon_chain::{PersistedBeaconChain, DUMMY_CANONICAL_HEAD_BLOCK_ROOT};
use parking_lot::Mutex;
use slog::{debug, error, info, warn, Logger};
use std::collections::{HashMap, HashSet};
use std::mem;
use std::sync::{mpsc, Arc};
@@ -13,6 +12,7 @@ use store::hot_cold_store::{migrate_database, HotColdDBError};
use store::iter::RootsIterator;
use store::{Error, ItemStore, StoreItem, StoreOp};
pub use store::{HotColdDB, MemoryStore};
use tracing::{debug, error, info, warn};
use types::{
BeaconState, BeaconStateError, BeaconStateHash, Checkpoint, Epoch, EthSpec, FixedBytesExtended,
Hash256, SignedBeaconBlockHash, Slot,
@@ -44,7 +44,6 @@ pub struct BackgroundMigrator<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>>
tx_thread: Option<Mutex<(mpsc::Sender<Notification>, thread::JoinHandle<()>)>>,
/// Genesis block root, for persisting the `PersistedBeaconChain`.
genesis_block_root: Hash256,
log: Logger,
}
#[derive(Debug, Clone, PartialEq, Eq)]
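
The struct change above is the other half of the pattern: because tracing dispatches every event through the process-global subscriber, the `log: Logger` field, and each `log: Logger` / `log: &Logger` parameter in the hunks that follow, can simply be deleted. A hedged sketch of the resulting shape, with hypothetical names:

```rust
use tracing::debug;

// Hypothetical type: after the migration, no logger handle is stored.
struct Migrator {
    genesis_block_root: [u8; 32],
}

impl Migrator {
    fn consolidate(&self) {
        // The macro locates the globally installed subscriber on its own.
        debug!("Database consolidation started");
    }
}
```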
@@ -140,7 +139,6 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
db: Arc<HotColdDB<E, Hot, Cold>>,
config: MigratorConfig,
genesis_block_root: Hash256,
log: Logger,
) -> Self {
// Estimate last migration run from DB split slot.
let prev_migration = Arc::new(Mutex::new(PrevMigration {
@@ -150,14 +148,13 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
let tx_thread = if config.blocking {
None
} else {
Some(Mutex::new(Self::spawn_thread(db.clone(), log.clone())))
Some(Mutex::new(Self::spawn_thread(db.clone())))
};
Self {
db,
tx_thread,
prev_migration,
genesis_block_root,
log,
}
}
@@ -184,7 +181,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
if let Some(Notification::Finalization(notif)) =
self.send_background_notification(Notification::Finalization(notif))
{
Self::run_migration(self.db.clone(), notif, &self.log);
Self::run_migration(self.db.clone(), notif);
}
Ok(())
@@ -196,7 +193,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
{
// If we are running in foreground mode (as in tests), then this will just run a single
// batch. We may need to tweak this in future.
Self::run_reconstruction(self.db.clone(), None, &self.log);
Self::run_reconstruction(self.db.clone(), None);
}
}
@@ -204,14 +201,13 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
if let Some(Notification::PruneBlobs(data_availability_boundary)) =
self.send_background_notification(Notification::PruneBlobs(data_availability_boundary))
{
Self::run_prune_blobs(self.db.clone(), data_availability_boundary, &self.log);
Self::run_prune_blobs(self.db.clone(), data_availability_boundary);
}
}
pub fn run_reconstruction(
db: Arc<HotColdDB<E, Hot, Cold>>,
opt_tx: Option<mpsc::Sender<Notification>>,
log: &Logger,
) {
match db.reconstruct_historic_states(Some(BLOCKS_PER_RECONSTRUCTION)) {
Ok(()) => {
@@ -221,9 +217,8 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
if !db.get_anchor_info().all_historic_states_stored() {
if let Err(e) = tx.send(Notification::Reconstruction) {
error!(
log,
"Unable to requeue reconstruction notification";
"error" => ?e
error = ?e,
"Unable to requeue reconstruction notification"
);
}
}
@@ -231,24 +226,18 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
}
Err(e) => {
error!(
log,
"State reconstruction failed";
"error" => ?e,
error = ?e,
"State reconstruction failed"
);
}
}
}
pub fn run_prune_blobs(
db: Arc<HotColdDB<E, Hot, Cold>>,
data_availability_boundary: Epoch,
log: &Logger,
) {
pub fn run_prune_blobs(db: Arc<HotColdDB<E, Hot, Cold>>, data_availability_boundary: Epoch) {
if let Err(e) = db.try_prune_blobs(false, data_availability_boundary) {
error!(
log,
"Blob pruning failed";
"error" => ?e,
error = ?e,
"Blob pruning failed"
);
}
}
@@ -264,7 +253,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// Restart the background thread if it has crashed.
if let Err(tx_err) = tx.send(notif) {
let (new_tx, new_thread) = Self::spawn_thread(self.db.clone(), self.log.clone());
let (new_tx, new_thread) = Self::spawn_thread(self.db.clone());
*tx = new_tx;
let old_thread = mem::replace(thread, new_thread);
@@ -273,9 +262,8 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// halted normally just now as a result of us dropping the old `mpsc::Sender`.
if let Err(thread_err) = old_thread.join() {
warn!(
self.log,
"Migration thread died, so it was restarted";
"reason" => format!("{:?}", thread_err)
reason = ?thread_err,
"Migration thread died, so it was restarted"
);
}
@@ -290,21 +278,16 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
}
/// Perform the actual work of `process_finalization`.
fn run_migration(
db: Arc<HotColdDB<E, Hot, Cold>>,
notif: FinalizationNotification,
log: &Logger,
) {
fn run_migration(db: Arc<HotColdDB<E, Hot, Cold>>, notif: FinalizationNotification) {
// Do not run too frequently.
let epoch = notif.finalized_checkpoint.epoch;
let mut prev_migration = notif.prev_migration.lock();
if epoch < prev_migration.epoch + prev_migration.epochs_per_migration {
debug!(
log,
"Database consolidation deferred";
"last_finalized_epoch" => prev_migration.epoch,
"new_finalized_epoch" => epoch,
"epochs_per_migration" => prev_migration.epochs_per_migration,
last_finalized_epoch = %prev_migration.epoch,
new_finalized_epoch = %epoch,
epochs_per_migration = prev_migration.epochs_per_migration,
"Database consolidation deferred"
);
return;
}
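
The `%` and `?` sigils replacing slog's `format!` and `?` conversions are tracing's capture modes: `?expr` records the field with its `Debug` impl, `%expr` with its `Display` impl, and a bare `%ident` (as in `%head_slot` further down) is shorthand for `ident = %ident`. A small sketch:

```rust
use tracing::debug;

fn log_deferral(last_finalized_epoch: u64, new_finalized_epoch: u64, head_slot: u64) {
    // `%` captures via Display, `?` via Debug; `%head_slot` names the
    // field after the expression itself.
    debug!(
        last_finalized_epoch = %last_finalized_epoch,
        new_finalized_epoch = %new_finalized_epoch,
        %head_slot,
        "Database consolidation deferred"
    );
}
```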
@@ -315,7 +298,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
prev_migration.epoch = epoch;
drop(prev_migration);
debug!(log, "Database consolidation started");
debug!("Database consolidation started");
let finalized_state_root = notif.finalized_state_root;
let finalized_block_root = notif.finalized_checkpoint.root;
@@ -324,10 +307,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
Ok(Some(state)) => state,
other => {
error!(
log,
"Migrator failed to load state";
"state_root" => ?finalized_state_root,
"error" => ?other
state_root = ?finalized_state_root,
error = ?other,
"Migrator failed to load state"
);
return;
}
@@ -340,16 +322,14 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
&finalized_state,
notif.finalized_checkpoint,
notif.genesis_block_root,
log,
) {
Ok(PruningOutcome::Successful {
old_finalized_checkpoint,
}) => old_finalized_checkpoint,
Ok(PruningOutcome::DeferredConcurrentHeadTrackerMutation) => {
warn!(
log,
"Pruning deferred because of a concurrent mutation";
"message" => "this is expected only very rarely!"
message = "this is expected only very rarely!",
"Pruning deferred because of a concurrent mutation"
);
return;
}
@@ -358,16 +338,15 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
new_finalized_checkpoint,
}) => {
warn!(
log,
"Ignoring out of order finalization request";
"old_finalized_epoch" => old_finalized_checkpoint.epoch,
"new_finalized_epoch" => new_finalized_checkpoint.epoch,
"message" => "this is expected occasionally due to a (harmless) race condition"
old_finalized_epoch = %old_finalized_checkpoint.epoch,
new_finalized_epoch = %new_finalized_checkpoint.epoch,
message = "this is expected occasionally due to a (harmless) race condition",
"Ignoring out of order finalization request"
);
return;
}
Err(e) => {
warn!(log, "Block pruning failed"; "error" => ?e);
warn!(error = ?e, "Block pruning failed");
return;
}
};
@@ -381,17 +360,12 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
Ok(()) => {}
Err(Error::HotColdDBError(HotColdDBError::FreezeSlotUnaligned(slot))) => {
debug!(
log,
"Database migration postponed, unaligned finalized block";
"slot" => slot.as_u64()
slot = slot.as_u64(),
"Database migration postponed, unaligned finalized block"
);
}
Err(e) => {
warn!(
log,
"Database migration failed";
"error" => format!("{:?}", e)
);
warn!(error = ?e, "Database migration failed");
return;
}
};
@@ -401,12 +375,11 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
db,
old_finalized_checkpoint.epoch,
notif.finalized_checkpoint.epoch,
log,
) {
warn!(log, "Database compaction failed"; "error" => format!("{:?}", e));
warn!(error = ?e, "Database compaction failed");
}
debug!(log, "Database consolidation complete");
debug!("Database consolidation complete");
}
/// Spawn a new child thread to run the migration process.
@@ -414,7 +387,6 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
/// Return a channel handle for sending requests to the thread.
fn spawn_thread(
db: Arc<HotColdDB<E, Hot, Cold>>,
log: Logger,
) -> (mpsc::Sender<Notification>, thread::JoinHandle<()>) {
let (tx, rx) = mpsc::channel();
let inner_tx = tx.clone();
@@ -452,13 +424,13 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// This prevents finalization from being starved while reconstruction runs (a
// problem in previous LH versions).
if let Some(fin) = finalization_notif {
Self::run_migration(db.clone(), fin, &log);
Self::run_migration(db.clone(), fin);
}
if let Some(dab) = prune_blobs_notif {
Self::run_prune_blobs(db.clone(), dab, &log);
Self::run_prune_blobs(db.clone(), dab);
}
if reconstruction_notif.is_some() {
Self::run_reconstruction(db.clone(), Some(inner_tx.clone()), &log);
Self::run_reconstruction(db.clone(), Some(inner_tx.clone()));
}
}
});
@@ -476,7 +448,6 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
new_finalized_state: &BeaconState<E>,
new_finalized_checkpoint: Checkpoint,
genesis_block_root: Hash256,
log: &Logger,
) -> Result<PruningOutcome, BeaconChainError> {
let old_finalized_checkpoint =
store
@@ -515,10 +486,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
}
debug!(
log,
"Starting database pruning";
"old_finalized_epoch" => old_finalized_checkpoint.epoch,
"new_finalized_epoch" => new_finalized_checkpoint.epoch,
old_finalized_epoch = %old_finalized_checkpoint.epoch,
new_finalized_epoch = %new_finalized_checkpoint.epoch,
"Starting database pruning"
);
// For each slot between the new finalized checkpoint and the old finalized checkpoint,
// collect the beacon block root and state root of the canonical chain.
@@ -546,11 +516,10 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
let heads = head_tracker.heads();
debug!(
log,
"Extra pruning information";
"old_finalized_root" => format!("{:?}", old_finalized_checkpoint.root),
"new_finalized_root" => format!("{:?}", new_finalized_checkpoint.root),
"head_count" => heads.len(),
old_finalized_root = ?old_finalized_checkpoint.root,
new_finalized_root = ?new_finalized_checkpoint.root,
head_count = heads.len(),
"Extra pruning information"
);
for (head_hash, head_slot) in heads {
@@ -565,10 +534,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
}
Err(Error::SszDecodeError(e)) => {
warn!(
log,
"Forgetting invalid head block";
"block_root" => ?head_hash,
"error" => ?e,
block_root = ?head_hash,
error = ?e,
"Forgetting invalid head block"
);
abandoned_heads.insert(head_hash);
continue;
@@ -606,10 +574,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// checkpoint, i.e. there aren't any forks starting at a block that is a
// strict ancestor of old_finalized_checkpoint.
warn!(
log,
"Found a chain that should already have been pruned";
"head_block_root" => format!("{:?}", head_hash),
"head_slot" => head_slot,
head_block_root = ?head_hash,
%head_slot,
"Found a chain that should already have been pruned"
);
potentially_abandoned_head.take();
break;
@@ -663,10 +630,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
if let Some(abandoned_head) = potentially_abandoned_head {
debug!(
log,
"Pruning head";
"head_block_root" => format!("{:?}", abandoned_head),
"head_slot" => head_slot,
head_block_root = ?abandoned_head,
%head_slot,
"Pruning head"
);
abandoned_heads.insert(abandoned_head);
abandoned_blocks.extend(
@@ -740,7 +706,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// advance which are not canonical due to blocks being applied on top.
store.prune_old_hot_states()?;
debug!(log, "Database pruning complete");
debug!("Database pruning complete");
Ok(PruningOutcome::Successful {
old_finalized_checkpoint,
@@ -753,7 +719,6 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
db: Arc<HotColdDB<E, Hot, Cold>>,
old_finalized_epoch: Epoch,
new_finalized_epoch: Epoch,
log: &Logger,
) -> Result<(), Error> {
if !db.compact_on_prune() {
return Ok(());
@@ -775,10 +740,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
&& seconds_since_last_compaction > MIN_COMPACTION_PERIOD_SECONDS)
{
info!(
log,
"Starting database compaction";
"old_finalized_epoch" => old_finalized_epoch,
"new_finalized_epoch" => new_finalized_epoch,
%old_finalized_epoch,
%new_finalized_epoch,
"Starting database compaction"
);
db.compact()?;
@@ -787,7 +751,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
.unwrap_or(start_time);
db.store_compaction_timestamp(finish_time)?;
info!(log, "Database compaction complete");
info!("Database compaction complete");
}
Ok(())
}
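
None of the hunks above show where events end up: with the `Logger` plumbing gone, some top-level code must install a global subscriber, which is where the checklist's formatter and custom crit items live (tracing itself has no `crit` level above `ERROR`, so that has to be layered on separately). A minimal, assumption-laden sketch using the stock `tracing-subscriber` crate (with its `env-filter` feature) rather than this PR's actual formatter:

```rust
use tracing_subscriber::EnvFilter;

// Minimal global setup so the macros in the diff have a destination.
// Lighthouse's real initialization in this PR is more involved (custom
// formatter, SSE fan-out); this is only the stock builder, for orientation.
fn init_logging() {
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();
}
```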