Integrate tracing (#6339)

Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))


  - [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components

cc: @macladson @eserilev
This commit is contained in:
ThreeHrSleep
2025-03-13 04:01:05 +05:30
committed by GitHub
parent f23f984f85
commit d60c24ef1c
241 changed files with 9485 additions and 9328 deletions

View File

@@ -18,7 +18,6 @@ use eth2::types::{
use futures::{stream, StreamExt};
use parking_lot::RwLock;
use safe_arith::{ArithError, SafeArith};
use slog::{debug, error, info, warn, Logger};
use slot_clock::SlotClock;
use std::cmp::min;
use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
@@ -26,6 +25,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::{sync::mpsc::Sender, time::sleep};
use tracing::{debug, error, info, warn};
use types::{ChainSpec, Epoch, EthSpec, Hash256, PublicKeyBytes, SelectionProof, Slot};
use validator_metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY};
use validator_store::{Error as ValidatorStoreError, ValidatorStore};
@@ -378,7 +378,6 @@ pub fn start_update_service<T: SlotClock + 'static, E: EthSpec>(
* Spawn the task which keeps track of local block proposal duties.
*/
let duties_service = core_duties_service.clone();
let log = core_duties_service.context.log().clone();
core_duties_service.context.executor.spawn(
async move {
loop {
@@ -394,9 +393,8 @@ pub fn start_update_service<T: SlotClock + 'static, E: EthSpec>(
if let Err(e) = poll_beacon_proposers(&duties_service, &mut block_service_tx).await
{
error!(
log,
"Failed to poll beacon proposers";
"error" => ?e
error = ?e,
"Failed to poll beacon proposers"
)
}
}
@@ -413,7 +411,6 @@ pub fn start_update_service<T: SlotClock + 'static, E: EthSpec>(
* Spawn the task which keeps track of local attestation duties.
*/
let duties_service = core_duties_service.clone();
let log = core_duties_service.context.log().clone();
core_duties_service.context.executor.spawn(
async move {
loop {
@@ -428,9 +425,8 @@ pub fn start_update_service<T: SlotClock + 'static, E: EthSpec>(
if let Err(e) = poll_beacon_attesters(&duties_service).await {
error!(
log,
"Failed to poll beacon attesters";
"error" => ?e
error = ?e,
"Failed to poll beacon attesters"
);
}
}
@@ -440,15 +436,13 @@ pub fn start_update_service<T: SlotClock + 'static, E: EthSpec>(
// Spawn the task which keeps track of local sync committee duties.
let duties_service = core_duties_service.clone();
let log = core_duties_service.context.log().clone();
core_duties_service.context.executor.spawn(
async move {
loop {
if let Err(e) = poll_sync_committee_duties(&duties_service).await {
error!(
log,
"Failed to poll sync committee duties";
"error" => ?e
error = ?e,
"Failed to poll sync committee duties"
);
}
@@ -480,8 +474,6 @@ async fn poll_validator_indices<T: SlotClock + 'static, E: EthSpec>(
&[validator_metrics::UPDATE_INDICES],
);
let log = duties_service.context.log();
// Collect *all* pubkeys for resolving indices, even those undergoing doppelganger protection.
//
// Since doppelganger protection queries rely on validator indices it is important to ensure we
@@ -547,11 +539,10 @@ async fn poll_validator_indices<T: SlotClock + 'static, E: EthSpec>(
match download_result {
Ok(Some(response)) => {
info!(
log,
"Validator exists in beacon chain";
"pubkey" => ?pubkey,
"validator_index" => response.data.index,
"fee_recipient" => fee_recipient
?pubkey,
validator_index = response.data.index,
fee_recipient,
"Validator exists in beacon chain"
);
duties_service
.validator_store
@@ -575,21 +566,15 @@ async fn poll_validator_indices<T: SlotClock + 'static, E: EthSpec>(
.insert(pubkey, next_poll_slot);
}
debug!(
log,
"Validator without index";
"pubkey" => ?pubkey,
"fee_recipient" => fee_recipient
)
debug!(?pubkey, fee_recipient, "Validator without index")
}
// Don't exit early on an error, keep attempting to resolve other indices.
Err(e) => {
error!(
log,
"Failed to resolve pubkey to index";
"error" => %e,
"pubkey" => ?pubkey,
"fee_recipient" => fee_recipient
error = %e,
?pubkey,
fee_recipient,
"Failed to resolve pubkey to index"
)
}
}
@@ -613,8 +598,6 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
&[validator_metrics::UPDATE_ATTESTERS_CURRENT_EPOCH],
);
let log = duties_service.context.log();
let current_slot = duties_service
.slot_clock
.now()
@@ -653,11 +636,10 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
.await
{
error!(
log,
"Failed to download attester duties";
"current_epoch" => current_epoch,
"request_epoch" => current_epoch,
"err" => ?e,
%current_epoch,
request_epoch = %current_epoch,
err = ?e,
"Failed to download attester duties"
)
}
@@ -675,11 +657,10 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
.await
{
error!(
log,
"Failed to download attester duties";
"current_epoch" => current_epoch,
"request_epoch" => next_epoch,
"err" => ?e,
%current_epoch,
request_epoch = %next_epoch,
err = ?e,
"Failed to download attester duties"
)
}
@@ -758,9 +739,8 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
.await;
if subscription_result.as_ref().is_ok() {
debug!(
log,
"Broadcast attestation subscriptions";
"count" => subscriptions.len(),
count = subscriptions.len(),
"Broadcast attestation subscriptions"
);
for subscription_slots in subscription_slots_to_confirm {
subscription_slots.record_successful_subscription_at(current_slot);
@@ -768,9 +748,8 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
} else if let Err(e) = subscription_result {
if e.num_errors() < duties_service.beacon_nodes.num_total().await {
warn!(
log,
"Some subscriptions failed";
"error" => %e,
error = %e,
"Some subscriptions failed"
);
// If subscriptions were sent to at least one node, regard that as a success.
// There is some redundancy built into the subscription schedule to handle failures.
@@ -779,9 +758,8 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
}
} else {
error!(
log,
"All subscriptions failed";
"error" => %e
error = %e,
"All subscriptions failed"
);
}
}
@@ -809,14 +787,11 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
local_indices: &[u64],
local_pubkeys: &HashSet<PublicKeyBytes>,
) -> Result<(), Error> {
let log = duties_service.context.log();
// No need to bother the BN if we don't have any validators.
if local_indices.is_empty() {
debug!(
duties_service.context.log(),
"No validators, not downloading duties";
"epoch" => epoch,
%epoch,
"No validators, not downloading duties"
);
return Ok(());
}
@@ -895,10 +870,9 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
);
debug!(
log,
"Downloaded attester duties";
"dependent_root" => %dependent_root,
"num_new_duties" => new_duties.len(),
%dependent_root,
num_new_duties = new_duties.len(),
"Downloaded attester duties"
);
// Update the duties service with the new `DutyAndProof` messages.
@@ -929,10 +903,9 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
&& prior_duty_and_proof.duty == duty_and_proof.duty
{
warn!(
log,
"Redundant attester duty update";
"dependent_root" => %dependent_root,
"validator_index" => duty.validator_index,
%dependent_root,
validator_index = duty.validator_index,
"Redundant attester duty update"
);
continue;
}
@@ -940,11 +913,10 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
// Using `already_warned` avoids excessive logs.
if dependent_root != *prior_dependent_root && already_warned.take().is_some() {
warn!(
log,
"Attester duties re-org";
"prior_dependent_root" => %prior_dependent_root,
"dependent_root" => %dependent_root,
"note" => "this may happen from time to time"
%prior_dependent_root,
%dependent_root,
note = "this may happen from time to time",
"Attester duties re-org"
)
}
*mut_value = (dependent_root, duty_and_proof);
@@ -1056,8 +1028,6 @@ async fn fill_in_selection_proofs<T: SlotClock + 'static, E: EthSpec>(
duties: Vec<AttesterData>,
dependent_root: Hash256,
) {
let log = duties_service.context.log();
// Sort duties by slot in a BTreeMap.
let mut duties_by_slot: BTreeMap<Slot, Vec<_>> = BTreeMap::new();
@@ -1125,20 +1095,18 @@ async fn fill_in_selection_proofs<T: SlotClock + 'static, E: EthSpec>(
// A pubkey can be missing when a validator was recently
// removed via the API.
warn!(
log,
"Missing pubkey for duty and proof";
"info" => "a validator may have recently been removed from this VC",
"pubkey" => ?pubkey,
info = "a validator may have recently been removed from this VC",
?pubkey,
"Missing pubkey for duty and proof"
);
// Do not abort the entire batch for a single failure.
continue;
}
Err(e) => {
error!(
log,
"Failed to produce duty and proof";
"error" => ?e,
"msg" => "may impair attestation duties"
error = ?e,
msg = "may impair attestation duties",
"Failed to produce duty and proof"
);
// Do not abort the entire batch for a single failure.
continue;
@@ -1163,9 +1131,8 @@ async fn fill_in_selection_proofs<T: SlotClock + 'static, E: EthSpec>(
// Our selection proofs are no longer relevant due to a reorg, abandon
// this entire background process.
debug!(
log,
"Stopping selection proof background task";
"reason" => "re-org"
reason = "re-org",
"Stopping selection proof background task"
);
return;
}
@@ -1188,11 +1155,10 @@ async fn fill_in_selection_proofs<T: SlotClock + 'static, E: EthSpec>(
let time_taken_ms =
Duration::from_secs_f64(timer.map_or(0.0, |t| t.stop_and_record())).as_millis();
debug!(
log,
"Computed attestation selection proofs";
"batch_size" => batch_size,
"lookahead_slot" => lookahead_slot,
"time_taken_ms" => time_taken_ms
batch_size,
%lookahead_slot,
time_taken_ms,
"Computed attestation selection proofs"
);
} else {
// Just sleep for one slot if we are unable to read the system clock, this gives
@@ -1234,8 +1200,6 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
&[validator_metrics::UPDATE_PROPOSERS],
);
let log = duties_service.context.log();
let current_slot = duties_service
.slot_clock
.now()
@@ -1251,7 +1215,6 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
&initial_block_proposers,
block_service_tx,
&duties_service.validator_store,
log,
)
.await;
@@ -1290,10 +1253,9 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
.collect::<Vec<_>>();
debug!(
log,
"Downloaded proposer duties";
"dependent_root" => %dependent_root,
"num_relevant_duties" => relevant_duties.len(),
%dependent_root,
num_relevant_duties = relevant_duties.len(),
"Downloaded proposer duties"
);
if let Some((prior_dependent_root, _)) = duties_service
@@ -1303,20 +1265,18 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
{
if dependent_root != prior_dependent_root {
warn!(
log,
"Proposer duties re-org";
"prior_dependent_root" => %prior_dependent_root,
"dependent_root" => %dependent_root,
"msg" => "this may happen from time to time"
%prior_dependent_root,
%dependent_root,
msg = "this may happen from time to time",
"Proposer duties re-org"
)
}
}
}
// Don't return early here, we still want to try and produce blocks using the cached values.
Err(e) => error!(
log,
"Failed to download proposer duties";
"err" => %e,
err = %e,
"Failed to download proposer duties"
),
}
@@ -1341,13 +1301,11 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
&additional_block_producers,
block_service_tx,
&duties_service.validator_store,
log,
)
.await;
debug!(
log,
"Detected new block proposer";
"current_slot" => current_slot,
%current_slot,
"Detected new block proposer"
);
validator_metrics::inc_counter(&validator_metrics::PROPOSAL_CHANGED);
}
@@ -1368,7 +1326,6 @@ async fn notify_block_production_service<T: SlotClock + 'static, E: EthSpec>(
block_proposers: &HashSet<PublicKeyBytes>,
block_service_tx: &mut Sender<BlockServiceNotification>,
validator_store: &ValidatorStore<T, E>,
log: &Logger,
) {
let non_doppelganger_proposers = block_proposers
.iter()
@@ -1385,10 +1342,9 @@ async fn notify_block_production_service<T: SlotClock + 'static, E: EthSpec>(
.await
{
error!(
log,
"Failed to notify block service";
"current_slot" => current_slot,
"error" => %e
%current_slot,
error = %e,
"Failed to notify block service"
);
};
}