Add range sync metrics to track efficiency (#6095)

* Add more range sync metrics to track efficiency

* Add ignored blocks metrics
This commit is contained in:
Lion - dapplion
2024-07-15 20:51:59 +02:00
committed by Age Manning
parent cb1e8dc3f9
commit 4bfca8251d
8 changed files with 151 additions and 28 deletions

View File

@@ -1,4 +1,6 @@
use super::batch::{BatchInfo, BatchProcessingResult, BatchState};
use super::RangeSyncType;
use crate::metrics;
use crate::network_beacon_processor::ChainSegmentProcessId;
use crate::sync::network_context::RangeRequestId;
use crate::sync::{network_context::SyncNetworkContext, BatchOperationOutcome, BatchProcessResult};
@@ -11,6 +13,7 @@ use rand::{seq::SliceRandom, Rng};
use slog::{crit, debug, o, warn};
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
use std::hash::{Hash, Hasher};
use strum::IntoStaticStr;
use types::{Epoch, EthSpec, Hash256, Slot};
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
@@ -53,6 +56,13 @@ pub struct KeepChain;
pub type ChainId = u64;
pub type BatchId = Epoch;
#[derive(Debug, Copy, Clone, IntoStaticStr)]
pub enum SyncingChainType {
Head,
Finalized,
Backfill,
}
/// A chain of blocks that need to be downloaded. Peers who claim to contain the target head
/// root are grouped into the peer pool and queried for batches when downloading the
/// chain.
@@ -60,6 +70,9 @@ pub struct SyncingChain<T: BeaconChainTypes> {
/// A random id used to identify this chain.
id: ChainId,
/// SyncingChain type
pub chain_type: SyncingChainType,
/// The start of the chain segment. Any epoch previous to this one has been validated.
pub start_epoch: Epoch,
@@ -126,6 +139,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
target_head_slot: Slot,
target_head_root: Hash256,
peer_id: PeerId,
chain_type: SyncingChainType,
log: &slog::Logger,
) -> Self {
let mut peers = FnvHashMap::default();
@@ -135,6 +149,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
SyncingChain {
id,
chain_type,
start_epoch,
target_head_slot,
target_head_root,
@@ -171,6 +186,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
self.validated_batches * EPOCHS_PER_BATCH
}
/// Returns the total count of pending blocks in all the batches of this chain
pub fn pending_blocks(&self) -> usize {
self.batches
.values()
.map(|batch| batch.pending_blocks())
.sum()
}
/// Removes a peer from the chain.
/// If the peer has active batches, those are considered failed and re-requested.
pub fn remove_peer(
@@ -305,7 +328,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// result callback. This is done, because an empty batch could end a chain and the logic
// for removing chains and checking completion is in the callback.
let blocks = batch.start_processing()?;
let (blocks, duration_in_awaiting_processing) = batch.start_processing()?;
metrics::observe_duration(
&metrics::SYNCING_CHAIN_BATCH_AWAITING_PROCESSING,
duration_in_awaiting_processing,
);
let process_id = ChainSegmentProcessId::RangeBatchId(self.id, batch_id);
self.current_processing_batch = Some(batch_id);
@@ -469,10 +497,27 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// We consider three cases. Batch was successfully processed, Batch failed processing due
// to a faulty peer, or batch failed processing but the peer can't be deemed faulty.
match result {
BatchProcessResult::Success { was_non_empty } => {
BatchProcessResult::Success {
sent_blocks,
imported_blocks,
} => {
if sent_blocks > imported_blocks {
let ignored_blocks = sent_blocks - imported_blocks;
metrics::inc_counter_vec_by(
&metrics::SYNCING_CHAINS_IGNORED_BLOCKS,
&[self.chain_type.into()],
ignored_blocks as u64,
);
}
metrics::inc_counter_vec(
&metrics::SYNCING_CHAINS_PROCESSED_BATCHES,
&[self.chain_type.into()],
);
batch.processing_completed(BatchProcessingResult::Success)?;
if *was_non_empty {
// was not empty = sent_blocks > 0
if *sent_blocks > 0 {
// If the processed batch was not empty, we can validate previous unvalidated
// blocks.
self.advance_chain(network, batch_id);
@@ -515,7 +560,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
match batch.processing_completed(BatchProcessingResult::FaultyFailure)? {
BatchOperationOutcome::Continue => {
// Chain can continue. Check if it can be moved forward.
if *imported_blocks {
if *imported_blocks > 0 {
// At least one block was successfully verified and imported, so we can be sure all
// previous batches are valid and we only need to download the current failed
// batch.
@@ -1142,3 +1187,12 @@ impl RemoveChain {
)
}
}
impl From<RangeSyncType> for SyncingChainType {
fn from(value: RangeSyncType) -> Self {
match value {
RangeSyncType::Head => Self::Head,
RangeSyncType::Finalized => Self::Finalized,
}
}
}