Add timing for block availability (#5510)

* Add timing for block availability

* Attestation metrics analysis

* Prettier printing

* Add some metrics and timings to track late blocks

* Update to latest unstable

* fmt

* Merge latest unstable

* Small tweaks

* Try pushing blob timing down into verification

* Simplify for clippy
This commit is contained in:
Michael Sproul
2024-04-23 23:13:34 +10:00
committed by GitHub
parent 82b131d37f
commit 72a33604b3
12 changed files with 391 additions and 156 deletions

View File

@@ -2953,7 +2953,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
}
/// Wraps `process_block` in logic to cache the block's commitments in the processing cache
/// and evict if the block was imported or erred.
/// and evict if the block was imported or errored.
pub async fn process_block_with_early_caching<B: IntoExecutionPendingBlock<T>>(
self: &Arc<Self>,
block_root: Hash256,
@@ -2998,22 +2998,20 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
// Increment the Prometheus counter for block processing requests.
metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS);
let block_slot = unverified_block.block().slot();
// Set observed time if not already set. Usually this should be set by gossip or RPC,
// but just in case we set it again here (useful for tests).
if let (Some(seen_timestamp), Some(current_slot)) =
(self.slot_clock.now_duration(), self.slot_clock.now())
{
if let Some(seen_timestamp) = self.slot_clock.now_duration() {
self.block_times_cache.write().set_time_observed(
block_root,
current_slot,
block_slot,
seen_timestamp,
None,
None,
);
}
let block_slot = unverified_block.block().slot();
// A small closure to group the verification and import errors.
let chain = self.clone();
let import_block = async move {
@@ -3024,6 +3022,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
)?;
publish_fn()?;
let executed_block = chain.into_executed_block(execution_pending).await?;
// Record the time it took to ask the execution layer.
if let Some(seen_timestamp) = self.slot_clock.now_duration() {
self.block_times_cache.write().set_execution_time(
block_root,
block_slot,
seen_timestamp,
)
}
match executed_block {
ExecutedBlock::Available(block) => {
self.import_available_block(Box::new(block)).await
@@ -3090,8 +3097,8 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
}
}
/// Accepts a fully-verified block and awaits on it's payload verification handle to
/// get a fully `ExecutedBlock`
/// Accepts a fully-verified block and awaits on its payload verification handle to
/// get a fully `ExecutedBlock`.
///
/// An error is returned if the verification handle couldn't be awaited.
pub async fn into_executed_block(
@@ -3224,10 +3231,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
) -> Result<AvailabilityProcessingStatus, BlockError<T::EthSpec>> {
match availability {
Availability::Available(block) => {
// This is the time since start of the slot where all the components of the block have become available
let delay =
get_slot_delay_ms(timestamp_now(), block.block.slot(), &self.slot_clock);
metrics::observe_duration(&metrics::BLOCK_AVAILABILITY_DELAY, delay);
// Block is fully available, import into fork choice
self.import_available_block(block).await
}
@@ -3256,6 +3259,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
consensus_context,
} = import_data;
// Record the time at which this block's blobs became available.
if let Some(blobs_available) = block.blobs_available_timestamp() {
self.block_times_cache.write().set_time_blob_observed(
block_root,
block.slot(),
blobs_available,
);
}
// import
let chain = self.clone();
let block_root = self
@@ -3396,6 +3408,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
"Early attester cache insert failed";
"error" => ?e
);
} else {
let attestable_timestamp =
self.slot_clock.now_duration().unwrap_or_default();
self.block_times_cache.write().set_time_attestable(
block_root,
signed_block.slot(),
attestable_timestamp,
)
}
} else {
warn!(
@@ -3885,25 +3905,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
);
}
// Do not store metrics if the block was > 4 slots old, this helps prevent noise during
// sync.
if block_delay_total < self.slot_clock.slot_duration() * 4 {
// Observe the delay between when we observed the block and when we imported it.
let block_delays = self.block_times_cache.read().get_block_delays(
block_root,
self.slot_clock
.start_of(current_slot)
.unwrap_or_else(|| Duration::from_secs(0)),
);
metrics::observe_duration(
&metrics::BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME,
block_delays
.imported
.unwrap_or_else(|| Duration::from_secs(0)),
);
}
if let Some(event_handler) = self.event_handler.as_ref() {
if event_handler.has_block_subscribers() {
event_handler.register(EventKind::Block(SseBlock {

View File

@@ -14,6 +14,7 @@ use merkle_proof::MerkleTreeError;
use slog::{debug, warn};
use ssz_derive::{Decode, Encode};
use ssz_types::VariableList;
use std::time::Duration;
use tree_hash::TreeHash;
use types::blob_sidecar::BlobIdentifier;
use types::{
@@ -214,7 +215,10 @@ impl<T: BeaconChainTypes> GossipVerifiedBlob<T> {
pub fn __assumed_valid(blob: Arc<BlobSidecar<T::EthSpec>>) -> Self {
Self {
block_root: blob.block_root(),
blob: KzgVerifiedBlob { blob },
blob: KzgVerifiedBlob {
blob,
seen_timestamp: Duration::from_secs(0),
},
}
}
pub fn id(&self) -> BlobIdentifier {
@@ -260,6 +264,8 @@ impl<T: BeaconChainTypes> GossipVerifiedBlob<T> {
#[ssz(struct_behaviour = "transparent")]
pub struct KzgVerifiedBlob<E: EthSpec> {
blob: Arc<BlobSidecar<E>>,
#[ssz(skip_serializing, skip_deserializing)]
seen_timestamp: Duration,
}
impl<E: EthSpec> PartialOrd for KzgVerifiedBlob<E> {
@@ -275,8 +281,12 @@ impl<E: EthSpec> Ord for KzgVerifiedBlob<E> {
}
impl<E: EthSpec> KzgVerifiedBlob<E> {
pub fn new(blob: Arc<BlobSidecar<E>>, kzg: &Kzg) -> Result<Self, KzgError> {
verify_kzg_for_blob(blob, kzg)
pub fn new(
blob: Arc<BlobSidecar<E>>,
kzg: &Kzg,
seen_timestamp: Duration,
) -> Result<Self, KzgError> {
verify_kzg_for_blob(blob, kzg, seen_timestamp)
}
pub fn to_blob(self) -> Arc<BlobSidecar<E>> {
self.blob
@@ -294,12 +304,18 @@ impl<E: EthSpec> KzgVerifiedBlob<E> {
pub fn blob_index(&self) -> u64 {
self.blob.index
}
pub fn seen_timestamp(&self) -> Duration {
self.seen_timestamp
}
/// Construct a `KzgVerifiedBlob` that is assumed to be valid.
///
/// This should ONLY be used for testing.
#[cfg(test)]
pub fn __assumed_valid(blob: Arc<BlobSidecar<E>>) -> Self {
Self { blob }
Self {
blob,
seen_timestamp: Duration::from_secs(0),
}
}
}
@@ -309,9 +325,13 @@ impl<E: EthSpec> KzgVerifiedBlob<E> {
pub fn verify_kzg_for_blob<E: EthSpec>(
blob: Arc<BlobSidecar<E>>,
kzg: &Kzg,
seen_timestamp: Duration,
) -> Result<KzgVerifiedBlob<E>, KzgError> {
validate_blob::<E>(kzg, &blob.blob, blob.kzg_commitment, blob.kzg_proof)?;
Ok(KzgVerifiedBlob { blob })
Ok(KzgVerifiedBlob {
blob,
seen_timestamp,
})
}
pub struct KzgVerifiedBlobList<E: EthSpec> {
@@ -322,13 +342,17 @@ impl<E: EthSpec> KzgVerifiedBlobList<E> {
pub fn new<I: IntoIterator<Item = Arc<BlobSidecar<E>>>>(
blob_list: I,
kzg: &Kzg,
seen_timestamp: Duration,
) -> Result<Self, KzgError> {
let blobs = blob_list.into_iter().collect::<Vec<_>>();
verify_kzg_for_blob_list(blobs.iter(), kzg)?;
Ok(Self {
verified_blobs: blobs
.into_iter()
.map(|blob| KzgVerifiedBlob { blob })
.map(|blob| KzgVerifiedBlob {
blob,
seen_timestamp,
})
.collect(),
})
}
@@ -374,6 +398,8 @@ pub fn validate_blob_sidecar_for_gossip<T: BeaconChainTypes>(
let blob_epoch = blob_slot.epoch(T::EthSpec::slots_per_epoch());
let signed_block_header = &blob_sidecar.signed_block_header;
let seen_timestamp = chain.slot_clock.now_duration().unwrap_or_default();
// This condition is not possible if we have received the blob from the network
// since we only subscribe to `MaxBlobsPerBlock` subnets over gossip network.
// We include this check only for completeness.
@@ -641,8 +667,8 @@ pub fn validate_blob_sidecar_for_gossip<T: BeaconChainTypes>(
.kzg
.as_ref()
.ok_or(GossipBlobError::KzgNotInitialized)?;
let kzg_verified_blob =
KzgVerifiedBlob::new(blob_sidecar, kzg).map_err(GossipBlobError::KzgError)?;
let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar, kzg, seen_timestamp)
.map_err(GossipBlobError::KzgError)?;
Ok(GossipVerifiedBlob {
block_root,

View File

@@ -18,6 +18,9 @@ type BlockRoot = Hash256;
#[derive(Clone, Default)]
pub struct Timestamps {
pub observed: Option<Duration>,
pub all_blobs_observed: Option<Duration>,
pub execution_time: Option<Duration>,
pub attestable: Option<Duration>,
pub imported: Option<Duration>,
pub set_as_head: Option<Duration>,
}
@@ -25,8 +28,25 @@ pub struct Timestamps {
// Helps arrange delay data so it is more relevant to metrics.
#[derive(Debug, Default)]
pub struct BlockDelays {
/// Time after start of slot we saw the block.
pub observed: Option<Duration>,
/// The time after the start of the slot we saw all blobs.
pub all_blobs_observed: Option<Duration>,
/// The time it took to get verification from the EL for the block.
pub execution_time: Option<Duration>,
/// The delay from the start of the slot before the block became available
///
/// Equal to max(`observed + execution_time`, `all_blobs_observed`).
pub available: Option<Duration>,
/// Time after `available`.
pub attestable: Option<Duration>,
/// Time
/// ALSO time after `available`.
///
/// We need to use `available` again rather than `attestable` to handle the case where the block
/// does not get added to the early-attester cache.
pub imported: Option<Duration>,
/// Time after `imported`.
pub set_as_head: Option<Duration>,
}
@@ -35,14 +55,34 @@ impl BlockDelays {
let observed = times
.observed
.and_then(|observed_time| observed_time.checked_sub(slot_start_time));
let all_blobs_observed = times
.all_blobs_observed
.and_then(|all_blobs_observed| all_blobs_observed.checked_sub(slot_start_time));
let execution_time = times
.execution_time
.and_then(|execution_time| execution_time.checked_sub(times.observed?));
// Duration since UNIX epoch at which block became available.
let available_time = times.execution_time.map(|execution_time| {
std::cmp::max(execution_time, times.all_blobs_observed.unwrap_or_default())
});
// Duration from the start of the slot until the block became available.
let available_delay =
available_time.and_then(|available_time| available_time.checked_sub(slot_start_time));
let attestable = times
.attestable
.and_then(|attestable_time| attestable_time.checked_sub(slot_start_time));
let imported = times
.imported
.and_then(|imported_time| imported_time.checked_sub(times.observed?));
.and_then(|imported_time| imported_time.checked_sub(available_time?));
let set_as_head = times
.set_as_head
.and_then(|set_as_head_time| set_as_head_time.checked_sub(times.imported?));
BlockDelays {
observed,
all_blobs_observed,
execution_time,
available: available_delay,
attestable,
imported,
set_as_head,
}
@@ -109,6 +149,53 @@ impl BlockTimesCache {
}
}
pub fn set_time_blob_observed(
&mut self,
block_root: BlockRoot,
slot: Slot,
timestamp: Duration,
) {
let block_times = self
.cache
.entry(block_root)
.or_insert_with(|| BlockTimesCacheValue::new(slot));
if block_times
.timestamps
.all_blobs_observed
.map_or(true, |prev| timestamp > prev)
{
block_times.timestamps.all_blobs_observed = Some(timestamp);
}
}
pub fn set_execution_time(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) {
let block_times = self
.cache
.entry(block_root)
.or_insert_with(|| BlockTimesCacheValue::new(slot));
if block_times
.timestamps
.execution_time
.map_or(true, |prev| timestamp < prev)
{
block_times.timestamps.execution_time = Some(timestamp);
}
}
pub fn set_time_attestable(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) {
let block_times = self
.cache
.entry(block_root)
.or_insert_with(|| BlockTimesCacheValue::new(slot));
if block_times
.timestamps
.attestable
.map_or(true, |prev| timestamp < prev)
{
block_times.timestamps.attestable = Some(timestamp);
}
}
pub fn set_time_imported(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) {
let block_times = self
.cache

View File

@@ -666,8 +666,7 @@ type PayloadVerificationHandle<E> =
/// - Parent is known
/// - Signatures
/// - State root check
/// - Per block processing
/// - Blobs sidecar has been validated if present
/// - Block processing
///
/// Note: a `ExecutionPendingBlock` is not _forever_ valid to be imported, it may later become invalid
/// due to finality or some other event. A `ExecutionPendingBlock` should be imported into the

View File

@@ -1405,13 +1405,6 @@ fn observe_head_block_delays<E: EthSpec, S: SlotClock>(
// Do not store metrics if the block was > 4 slots old, this helps prevent noise during
// sync.
if !block_from_sync {
// Observe the total block delay. This is the delay between the time the slot started
// and when the block was set as head.
metrics::observe_duration(
&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME,
block_delay_total,
);
// Observe the delay between when we imported the block and when we set the block as
// head.
let block_delays = block_times_cache.get_block_delays(
@@ -1421,34 +1414,120 @@ fn observe_head_block_delays<E: EthSpec, S: SlotClock>(
.unwrap_or_else(|| Duration::from_secs(0)),
);
metrics::observe_duration(
&metrics::BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME,
block_delays
.observed
.unwrap_or_else(|| Duration::from_secs(0)),
// Update all the metrics
// Convention here is to use "Time" to indicate the duration of the event and "Delay"
// to indicate the time since the start of the slot.
//
// Observe the total block delay. This is the delay between the time the slot started
// and when the block was set as head.
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_TOTAL,
block_delay_total.as_millis() as i64,
);
metrics::observe_duration(
&metrics::BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME,
// The time at which the beacon block was first observed to be processed
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_OBSERVED_SLOT_START,
block_delays
.observed
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time from the start of the slot when all blobs have been observed. Technically this
// is the time we last saw a blob related to this block/slot.
metrics::set_gauge(
&metrics::BEACON_BLOB_DELAY_ALL_OBSERVED_SLOT_START,
block_delays
.all_blobs_observed
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time it took to check the validity with the EL
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_EXECUTION_TIME,
block_delays
.execution_time
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time the block became available after the start of the slot. Available here means
// that all the blobs have arrived and the block has been verified by the execution layer.
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_AVAILABLE_SLOT_START,
block_delays
.available
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time the block became attestable after the start of the slot.
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_ATTESTABLE_SLOT_START,
block_delays
.attestable
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time the block was imported since becoming available.
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_IMPORTED_TIME,
block_delays
.imported
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// The time the block was imported and setting it as head
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_HEAD_IMPORTED_TIME,
block_delays
.set_as_head
.unwrap_or_else(|| Duration::from_secs(0)),
.unwrap_or_else(|| Duration::from_secs(0))
.as_millis() as i64,
);
// If the block was enshrined as head too late for attestations to be created for it,
// log a debug warning and increment a metric.
let format_delay = |delay: &Option<Duration>| {
delay.map_or("unknown".to_string(), |d| format!("{}", d.as_millis()))
};
if late_head {
metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL);
metrics::inc_counter(&metrics::BEACON_BLOCK_DELAY_HEAD_SLOT_START_EXCEEDED_TOTAL);
debug!(
log,
"Delayed head block";
"block_root" => ?head_block_root,
"proposer_index" => head_block_proposer_index,
"slot" => head_block_slot,
"block_delay" => ?block_delay_total,
"observed_delay" => ?block_delays.observed,
"imported_delay" => ?block_delays.imported,
"set_as_head_delay" => ?block_delays.set_as_head,
"total_delay_ms" => block_delay_total.as_millis(),
"observed_delay_ms" => format_delay(&block_delays.observed),
"blob_delay_ms" => format_delay(&block_delays.all_blobs_observed),
"execution_time_ms" => format_delay(&block_delays.execution_time),
"available_delay_ms" => format_delay(&block_delays.available),
"attestable_delay_ms" => format_delay(&block_delays.attestable),
"imported_time_ms" => format_delay(&block_delays.imported),
"set_as_head_time_ms" => format_delay(&block_delays.set_as_head),
);
} else {
debug!(
log,
"On-time head block";
"block_root" => ?head_block_root,
"proposer_index" => head_block_proposer_index,
"slot" => head_block_slot,
"total_delay_ms" => block_delay_total.as_millis(),
"observed_delay_ms" => format_delay(&block_delays.observed),
"blob_delay_ms" => format_delay(&block_delays.all_blobs_observed),
"execution_time_ms" => format_delay(&block_delays.execution_time),
"available_delay_ms" => format_delay(&block_delays.available),
"attestable_delay_ms" => format_delay(&block_delays.attestable),
"imported_time_ms" => format_delay(&block_delays.imported),
"set_as_head_time_ms" => format_delay(&block_delays.set_as_head),
);
}
}

View File

@@ -14,6 +14,7 @@ use std::fmt;
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::sync::Arc;
use std::time::Duration;
use task_executor::TaskExecutor;
use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList};
use types::{BlobSidecarList, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock};
@@ -176,8 +177,14 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
return Err(AvailabilityCheckError::KzgNotInitialized);
};
let verified_blobs = KzgVerifiedBlobList::new(Vec::from(blobs).into_iter().flatten(), kzg)
.map_err(AvailabilityCheckError::Kzg)?;
let seen_timestamp = self
.slot_clock
.now_duration()
.ok_or(AvailabilityCheckError::SlotClockError)?;
let verified_blobs =
KzgVerifiedBlobList::new(Vec::from(blobs).into_iter().flatten(), kzg, seen_timestamp)
.map_err(AvailabilityCheckError::Kzg)?;
self.availability_cache
.put_kzg_verified_blobs(block_root, verified_blobs)
@@ -225,6 +232,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
block_root,
block,
blobs: None,
blobs_available_timestamp: None,
}))
}
}
@@ -244,6 +252,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
block_root,
block,
blobs: verified_blobs,
blobs_available_timestamp: None,
}))
}
}
@@ -289,6 +298,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
block_root,
block,
blobs: None,
blobs_available_timestamp: None,
}))
}
}
@@ -303,6 +313,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
block_root,
block,
blobs: verified_blobs,
blobs_available_timestamp: None,
}))
}
}
@@ -462,6 +473,8 @@ pub struct AvailableBlock<E: EthSpec> {
block_root: Hash256,
block: Arc<SignedBeaconBlock<E>>,
blobs: Option<BlobSidecarList<E>>,
/// Timestamp at which this block first became available (UNIX timestamp, time since 1970).
blobs_available_timestamp: Option<Duration>,
}
impl<E: EthSpec> AvailableBlock<E> {
@@ -474,6 +487,7 @@ impl<E: EthSpec> AvailableBlock<E> {
block_root,
block,
blobs,
blobs_available_timestamp: None,
}
}
@@ -488,6 +502,10 @@ impl<E: EthSpec> AvailableBlock<E> {
self.blobs.as_ref()
}
pub fn blobs_available_timestamp(&self) -> Option<Duration> {
self.blobs_available_timestamp
}
pub fn deconstruct(
self,
) -> (
@@ -499,6 +517,7 @@ impl<E: EthSpec> AvailableBlock<E> {
block_root,
block,
blobs,
blobs_available_timestamp: _,
} = self;
(block_root, block, blobs)
}

View File

@@ -19,6 +19,7 @@ pub enum Error {
ParentStateMissing(Hash256),
BlockReplayError(state_processing::BlockReplayError),
RebuildingStateCaches(BeaconStateError),
SlotClockError,
}
pub enum ErrorCategory {
@@ -39,7 +40,8 @@ impl Error {
| Error::Unexpected
| Error::ParentStateMissing(_)
| Error::BlockReplayError(_)
| Error::RebuildingStateCaches(_) => ErrorCategory::Internal,
| Error::RebuildingStateCaches(_)
| Error::SlotClockError => ErrorCategory::Internal,
Error::Kzg(_)
| Error::BlobIndexInvalid(_)
| Error::KzgCommitmentMismatch { .. }

View File

@@ -204,6 +204,12 @@ impl<E: EthSpec> PendingComponents<E> {
executed_block,
} = self;
let blobs_available_timestamp = verified_blobs
.iter()
.flatten()
.map(|blob| blob.seen_timestamp())
.max();
let Some(diet_executed_block) = executed_block else {
return Err(AvailabilityCheckError::Unexpected);
};
@@ -231,6 +237,7 @@ impl<E: EthSpec> PendingComponents<E> {
block_root,
block,
blobs: Some(verified_blobs),
blobs_available_timestamp,
};
Ok(Availability::Available(Box::new(
AvailableExecutedBlock::new(available_block, import_data, payload_verification_outcome),

View File

@@ -847,37 +847,55 @@ lazy_static! {
"Number of attester slashings seen",
&["src", "validator"]
);
}
// Prevent recursion limit
lazy_static! {
/*
* Block Delay Metrics
*/
pub static ref BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_observed_slot_start_delay_time",
"Duration between the start of the block's slot and the time the block was observed.",
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
decimal_buckets(-1,2)
);
pub static ref BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_imported_observed_delay_time",
"Duration between the time the block was observed and the time when it was imported.",
// [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5]
decimal_buckets(-2,0)
);
pub static ref BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_head_imported_delay_time",
"Duration between the time the block was imported and the time when it was set as head.",
// [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5]
decimal_buckets(-2,-1)
);
pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_head_slot_start_delay_time",
pub static ref BEACON_BLOCK_DELAY_TOTAL: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_total",
"Duration between the start of the block's slot and the time when it was set as head.",
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
decimal_buckets(-1,2)
);
pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_head_slot_start_delay_exceeded_total",
"Triggered when the duration between the start of the block's slot and the current time \
pub static ref BEACON_BLOCK_DELAY_OBSERVED_SLOT_START: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_observed_slot_start",
"Duration between the start of the block's slot and the time the block was observed.",
);
pub static ref BEACON_BLOB_DELAY_ALL_OBSERVED_SLOT_START: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_delay_all_observed_slot_start",
"Duration between the start of the block's slot and the time the block was observed.",
);
pub static ref BEACON_BLOCK_DELAY_EXECUTION_TIME: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_execution_time",
"The duration in verifying the block with the execution layer.",
);
pub static ref BEACON_BLOCK_DELAY_AVAILABLE_SLOT_START: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_available_slot_start",
"Duration between the time that block became available and the start of the slot.",
);
pub static ref BEACON_BLOCK_DELAY_ATTESTABLE_SLOT_START: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_attestable_slot_start",
"Duration between the time that block became attestable and the start of the slot.",
);
pub static ref BEACON_BLOCK_DELAY_IMPORTED_TIME: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_imported_time",
"Duration between the time the block became available and the time when it was imported.",
);
pub static ref BEACON_BLOCK_DELAY_HEAD_IMPORTED_TIME: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_head_imported_time",
"Duration between the time that block was imported and the time when it was set as head.",
);
pub static ref BEACON_BLOCK_DELAY_HEAD_SLOT_START_EXCEEDED_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_delay_head_slot_start_exceeded_total",
"A counter that is triggered when the duration between the start of the block's slot and the current time \
will result in failed attestations.",
);
@@ -1130,11 +1148,9 @@ lazy_static! {
/*
* Availability related metrics
*/
pub static ref BLOCK_AVAILABILITY_DELAY: Result<Histogram> = try_create_histogram_with_buckets(
pub static ref BLOCK_AVAILABILITY_DELAY: Result<IntGauge> = try_create_int_gauge(
"block_availability_delay",
"Duration between start of the slot and the time at which all components of the block are available.",
// Create a custom bucket list for greater granularity in block delay
Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0])
);
/*

View File

@@ -24,7 +24,7 @@ use payload_status::process_payload_status;
pub use payload_status::PayloadStatus;
use sensitive_url::SensitiveUrl;
use serde::{Deserialize, Serialize};
use slog::{crit, debug, error, info, trace, warn, Logger};
use slog::{crit, debug, error, info, warn, Logger};
use slot_clock::SlotClock;
use std::collections::HashMap;
use std::fmt;
@@ -1331,15 +1331,11 @@ impl<E: EthSpec> ExecutionLayer<E> {
&metrics::EXECUTION_LAYER_REQUEST_TIMES,
&[metrics::NEW_PAYLOAD],
);
let timer = std::time::Instant::now();
let block_number = new_payload_request.block_number();
let block_hash = new_payload_request.block_hash();
trace!(
self.log(),
"Issuing engine_newPayload";
"parent_hash" => ?new_payload_request.parent_hash(),
"block_hash" => ?block_hash,
"block_number" => ?new_payload_request.block_number(),
);
let parent_hash = new_payload_request.parent_hash();
let result = self
.engine()
@@ -1347,9 +1343,19 @@ impl<E: EthSpec> ExecutionLayer<E> {
.await;
if let Ok(status) = &result {
let status_str = <&'static str>::from(status.status);
metrics::inc_counter_vec(
&metrics::EXECUTION_LAYER_PAYLOAD_STATUS,
&["new_payload", status.status.into()],
&["new_payload", status_str],
);
debug!(
self.log(),
"Processed engine_newPayload";
"status" => status_str,
"parent_hash" => ?parent_hash,
"block_hash" => ?block_hash,
"block_number" => block_number,
"response_time_ms" => timer.elapsed().as_millis()
);
}
*self.inner.last_new_payload_errored.write().await = result.is_err();

View File

@@ -248,50 +248,41 @@ lazy_static! {
/*
* Block Delay Metrics
*/
pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_gossip_propagation_verification_delay_time",
"Duration between when the block is received and when it is verified for propagation.",
// [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
decimal_buckets(-3,-1)
pub static ref BEACON_BLOCK_DELAY_GOSSIP: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_gossip",
"The first time we see this block from gossip as a delay from the start of the slot"
);
pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_gossip_slot_start_delay_time",
"Duration between when the block is received and the start of the slot it belongs to.",
// Create a custom bucket list for greater granularity in block delay
Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0])
// NOTE: Previous values, which we may want to switch back to.
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
//decimal_buckets(-1,2)
pub static ref BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_gossip_verification",
"Keeps track of the time delay from the start of the slot to the point we propagate the block"
);
pub static ref BEACON_BLOCK_LAST_DELAY: Result<IntGauge> = try_create_int_gauge(
"beacon_block_last_delay",
"Keeps track of the last block's delay from the start of the slot"
pub static ref BEACON_BLOCK_DELAY_FULL_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_full_verification",
"The time it takes to verify a beacon block."
);
pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_gossip_arrived_late_total",
pub static ref BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_delay_gossip_arrived_late_total",
"Count of times when a gossip block arrived from the network later than the attestation deadline.",
);
/*
* Blob Delay Metrics
*/
pub static ref BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_gossip_propagation_verification_delay_time",
"Duration between when the blob is received over gossip and when it is verified for propagation.",
// [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
decimal_buckets(-3,-1)
pub static ref BEACON_BLOB_DELAY_GOSSIP: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_delay_gossip_last_delay",
"The first time we see this blob as a delay from the start of the slot"
);
pub static ref BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_gossip_slot_start_delay_time",
"Duration between when the blob is received over gossip and the start of the slot it belongs to.",
// Create a custom bucket list for greater granularity in block delay
Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0])
// NOTE: Previous values, which we may want to switch back to.
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
//decimal_buckets(-1,2)
pub static ref BEACON_BLOB_DELAY_GOSSIP_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_delay_gossip_verification",
"Keeps track of the time delay from the start of the slot to the point we propagate the blob"
);
pub static ref BEACON_BLOB_DELAY_FULL_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_last_full_verification_delay",
"The time it takes to verify a beacon blob"
);
pub static ref BEACON_BLOB_RPC_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_rpc_slot_start_delay_time",
"Duration between when a blob is received over rpc and the start of the slot it belongs to.",
@@ -302,10 +293,6 @@ lazy_static! {
//decimal_buckets(-1,2)
);
pub static ref BEACON_BLOB_LAST_DELAY: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_last_delay",
"Keeps track of the last blob's delay from the start of the slot"
);
pub static ref BEACON_BLOB_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_blob_gossip_arrived_late_total",

View File

@@ -27,7 +27,7 @@ use std::fs;
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use store::hot_cold_store::HotColdDBError;
use tokio::sync::mpsc;
use types::{
@@ -615,8 +615,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let commitment = blob_sidecar.kzg_commitment;
let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay);
metrics::set_gauge(&metrics::BEACON_BLOB_LAST_DELAY, delay.as_millis() as i64);
metrics::set_gauge(&metrics::BEACON_BLOB_DELAY_GOSSIP, delay.as_millis() as i64);
match self
.chain
.verify_blob_sidecar_for_gossip(blob_sidecar, blob_index)
@@ -654,9 +653,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.ok()
.and_then(|now| now.checked_sub(seen_duration))
{
metrics::observe_duration(
&metrics::BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
duration,
metrics::set_gauge(
&metrics::BEACON_BLOB_DELAY_GOSSIP_VERIFICATION,
duration.as_millis() as i64,
);
}
self.process_gossip_verified_blob(peer_id, gossip_verified_blob, seen_duration)
@@ -747,9 +746,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self: &Arc<Self>,
peer_id: PeerId,
verified_blob: GossipVerifiedBlob<T>,
// This value is not used presently, but it might come in handy for debugging.
_seen_duration: Duration,
) {
let processing_start_time = Instant::now();
let block_root = verified_blob.block_root();
let blob_slot = verified_blob.slot();
let blob_index = verified_blob.id().index;
@@ -764,6 +763,11 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"block_root" => %block_root
);
self.chain.recompute_head_at_current_slot().await;
metrics::set_gauge(
&metrics::BEACON_BLOB_DELAY_FULL_VERIFICATION,
processing_start_time.elapsed().as_millis() as i64,
);
}
Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => {
trace!(
@@ -865,12 +869,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let block_delay =
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
block_delay,
);
metrics::set_gauge(
&metrics::BEACON_BLOCK_LAST_DELAY,
&metrics::BEACON_BLOCK_DELAY_GOSSIP,
block_delay.as_millis() as i64,
);
@@ -898,7 +899,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let verified_block = match verification_result {
Ok(verified_block) => {
if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() {
metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL);
metrics::inc_counter(&metrics::BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL);
debug!(
self.log,
"Gossip block arrived late";
@@ -923,9 +924,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.ok()
.and_then(|now| now.checked_sub(seen_duration))
{
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
duration,
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION,
duration.as_millis() as i64,
);
}
@@ -1130,9 +1131,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
verified_block: GossipVerifiedBlock<T>,
reprocess_tx: mpsc::Sender<ReprocessQueueMessage>,
invalid_block_storage: InvalidBlockStorage,
// This value is not used presently, but it might come in handy for debugging.
_seen_duration: Duration,
) {
let processing_start_time = Instant::now();
let block = verified_block.block.block_cloned();
let block_root = verified_block.block_root;
@@ -1168,6 +1169,11 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
);
self.chain.recompute_head_at_current_slot().await;
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_FULL_VERIFICATION,
processing_start_time.elapsed().as_millis() as i64,
);
}
Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => {
trace!(