Add timing for block availability (#5510)

* Add timing for block availability

* Attestation metrics analysis

* Prettier printing

* Add some metrics and timings to track late blocks

* Update to latest unstable

* fmt

* Merge latest unstable

* Small tweaks

* Try pushing blob timing down into verification

* Simplify for clippy
This commit is contained in:
Michael Sproul
2024-04-23 23:13:34 +10:00
committed by GitHub
parent 82b131d37f
commit 72a33604b3
12 changed files with 391 additions and 156 deletions

View File

@@ -248,50 +248,41 @@ lazy_static! {
/*
* Block Delay Metrics
*/
pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_gossip_propagation_verification_delay_time",
"Duration between when the block is received and when it is verified for propagation.",
// [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
decimal_buckets(-3,-1)
// Integer gauge for the arrival delay of a gossip block. The gossip block code in
// `NetworkBeaconProcessor` sets it to `block_delay.as_millis()`, where `block_delay` is the
// result of `get_block_delay_ms(seen_duration, ..)`, so the stored value is in milliseconds.
pub static ref BEACON_BLOCK_DELAY_GOSSIP: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_gossip",
"The first time we see this block from gossip as a delay from the start of the slot"
);
pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_block_gossip_slot_start_delay_time",
"Duration between when the block is received and the start of the slot it belongs to.",
// Create a custom bucket list for greater granularity in block delay
Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0])
// NOTE: Previous values, which we may want to switch back to.
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
//decimal_buckets(-1,2)
// Integer gauge set after gossip verification of a block succeeds. The call site stores
// `duration.as_millis()`, so the value is in milliseconds.
// NOTE(review): at the call site `duration` is `now.checked_sub(seen_duration)`; confirm that
// this is the quantity the help text below describes ("from the start of the slot").
pub static ref BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_gossip_verification",
"Keeps track of the time delay from the start of the slot to the point we propagate the block"
);
pub static ref BEACON_BLOCK_LAST_DELAY: Result<IntGauge> = try_create_int_gauge(
"beacon_block_last_delay",
"Keeps track of the last block's delay from the start of the slot"
// Integer gauge holding, in milliseconds, the time elapsed since an `Instant` captured at the
// top of the gossip-verified block processing function. It is written only after
// `recompute_head_at_current_slot().await` has returned, so the measured span includes head
// recomputation as well as block processing.
pub static ref BEACON_BLOCK_DELAY_FULL_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_block_delay_full_verification",
"The time it takes to verify a beacon block."
);
pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_gossip_arrived_late_total",
// Counter incremented once per gossip block that passes gossip verification with
// `block_delay >= slot_clock.unagg_attestation_production_delay()`.
pub static ref BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_delay_gossip_arrived_late_total",
"Count of times when a gossip block arrived from the network later than the attestation deadline.",
);
/*
* Blob Delay Metrics
*/
pub static ref BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_gossip_propagation_verification_delay_time",
"Duration between when the blob is received over gossip and when it is verified for propagation.",
// [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
decimal_buckets(-3,-1)
// Integer gauge for the arrival delay of a gossip blob. The gossip blob code in
// `NetworkBeaconProcessor` sets it to `delay.as_millis()`, where `delay` is the result of
// `get_slot_delay_ms(seen_duration, slot, ..)`, so the stored value is in milliseconds.
// NOTE(review): the registered name carries a `_last_delay` suffix that the constant name does
// not (the block equivalent is registered as "beacon_block_delay_gossip"). Confirm whether the
// difference is intentional before relying on the name in dashboards.
pub static ref BEACON_BLOB_DELAY_GOSSIP: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_delay_gossip_last_delay",
"The first time we see this blob as a delay from the start of the slot"
);
pub static ref BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_gossip_slot_start_delay_time",
"Duration between when the blob is received over gossip and the start of the slot it belongs to.",
// Create a custom bucket list for greater granularity in block delay
Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0])
// NOTE: Previous values, which we may want to switch back to.
// [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50]
//decimal_buckets(-1,2)
// Integer gauge set after gossip verification of a blob succeeds. The call site stores
// `duration.as_millis()`, so the value is in milliseconds.
// NOTE(review): at the call site `duration` is `now.checked_sub(seen_duration)`; confirm that
// this is the quantity the help text below describes ("from the start of the slot").
pub static ref BEACON_BLOB_DELAY_GOSSIP_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_delay_gossip_verification",
"Keeps track of the time delay from the start of the slot to the point we propagate the blob"
);
// Integer gauge holding, in milliseconds, the time elapsed since an `Instant` captured at the
// top of the gossip-verified blob processing function. It is written only after
// `recompute_head_at_current_slot().await` has returned, so the measured span includes head
// recomputation as well as blob processing.
// NOTE(review): the registered name ("beacon_blob_last_full_verification_delay") does not follow
// the pattern of the block equivalent ("beacon_block_delay_full_verification"). Confirm whether
// the difference is intentional.
pub static ref BEACON_BLOB_DELAY_FULL_VERIFICATION: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_last_full_verification_delay",
"The time it takes to verify a beacon blob"
);
pub static ref BEACON_BLOB_RPC_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram_with_buckets(
"beacon_blob_rpc_slot_start_delay_time",
"Duration between when a blob is received over rpc and the start of the slot it belongs to.",
@@ -302,10 +293,6 @@ lazy_static! {
//decimal_buckets(-1,2)
);
pub static ref BEACON_BLOB_LAST_DELAY: Result<IntGauge> = try_create_int_gauge(
"beacon_blob_last_delay",
"Keeps track of the last blob's delay from the start of the slot"
);
pub static ref BEACON_BLOB_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_blob_gossip_arrived_late_total",

View File

@@ -27,7 +27,7 @@ use std::fs;
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use store::hot_cold_store::HotColdDBError;
use tokio::sync::mpsc;
use types::{
@@ -615,8 +615,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let commitment = blob_sidecar.kzg_commitment;
let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay);
metrics::set_gauge(&metrics::BEACON_BLOB_LAST_DELAY, delay.as_millis() as i64);
metrics::set_gauge(&metrics::BEACON_BLOB_DELAY_GOSSIP, delay.as_millis() as i64);
match self
.chain
.verify_blob_sidecar_for_gossip(blob_sidecar, blob_index)
@@ -654,9 +653,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.ok()
.and_then(|now| now.checked_sub(seen_duration))
{
metrics::observe_duration(
&metrics::BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
duration,
metrics::set_gauge(
&metrics::BEACON_BLOB_DELAY_GOSSIP_VERIFICATION,
duration.as_millis() as i64,
);
}
self.process_gossip_verified_blob(peer_id, gossip_verified_blob, seen_duration)
@@ -747,9 +746,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self: &Arc<Self>,
peer_id: PeerId,
verified_blob: GossipVerifiedBlob<T>,
// This value is not used presently, but it might come in handy for debugging.
_seen_duration: Duration,
) {
let processing_start_time = Instant::now();
let block_root = verified_blob.block_root();
let blob_slot = verified_blob.slot();
let blob_index = verified_blob.id().index;
@@ -764,6 +763,11 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"block_root" => %block_root
);
self.chain.recompute_head_at_current_slot().await;
metrics::set_gauge(
&metrics::BEACON_BLOB_DELAY_FULL_VERIFICATION,
processing_start_time.elapsed().as_millis() as i64,
);
}
Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => {
trace!(
@@ -865,12 +869,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let block_delay =
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
block_delay,
);
metrics::set_gauge(
&metrics::BEACON_BLOCK_LAST_DELAY,
&metrics::BEACON_BLOCK_DELAY_GOSSIP,
block_delay.as_millis() as i64,
);
@@ -898,7 +899,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let verified_block = match verification_result {
Ok(verified_block) => {
if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() {
metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL);
metrics::inc_counter(&metrics::BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL);
debug!(
self.log,
"Gossip block arrived late";
@@ -923,9 +924,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.ok()
.and_then(|now| now.checked_sub(seen_duration))
{
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
duration,
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION,
duration.as_millis() as i64,
);
}
@@ -1130,9 +1131,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
verified_block: GossipVerifiedBlock<T>,
reprocess_tx: mpsc::Sender<ReprocessQueueMessage>,
invalid_block_storage: InvalidBlockStorage,
// This value is not used presently, but it might come in handy for debugging.
_seen_duration: Duration,
) {
let processing_start_time = Instant::now();
let block = verified_block.block.block_cloned();
let block_root = verified_block.block_root;
@@ -1168,6 +1169,11 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
);
self.chain.recompute_head_at_current_slot().await;
metrics::set_gauge(
&metrics::BEACON_BLOCK_DELAY_FULL_VERIFICATION,
processing_start_time.elapsed().as_millis() as i64,
);
}
Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => {
trace!(