Add BlockTimesCache to allow additional block delay metrics (#2546)

## Issue Addressed Closes #2528 ## Proposed Changes - Add `BlockTimesCache` to provide block timing information to `BeaconChain`. This allows additional metrics to be calculated for blocks that are set as head too late. - Thread the `seen_timestamp` of blocks received from RPC responses (except blocks from syncing) through to the sync manager, similar to what is done for blocks from gossip. ## Additional Info This provides the following additional metrics: - `BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME` - The delay between the start of the slot and when the block was first observed. - `BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME` - The delay between when the block was first observed and when the block was imported. - `BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME` - The delay between when the block was imported and when the block was set as head. The metric `BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME` was removed. A log is produced when a block is set as head too late, e.g.: ``` Aug 27 03:46:39.006 DEBG Delayed head block set_as_head_delay: Some(21.731066ms), imported_delay: Some(119.929934ms), observed_delay: Some(3.864596988s), block_delay: 4.006257988s, slot: 1931331, proposer_index: 24294, block_root: 0x937602c89d3143afa89088a44bdf4b4d0d760dad082abacb229495c048648a9e, service: beacon ```
2026-04-27 01:33:33 +00:00 · 2021-09-30 04:31:41 +00:00
parent 70441aa554
commit 4c510f8f6b
14 changed files with 389 additions and 43 deletions
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -5,6 +5,7 @@ use crate::attestation_verification::{
 };
 use crate::attester_cache::{AttesterCache, AttesterCacheKey};
 use crate::beacon_proposer_cache::BeaconProposerCache;
+use crate::block_times_cache::BlockTimesCache;
 use crate::block_verification::{
    check_block_is_finalized_descendant, check_block_relevancy, get_block_root,
    signature_verify_chain_segment, BlockError, FullyVerifiedBlock, GossipVerifiedBlock,
@@ -38,14 +39,16 @@ use crate::sync_committee_verification::{
 };
 use crate::timeout_rw_lock::TimeoutRwLock;
 use crate::validator_monitor::{
-    get_block_delay_ms, get_slot_delay_ms, timestamp_now, ValidatorMonitor,
+    get_slot_delay_ms, timestamp_now, ValidatorMonitor,
    HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS,
 };
 use crate::validator_pubkey_cache::ValidatorPubkeyCache;
 use crate::BeaconForkChoiceStore;
 use crate::BeaconSnapshot;
 use crate::{metrics, BeaconChainError};
-use eth2::types::{EventKind, SseBlock, SseChainReorg, SseFinalizedCheckpoint, SseHead, SyncDuty};
+use eth2::types::{
+    EventKind, SseBlock, SseChainReorg, SseFinalizedCheckpoint, SseHead, SseLateHead, SyncDuty,
+};
 use fork_choice::ForkChoice;
 use futures::channel::mpsc::Sender;
 use itertools::process_results;
@@ -298,6 +301,8 @@ pub struct BeaconChain<T: BeaconChainTypes> {
    pub(crate) validator_pubkey_cache: TimeoutRwLock<ValidatorPubkeyCache<T>>,
    /// A cache used when producing attestations.
    pub(crate) attester_cache: Arc<AttesterCache>,
+    /// A cache used to keep track of various block timings.
+    pub block_times_cache: Arc<RwLock<BlockTimesCache>>,
    /// A list of any hard-coded forks that have been disabled.
    pub disabled_forks: Vec<String>,
    /// Sender given to tasks, so that if they encounter a state in which execution cannot
@@ -2538,14 +2543,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
        // This prevents inconsistency between the two at the expense of concurrency.
        drop(fork_choice);

-        // Log metrics to track the delay between when the block was made and when we imported it.
-        //
        // We're declaring the block "imported" at this point, since fork choice and the DB know
        // about it.
-        metrics::observe_duration(
-            &metrics::BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME,
-            get_block_delay_ms(timestamp_now(), block.to_ref(), &self.slot_clock),
-        );
+        let block_time_imported = timestamp_now();

        let parent_root = block.parent_root();
        let slot = block.slot();
@@ -2590,6 +2590,38 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

        metrics::inc_counter(&metrics::BLOCK_PROCESSING_SUCCESSES);

+        let block_delay_total = get_slot_delay_ms(block_time_imported, slot, &self.slot_clock);
+
+        // Do not write to the cache for blocks older than 2 epochs, this helps reduce writes to
+        // the cache during sync.
+        if block_delay_total < self.slot_clock.slot_duration() * 64 {
+            // Store the timestamp of the block being imported into the cache.
+            self.block_times_cache.write().set_time_imported(
+                block_root,
+                current_slot,
+                block_time_imported,
+            );
+        }
+
+        // Do not store metrics if the block was > 4 slots old, this helps prevent noise during
+        // sync.
+        if block_delay_total < self.slot_clock.slot_duration() * 4 {
+            // Observe the delay between when we observed the block and when we imported it.
+            let block_delays = self.block_times_cache.read().get_block_delays(
+                block_root,
+                self.slot_clock
+                    .start_of(current_slot)
+                    .unwrap_or_else(|| Duration::from_secs(0)),
+            );
+
+            metrics::observe_duration(
+                &metrics::BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME,
+                block_delays
+                    .imported
+                    .unwrap_or_else(|| Duration::from_secs(0)),
+            );
+        }
+
        Ok(block_root)
    }

@@ -2998,7 +3030,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

        let update_head_timer = metrics::start_timer(&metrics::UPDATE_HEAD_TIMES);

-        // These fields are used for server-sent events
+        // These fields are used for server-sent events.
        let state_root = new_head.beacon_state_root();
        let head_slot = new_head.beacon_state.slot();
        let target_epoch_start_slot = new_head
@@ -3010,6 +3042,12 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            .previous_epoch()
            .start_slot(T::EthSpec::slots_per_epoch());
        let head_proposer_index = new_head.beacon_block.message().proposer_index();
+        let proposer_graffiti = new_head
+            .beacon_block
+            .message()
+            .body()
+            .graffiti()
+            .as_utf8_lossy();

        drop(lag_timer);

@@ -3020,35 +3058,83 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            .try_write_for(HEAD_LOCK_TIMEOUT)
            .ok_or(Error::CanonicalHeadLockTimeout)? = new_head;

+        // The block has now been set as head so we can record times and delays.
        metrics::stop_timer(update_head_timer);

-        let block_delay = get_slot_delay_ms(timestamp_now(), head_slot, &self.slot_clock);
+        let block_time_set_as_head = timestamp_now();

-        // Observe the delay between the start of the slot and when we set the block as head.
-        metrics::observe_duration(
-            &metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME,
-            block_delay,
-        );
+        // Calculate the total delay between the start of the slot and when it was set as head.
+        let block_delay_total =
+            get_slot_delay_ms(block_time_set_as_head, head_slot, &self.slot_clock);

-        // If the block was enshrined as head too late for attestations to be created for it, log a
-        // debug warning and increment a metric.
-        //
-        // Don't create this log if the block was > 4 slots old, this helps prevent noise during
-        // sync.
-        if block_delay >= self.slot_clock.unagg_attestation_production_delay()
-            && block_delay < self.slot_clock.slot_duration() * 4
-        {
-            metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL);
-            debug!(
-                self.log,
-                "Delayed head block";
-                "block_root" => ?beacon_block_root,
-                "proposer_index" => head_proposer_index,
-                "slot" => head_slot,
-                "block_delay" => ?block_delay,
+        // Do not write to the cache for blocks older than 2 epochs, this helps reduce writes to
+        // the cache during sync.
+        if block_delay_total < self.slot_clock.slot_duration() * 64 {
+            self.block_times_cache.write().set_time_set_as_head(
+                beacon_block_root,
+                current_head.slot,
+                block_time_set_as_head,
            );
        }

+        // If a block comes in from over 4 slots ago, it is most likely a block from sync.
+        let block_from_sync = block_delay_total > self.slot_clock.slot_duration() * 4;
+
+        // Determine whether the block has been set as head too late for proper attestation
+        // production.
+        let late_head = block_delay_total >= self.slot_clock.unagg_attestation_production_delay();
+
+        // Do not store metrics if the block was > 4 slots old, this helps prevent noise during
+        // sync.
+        if !block_from_sync {
+            // Observe the total block delay. This is the delay between the time the slot started
+            // and when the block was set as head.
+            metrics::observe_duration(
+                &metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME,
+                block_delay_total,
+            );
+
+            // Observe the delay between when we imported the block and when we set the block as
+            // head.
+            let block_delays = self.block_times_cache.read().get_block_delays(
+                beacon_block_root,
+                self.slot_clock
+                    .start_of(head_slot)
+                    .unwrap_or_else(|| Duration::from_secs(0)),
+            );
+
+            metrics::observe_duration(
+                &metrics::BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME,
+                block_delays
+                    .observed
+                    .unwrap_or_else(|| Duration::from_secs(0)),
+            );
+
+            metrics::observe_duration(
+                &metrics::BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME,
+                block_delays
+                    .set_as_head
+                    .unwrap_or_else(|| Duration::from_secs(0)),
+            );
+
+            // If the block was enshrined as head too late for attestations to be created for it,
+            // log a debug warning and increment a metric.
+            if late_head {
+                metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL);
+                debug!(
+                    self.log,
+                    "Delayed head block";
+                    "block_root" => ?beacon_block_root,
+                    "proposer_index" => head_proposer_index,
+                    "slot" => head_slot,
+                    "block_delay" => ?block_delay_total,
+                    "observed_delay" => ?block_delays.observed,
+                    "imported_delay" => ?block_delays.imported,
+                    "set_as_head_delay" => ?block_delays.set_as_head,
+                );
+            }
+        }
+
        self.snapshot_cache
            .try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
            .map(|mut snapshot_cache| {
@@ -3146,6 +3232,31 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                    epoch: head_slot.epoch(T::EthSpec::slots_per_epoch()),
                }));
            }
+
+            if !block_from_sync && late_head && event_handler.has_late_head_subscribers() {
+                let peer_info = self
+                    .block_times_cache
+                    .read()
+                    .get_peer_info(beacon_block_root);
+                let block_delays = self.block_times_cache.read().get_block_delays(
+                    beacon_block_root,
+                    self.slot_clock
+                        .start_of(head_slot)
+                        .unwrap_or_else(|| Duration::from_secs(0)),
+                );
+                event_handler.register(EventKind::LateHead(SseLateHead {
+                    slot: head_slot,
+                    block: beacon_block_root,
+                    peer_id: peer_info.id,
+                    peer_client: peer_info.client,
+                    proposer_index: head_proposer_index,
+                    proposer_graffiti,
+                    block_delay: block_delay_total,
+                    observed_delay: block_delays.observed,
+                    imported_delay: block_delays.imported,
+                    set_as_head_delay: block_delays.set_as_head,
+                }));
+            }
        }

        Ok(())
@@ -3212,6 +3323,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
        trace!(self.log, "Running beacon chain per slot tasks");
        if let Some(slot) = self.slot_clock.now() {
            self.naive_aggregation_pool.write().prune(slot);
+            self.block_times_cache.write().prune(slot);
        }
    }