Cause analysis for delayed head blocks

2026-03-19 21:04:41 +00:00 · 2022-05-30 15:37:00 +10:00
parent d03e67ac2c
commit bbb903931f
4 changed files with 79 additions and 17 deletions
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -2751,6 +2751,13 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                            "Early attester cache insert failed";
                            "error" => ?e
                        );
+                    } else {
+                        // Success, record the block as capable of being attested to.
+                        self.block_times_cache.write().set_time_attestable(
+                            block_root,
+                            block.slot(),
+                            timestamp_now(),
+                        );
                    }
                } else {
                    warn!(
@@ -3572,7 +3579,8 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

        // Determine whether the block has been set as head too late for proper attestation
        // production.
-        let late_head = block_delay_total >= self.slot_clock.unagg_attestation_production_delay();
+        let attestation_deadline = self.slot_clock.unagg_attestation_production_delay();
+        let late_head = block_delay_total >= attestation_deadline;

        // Do not store metrics if the block was > 4 slots old, this helps prevent noise during
        // sync.
@@ -3592,34 +3600,61 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                    .start_of(head_slot)
                    .unwrap_or_else(|| Duration::from_secs(0)),
            );
+            let observed_delay = block_delays
+                .observed
+                .unwrap_or_else(|| Duration::from_secs(0));
+            let import_delay = block_delays
+                .imported
+                .unwrap_or_else(|| Duration::from_secs(0));
+            let attestable_delay = block_delays
+                .attestable
+                .unwrap_or_else(|| Duration::from_secs(0));
+            let set_as_head_delay = block_delays
+                .set_as_head
+                .unwrap_or_else(|| Duration::from_secs(0));

            metrics::observe_duration(
                &metrics::BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME,
-                block_delays
-                    .observed
-                    .unwrap_or_else(|| Duration::from_secs(0)),
+                observed_delay,
+            );
+            metrics::observe_duration(
+                &metrics::BEACON_BLOCK_HEAD_ATTESTABLE_DELAY_TIME,
+                attestable_delay,
            );
-
            metrics::observe_duration(
                &metrics::BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME,
-                block_delays
-                    .set_as_head
-                    .unwrap_or_else(|| Duration::from_secs(0)),
+                set_as_head_delay,
            );

            // If the block was enshrined as head too late for attestations to be created for it,
            // log a debug warning and increment a metric.
-            if late_head {
-                metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL);
+            let missed_attestation_deadline = attestable_delay >= attestation_deadline;
+            if missed_attestation_deadline {
+                let due_to_late_block = observed_delay >= attestation_deadline;
+                let due_to_processing = observed_delay + import_delay >= attestation_deadline;
+
+                let reason = if due_to_late_block {
+                    metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_LATE);
+                    "late block"
+                } else if due_to_processing {
+                    metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_SLOW);
+                    "slow to process"
+                } else {
+                    metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_OTHER);
+                    "other"
+                };
+
                debug!(
                    self.log,
                    "Delayed head block";
+                    "reason" => reason,
                    "block_root" => ?beacon_block_root,
                    "proposer_index" => head_proposer_index,
                    "slot" => head_slot,
                    "block_delay" => ?block_delay_total,
                    "observed_delay" => ?block_delays.observed,
                    "imported_delay" => ?block_delays.imported,
+                    "attestable_delay" => ?attestable_delay,
                    "set_as_head_delay" => ?block_delays.set_as_head,
                );
            }
@@ -3722,6 +3757,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                    proposer_graffiti,
                    block_delay: block_delay_total,
                    observed_delay: block_delays.observed,
+                    attestable_delay: block_delays.attestable,
                    imported_delay: block_delays.imported,
                    set_as_head_delay: block_delays.set_as_head,
                }));
--- a/beacon_node/beacon_chain/src/block_times_cache.rs
+++ b/beacon_node/beacon_chain/src/block_times_cache.rs
@@ -18,6 +18,7 @@ type BlockRoot = Hash256;
 #[derive(Clone, Default)]
 pub struct Timestamps {
    pub observed: Option<Duration>,
+    pub attestable: Option<Duration>,
    pub imported: Option<Duration>,
    pub set_as_head: Option<Duration>,
 }
@@ -26,6 +27,7 @@ pub struct Timestamps {
 #[derive(Default)]
 pub struct BlockDelays {
    pub observed: Option<Duration>,
+    pub attestable: Option<Duration>,
    pub imported: Option<Duration>,
    pub set_as_head: Option<Duration>,
 }
@@ -35,6 +37,9 @@ impl BlockDelays {
        let observed = times
            .observed
            .and_then(|observed_time| observed_time.checked_sub(slot_start_time));
+        let attestable = times
+            .attestable
+            .and_then(|attestable_time| attestable_time.checked_sub(slot_start_time));
        let imported = times
            .imported
            .and_then(|imported_time| imported_time.checked_sub(times.observed?));
@@ -43,6 +48,7 @@ impl BlockDelays {
            .and_then(|set_as_head_time| set_as_head_time.checked_sub(times.imported?));
        BlockDelays {
            observed,
+            attestable,
            imported,
            set_as_head,
        }
@@ -99,6 +105,14 @@ impl BlockTimesCache {
        };
    }

+    pub fn set_time_attestable(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) {
+        let block_times = self
+            .cache
+            .entry(block_root)
+            .or_insert_with(|| BlockTimesCacheValue::new(slot));
+        block_times.timestamps.attestable = Some(timestamp);
+    }
+
    pub fn set_time_imported(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) {
        let block_times = self
            .cache
--- a/beacon_node/beacon_chain/src/metrics.rs
+++ b/beacon_node/beacon_chain/src/metrics.rs
@@ -767,7 +767,10 @@ lazy_static! {
        "Number of attester slashings seen",
        &["src", "validator"]
    );
+}

+// Fourth lazy-static block is used to account for macro recursion limit.
+lazy_static! {
    /*
     * Block Delay Metrics
     */
@@ -783,14 +786,25 @@ lazy_static! {
        "beacon_block_head_imported_delay_time",
        "Duration between the time the block was imported and the time when it was set as head.",
    );
+    pub static ref BEACON_BLOCK_HEAD_ATTESTABLE_DELAY_TIME: Result<Histogram> = try_create_histogram(
+        "beacon_block_head_attestable_delay_time",
+        "Duration between the start of the slot and the time at which the block could be attested to.",
+    );
    pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram(
        "beacon_block_head_slot_start_delay_time",
        "Duration between the start of the block's slot and the time when it was set as head.",
    );
-    pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result<IntCounter> = try_create_int_counter(
-        "beacon_block_head_slot_start_delay_exceeded_total",
-        "Triggered when the duration between the start of the block's slot and the current time \
-        will result in failed attestations.",
+    pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_LATE: Result<IntCounter> = try_create_int_counter(
+        "beacon_block_head_missed_att_deadline_late",
+        "Total number of delayed head blocks that arrived late"
+    );
+    pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_SLOW: Result<IntCounter> = try_create_int_counter(
+        "beacon_block_head_missed_att_deadline_slow",
+        "Total number of delayed head blocks that arrived on time but not processed in time"
+    );
+    pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_OTHER: Result<IntCounter> = try_create_int_counter(
+        "beacon_block_head_missed_att_deadline_other",
+        "Total number of delayed head blocks that were not late and not slow to process"
    );

    /*
@@ -801,10 +815,7 @@ lazy_static! {
            "gossip_beacon_block_skipped_slots",
            "For each gossip blocks, the number of skip slots between it and its parent"
        );
-}

-// Fourth lazy-static block is used to account for macro recursion limit.
-lazy_static! {
    /*
     * Sync Committee Message Verification
     */
--- a/common/eth2/src/types.rs
+++ b/common/eth2/src/types.rs
@@ -835,6 +835,7 @@ pub struct SseLateHead {
    pub proposer_graffiti: String,
    pub block_delay: Duration,
    pub observed_delay: Option<Duration>,
+    pub attestable_delay: Option<Duration>,
    pub imported_delay: Option<Duration>,
    pub set_as_head_delay: Option<Duration>,
 }