From bbb903931ffb4cc93561a8b5924d70d376289f4b Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Mon, 30 May 2022 15:37:00 +1000 Subject: [PATCH] Cause analysis for delayed head blocks --- beacon_node/beacon_chain/src/beacon_chain.rs | 56 +++++++++++++++---- .../beacon_chain/src/block_times_cache.rs | 14 +++++ beacon_node/beacon_chain/src/metrics.rs | 25 ++++++--- common/eth2/src/types.rs | 1 + 4 files changed, 79 insertions(+), 17 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index ce88290cd8..7763a67064 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2751,6 +2751,13 @@ impl BeaconChain { "Early attester cache insert failed"; "error" => ?e ); + } else { + // Success, record the block as capable of being attested to. + self.block_times_cache.write().set_time_attestable( + block_root, + block.slot(), + timestamp_now(), + ); } } else { warn!( @@ -3572,7 +3579,8 @@ impl BeaconChain { // Determine whether the block has been set as head too late for proper attestation // production. - let late_head = block_delay_total >= self.slot_clock.unagg_attestation_production_delay(); + let attestation_deadline = self.slot_clock.unagg_attestation_production_delay(); + let late_head = block_delay_total >= attestation_deadline; // Do not store metrics if the block was > 4 slots old, this helps prevent noise during // sync. @@ -3592,34 +3600,61 @@ impl BeaconChain { .start_of(head_slot) .unwrap_or_else(|| Duration::from_secs(0)), ); + let observed_delay = block_delays + .observed + .unwrap_or_else(|| Duration::from_secs(0)); + let import_delay = block_delays + .imported + .unwrap_or_else(|| Duration::from_secs(0)); + let attestable_delay = block_delays + .attestable + .unwrap_or_else(|| Duration::from_secs(0)); + let set_as_head_delay = block_delays + .set_as_head + .unwrap_or_else(|| Duration::from_secs(0)); metrics::observe_duration( &metrics::BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME, - block_delays - .observed - .unwrap_or_else(|| Duration::from_secs(0)), + observed_delay, + ); + metrics::observe_duration( + &metrics::BEACON_BLOCK_HEAD_ATTESTABLE_DELAY_TIME, + attestable_delay, ); - metrics::observe_duration( &metrics::BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME, - block_delays - .set_as_head - .unwrap_or_else(|| Duration::from_secs(0)), + set_as_head_delay, ); // If the block was enshrined as head too late for attestations to be created for it, // log a debug warning and increment a metric. - if late_head { - metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL); + let missed_attestation_deadline = attestable_delay >= attestation_deadline; + if missed_attestation_deadline { + let due_to_late_block = observed_delay >= attestation_deadline; + let due_to_processing = observed_delay + import_delay >= attestation_deadline; + + let reason = if due_to_late_block { + metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_LATE); + "late block" + } else if due_to_processing { + metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_SLOW); + "slow to process" + } else { + metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_OTHER); + "other" + }; + debug!( self.log, "Delayed head block"; + "reason" => reason, "block_root" => ?beacon_block_root, "proposer_index" => head_proposer_index, "slot" => head_slot, "block_delay" => ?block_delay_total, "observed_delay" => ?block_delays.observed, "imported_delay" => ?block_delays.imported, + "attestable_delay" => ?attestable_delay, "set_as_head_delay" => ?block_delays.set_as_head, ); } @@ -3722,6 +3757,7 @@ impl BeaconChain { proposer_graffiti, block_delay: block_delay_total, observed_delay: block_delays.observed, + attestable_delay: block_delays.attestable, imported_delay: block_delays.imported, set_as_head_delay: block_delays.set_as_head, })); diff --git a/beacon_node/beacon_chain/src/block_times_cache.rs b/beacon_node/beacon_chain/src/block_times_cache.rs index 484de841de..6623932f59 100644 --- a/beacon_node/beacon_chain/src/block_times_cache.rs +++ b/beacon_node/beacon_chain/src/block_times_cache.rs @@ -18,6 +18,7 @@ type BlockRoot = Hash256; #[derive(Clone, Default)] pub struct Timestamps { pub observed: Option, + pub attestable: Option, pub imported: Option, pub set_as_head: Option, } @@ -26,6 +27,7 @@ pub struct Timestamps { #[derive(Default)] pub struct BlockDelays { pub observed: Option, + pub attestable: Option, pub imported: Option, pub set_as_head: Option, } @@ -35,6 +37,9 @@ impl BlockDelays { let observed = times .observed .and_then(|observed_time| observed_time.checked_sub(slot_start_time)); + let attestable = times + .attestable + .and_then(|attestable_time| attestable_time.checked_sub(slot_start_time)); let imported = times .imported .and_then(|imported_time| imported_time.checked_sub(times.observed?)); @@ -43,6 +48,7 @@ impl BlockDelays { .and_then(|set_as_head_time| set_as_head_time.checked_sub(times.imported?)); BlockDelays { observed, + attestable, imported, set_as_head, } @@ -99,6 +105,14 @@ impl BlockTimesCache { }; } + pub fn set_time_attestable(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) { + let block_times = self + .cache + .entry(block_root) + .or_insert_with(|| BlockTimesCacheValue::new(slot)); + block_times.timestamps.attestable = Some(timestamp); + } + pub fn set_time_imported(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) { let block_times = self .cache diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index efeb76e76f..f785036d45 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -767,7 +767,10 @@ lazy_static! { "Number of attester slashings seen", &["src", "validator"] ); +} +// Fourth lazy-static block is used to account for macro recursion limit. +lazy_static! { /* * Block Delay Metrics */ @@ -783,14 +786,25 @@ lazy_static! { "beacon_block_head_imported_delay_time", "Duration between the time the block was imported and the time when it was set as head.", ); + pub static ref BEACON_BLOCK_HEAD_ATTESTABLE_DELAY_TIME: Result = try_create_histogram( + "beacon_block_head_attestable_delay_time", + "Duration between the start of the slot and the time at which the block could be attested to.", + ); pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result = try_create_histogram( "beacon_block_head_slot_start_delay_time", "Duration between the start of the block's slot and the time when it was set as head.", ); - pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result = try_create_int_counter( - "beacon_block_head_slot_start_delay_exceeded_total", - "Triggered when the duration between the start of the block's slot and the current time \ - will result in failed attestations.", + pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_LATE: Result = try_create_int_counter( + "beacon_block_head_missed_att_deadline_late", + "Total number of delayed head blocks that arrived late" + ); + pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_SLOW: Result = try_create_int_counter( + "beacon_block_head_missed_att_deadline_slow", + "Total number of delayed head blocks that arrived on time but not processed in time" + ); + pub static ref BEACON_BLOCK_HEAD_MISSED_ATT_DEADLINE_OTHER: Result = try_create_int_counter( + "beacon_block_head_missed_att_deadline_other", + "Total number of delayed head blocks that were not late and not slow to process" ); /* @@ -801,10 +815,7 @@ lazy_static! { "gossip_beacon_block_skipped_slots", "For each gossip blocks, the number of skip slots between it and its parent" ); -} -// Fourth lazy-static block is used to account for macro recursion limit. -lazy_static! { /* * Sync Committee Message Verification */ diff --git a/common/eth2/src/types.rs b/common/eth2/src/types.rs index 8ef3582268..e4ba43d526 100644 --- a/common/eth2/src/types.rs +++ b/common/eth2/src/types.rs @@ -835,6 +835,7 @@ pub struct SseLateHead { pub proposer_graffiti: String, pub block_delay: Duration, pub observed_delay: Option, + pub attestable_delay: Option, pub imported_delay: Option, pub set_as_head_delay: Option, }