Add missed blocks to monitored validators (#4731)

* add missed_block metric

* init missed_block in constructor

* declare beaconproposercache in ValidatorMonitor

* refacto proposer_shuffling_decision_root to use epoch instead of current.epoch

* imple new proposer_shuffling_decision_root in callers

* push missed_blocks

* prune missed_blocks

* only add to hashmap if it's a monitored validator

* remove current_epoch dup + typos

* extract in func

* add prom metrics

* checkpoint is not only epoch but slot as well

* add safeguard if we start a new chain at slot 0

* clean

* remove unnecessary negative value for a slot

* typo in comment

* remove unused current_epoch

* share beacon_proposer_cache between validator_monitor and beacon_chain

* pass Hash256::zero()

* debug objects

* fix loop: lag is at the head

* sed s/get_slot/get_epoch

* fewer calls to cache.get_epoch

* fix typos

* remove cache first call

* export TYPICAL_SLOTS_PER_EPOCH and use it in validator_monitor

* switch to gauge & loop over missed_blocks hashset

* fix subnet_service tests

* remove unused var

* clean + fix nits

* add beacon_proposer_cache + validator_monitor in builder

* fix store_tests

* fix builder tests

* add tests

* add validator monitor set of tests

* clean tests

* nits

* optimise imports

* lint

* typo

* added self.aggregatable

* duplicate proposer_shuffling_decision_root

* remove duplication in passing beacon_proposer_cache

* remove duplication in passing beacon_proposer_cache

* using indices

* fmt

* implement missed blocks total

* nits

* avoid heap allocation

* remove recursion limit

* fix lint

* Fix valdiator monitor builder pattern

Unify validator monitor config struct

* renaming metrics

* renaming metrics in validator monitor

* add log if there's a missing validator index

* consistent log

* fix loop

* better loop

* move gauge to counter

* fmt

* add error message

* lint

* fix prom metrics

* set gauge to 0 when non-finalized epochs

* better wording

* remove hash256::zero in favour of block_root

* fix gauge total label

* fix last missed block validator

* Add `MissedBlock` struct

* Fix comment

* Refactor non-finalized block loop

* Fix off-by-one

* Avoid string allocation

* Fix compile error

* Remove non-finalized blocks metric

* fix func clojure

* remove unused variable

* remove unused DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD

* remove unused DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD in builder

* add validator index depending on the fork name

* typos

---------

Co-authored-by: Paul Hauner <paul@paulhauner.com>
This commit is contained in:
Joel Rousseau
2023-11-09 04:05:14 +00:00
committed by GitHub
parent bcca88a150
commit ac8811afac
16 changed files with 575 additions and 90 deletions

View File

@@ -1,4 +1,5 @@
use crate::beacon_chain::{CanonicalHead, BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, OP_POOL_DB_KEY};
use crate::beacon_proposer_cache::BeaconProposerCache;
use crate::data_availability_checker::DataAvailabilityChecker;
use crate::eth1_chain::{CachingEth1Backend, SszEth1};
use crate::eth1_finalization_cache::Eth1FinalizationCache;
@@ -10,7 +11,7 @@ use crate::persisted_beacon_chain::PersistedBeaconChain;
use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache};
use crate::snapshot_cache::{SnapshotCache, DEFAULT_SNAPSHOT_CACHE_SIZE};
use crate::timeout_rw_lock::TimeoutRwLock;
use crate::validator_monitor::ValidatorMonitor;
use crate::validator_monitor::{ValidatorMonitor, ValidatorMonitorConfig};
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
use crate::ChainConfig;
use crate::{
@@ -23,10 +24,10 @@ use fork_choice::{ForkChoice, ResetPayloadStatuses};
use futures::channel::mpsc::Sender;
use kzg::{Kzg, TrustedSetup};
use operation_pool::{OperationPool, PersistedOperationPool};
use parking_lot::RwLock;
use parking_lot::{Mutex, RwLock};
use proto_array::{DisallowedReOrgOffsets, ReOrgThreshold};
use slasher::Slasher;
use slog::{crit, debug, error, info, Logger};
use slog::{crit, debug, error, info, o, Logger};
use slot_clock::{SlotClock, TestingSlotClock};
use state_processing::per_slot_processing;
use std::marker::PhantomData;
@@ -35,8 +36,8 @@ use std::time::Duration;
use store::{Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp};
use task_executor::{ShutdownReason, TaskExecutor};
use types::{
BeaconBlock, BeaconState, ChainSpec, Checkpoint, Epoch, EthSpec, Graffiti, Hash256,
PublicKeyBytes, Signature, SignedBeaconBlock, Slot,
BeaconBlock, BeaconState, ChainSpec, Checkpoint, Epoch, EthSpec, Graffiti, Hash256, Signature,
SignedBeaconBlock, Slot,
};
/// An empty struct used to "witness" all the `BeaconChainTypes` traits. It has no user-facing
@@ -93,12 +94,12 @@ pub struct BeaconChainBuilder<T: BeaconChainTypes> {
log: Option<Logger>,
graffiti: Graffiti,
slasher: Option<Arc<Slasher<T::EthSpec>>>,
validator_monitor: Option<ValidatorMonitor<T::EthSpec>>,
// Pending I/O batch that is constructed during building and should be executed atomically
// alongside `PersistedBeaconChain` storage when `BeaconChainBuilder::build` is called.
pending_io_batch: Vec<KeyValueStoreOp>,
trusted_setup: Option<TrustedSetup>,
task_executor: Option<TaskExecutor>,
validator_monitor_config: Option<ValidatorMonitorConfig>,
}
impl<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>
@@ -135,10 +136,10 @@ where
log: None,
graffiti: Graffiti::default(),
slasher: None,
validator_monitor: None,
pending_io_batch: vec![],
trusted_setup: None,
task_executor: None,
validator_monitor_config: None,
}
}
@@ -623,19 +624,8 @@ where
/// Register some validators for additional monitoring.
///
/// `validators` is a comma-separated string of 0x-formatted BLS pubkeys.
pub fn monitor_validators(
mut self,
auto_register: bool,
validators: Vec<PublicKeyBytes>,
individual_metrics_threshold: usize,
log: Logger,
) -> Self {
self.validator_monitor = Some(ValidatorMonitor::new(
validators,
auto_register,
individual_metrics_threshold,
log.clone(),
));
pub fn validator_monitor_config(mut self, config: ValidatorMonitorConfig) -> Self {
self.validator_monitor_config = Some(config);
self
}
@@ -671,11 +661,16 @@ where
let genesis_state_root = self
.genesis_state_root
.ok_or("Cannot build without a genesis state root")?;
let mut validator_monitor = self
.validator_monitor
.ok_or("Cannot build without a validator monitor")?;
let validator_monitor_config = self.validator_monitor_config.unwrap_or_default();
let head_tracker = Arc::new(self.head_tracker.unwrap_or_default());
let beacon_proposer_cache: Arc<Mutex<BeaconProposerCache>> = <_>::default();
let mut validator_monitor = ValidatorMonitor::new(
validator_monitor_config,
beacon_proposer_cache.clone(),
log.new(o!("service" => "val_mon")),
);
let current_slot = if slot_clock
.is_prior_to_genesis()
.ok_or("Unable to read slot clock")?
@@ -911,7 +906,7 @@ where
log.clone(),
)),
eth1_finalization_cache: TimeoutRwLock::new(Eth1FinalizationCache::new(log.clone())),
beacon_proposer_cache: <_>::default(),
beacon_proposer_cache,
block_times_cache: <_>::default(),
pre_finalization_block_cache: <_>::default(),
validator_pubkey_cache: TimeoutRwLock::new(validator_pubkey_cache),
@@ -1097,7 +1092,6 @@ fn descriptive_db_error(item: &str, error: &StoreError) -> String {
mod test {
use super::*;
use crate::test_utils::EphemeralHarnessType;
use crate::validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD;
use ethereum_hashing::hash;
use genesis::{
generate_deterministic_keypairs, interop_genesis_state, DEFAULT_ETH1_BLOCK_HASH,
@@ -1155,12 +1149,6 @@ mod test {
.testing_slot_clock(Duration::from_secs(1))
.expect("should configure testing slot clock")
.shutdown_sender(shutdown_tx)
.monitor_validators(
true,
vec![],
DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD,
log.clone(),
)
.build()
.expect("should build");