Detailed validator monitoring (#2151)

## Issue Addressed

- Resolves #2064

## Proposed Changes

Adds a `ValidatorMonitor` struct which provides additional logging and Grafana metrics for specific validators.

Use `lighthouse bn --validator-monitor` to automatically enable monitoring for any validator that hits the [subnet subscription](https://ethereum.github.io/eth2.0-APIs/#/Validator/prepareBeaconCommitteeSubnet) HTTP API endpoint.

Also, use `lighthouse bn --validator-monitor-pubkeys` to supply a list of validator public keys that will always be monitored.

See the new docs included in this PR for more info.

## TODO

- [x] Track validator balance, `slashed` status, etc.
- [x] ~~Register slashings in current epoch, not offense epoch~~
- [ ] Publish Grafana dashboard, update TODO link in docs
- [x] ~~#2130 is merged into this branch, resolve that~~
This commit is contained in:
Paul Hauner
2021-01-20 19:19:38 +00:00
parent 1eb0915301
commit 2b2a358522
29 changed files with 1646 additions and 37 deletions

View File

@@ -65,6 +65,7 @@ mod tests {
Duration::from_millis(SLOT_DURATION_MILLIS),
))
.shutdown_sender(shutdown_tx)
.monitor_validators(true, vec![], log)
.build()
.expect("should build"),
);

View File

@@ -224,6 +224,7 @@ impl<E: EthSpec> WorkEvent<E> {
attestation: Attestation<E>,
subnet_id: SubnetId,
should_import: bool,
seen_timestamp: Duration,
) -> Self {
Self {
drop_during_sync: true,
@@ -233,6 +234,7 @@ impl<E: EthSpec> WorkEvent<E> {
attestation: Box::new(attestation),
subnet_id,
should_import,
seen_timestamp,
},
}
}
@@ -242,6 +244,7 @@ impl<E: EthSpec> WorkEvent<E> {
message_id: MessageId,
peer_id: PeerId,
aggregate: SignedAggregateAndProof<E>,
seen_timestamp: Duration,
) -> Self {
Self {
drop_during_sync: true,
@@ -249,6 +252,7 @@ impl<E: EthSpec> WorkEvent<E> {
message_id,
peer_id,
aggregate: Box::new(aggregate),
seen_timestamp,
},
}
}
@@ -258,6 +262,7 @@ impl<E: EthSpec> WorkEvent<E> {
message_id: MessageId,
peer_id: PeerId,
block: Box<SignedBeaconBlock<E>>,
seen_timestamp: Duration,
) -> Self {
Self {
drop_during_sync: false,
@@ -265,6 +270,7 @@ impl<E: EthSpec> WorkEvent<E> {
message_id,
peer_id,
block,
seen_timestamp,
},
}
}
@@ -391,16 +397,19 @@ pub enum Work<E: EthSpec> {
attestation: Box<Attestation<E>>,
subnet_id: SubnetId,
should_import: bool,
seen_timestamp: Duration,
},
GossipAggregate {
message_id: MessageId,
peer_id: PeerId,
aggregate: Box<SignedAggregateAndProof<E>>,
seen_timestamp: Duration,
},
GossipBlock {
message_id: MessageId,
peer_id: PeerId,
block: Box<SignedBeaconBlock<E>>,
seen_timestamp: Duration,
},
GossipVoluntaryExit {
message_id: MessageId,
@@ -833,12 +842,14 @@ impl<T: BeaconChainTypes> BeaconProcessor<T> {
attestation,
subnet_id,
should_import,
seen_timestamp,
} => worker.process_gossip_attestation(
message_id,
peer_id,
*attestation,
subnet_id,
should_import,
seen_timestamp,
),
/*
* Aggregated attestation verification.
@@ -847,7 +858,13 @@ impl<T: BeaconChainTypes> BeaconProcessor<T> {
message_id,
peer_id,
aggregate,
} => worker.process_gossip_aggregate(message_id, peer_id, *aggregate),
seen_timestamp,
} => worker.process_gossip_aggregate(
message_id,
peer_id,
*aggregate,
seen_timestamp,
),
/*
* Verification for beacon blocks received on gossip.
*/
@@ -855,7 +872,8 @@ impl<T: BeaconChainTypes> BeaconProcessor<T> {
message_id,
peer_id,
block,
} => worker.process_gossip_block(message_id, peer_id, *block),
seen_timestamp,
} => worker.process_gossip_block(message_id, peer_id, *block, seen_timestamp),
/*
* Voluntary exits received on gossip.
*/

View File

@@ -1,12 +1,14 @@
use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
use beacon_chain::{
attestation_verification::Error as AttnError, observed_operations::ObservationOutcome,
attestation_verification::{Error as AttnError, SignatureVerifiedAttestation},
observed_operations::ObservationOutcome,
BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
};
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
use slog::{debug, error, info, trace, warn};
use ssz::Encode;
use std::time::Duration;
use types::{
Attestation, AttesterSlashing, Hash256, ProposerSlashing, SignedAggregateAndProof,
SignedBeaconBlock, SignedVoluntaryExit, SubnetId,
@@ -61,6 +63,7 @@ impl<T: BeaconChainTypes> Worker<T> {
attestation: Attestation<T::EthSpec>,
subnet_id: SubnetId,
should_import: bool,
seen_timestamp: Duration,
) {
let beacon_block_root = attestation.data.beacon_block_root;
@@ -81,6 +84,16 @@ impl<T: BeaconChainTypes> Worker<T> {
}
};
// Register the attestation with any monitored validators.
self.chain
.validator_monitor
.read()
.register_gossip_unaggregated_attestation(
seen_timestamp,
attestation.indexed_attestation(),
&self.chain.slot_clock,
);
// Indicate to the `Network` service that this message is valid and can be
// propagated on the gossip network.
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
@@ -137,6 +150,7 @@ impl<T: BeaconChainTypes> Worker<T> {
message_id: MessageId,
peer_id: PeerId,
aggregate: SignedAggregateAndProof<T::EthSpec>,
seen_timestamp: Duration,
) {
let beacon_block_root = aggregate.message.aggregate.data.beacon_block_root;
@@ -162,6 +176,17 @@ impl<T: BeaconChainTypes> Worker<T> {
// propagated on the gossip network.
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
// Register the attestation with any monitored validators.
self.chain
.validator_monitor
.read()
.register_gossip_aggregated_attestation(
seen_timestamp,
aggregate.aggregate(),
aggregate.indexed_attestation(),
&self.chain.slot_clock,
);
metrics::inc_counter(&metrics::BEACON_PROCESSOR_AGGREGATED_ATTESTATION_VERIFIED_TOTAL);
if let Err(e) = self.chain.apply_attestation_to_fork_choice(&aggregate) {
@@ -210,6 +235,7 @@ impl<T: BeaconChainTypes> Worker<T> {
message_id: MessageId,
peer_id: PeerId,
block: SignedBeaconBlock<T::EthSpec>,
seen_duration: Duration,
) {
let verified_block = match self.chain.verify_block_for_gossip(block) {
Ok(verified_block) => {
@@ -262,7 +288,19 @@ impl<T: BeaconChainTypes> Worker<T> {
metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_VERIFIED_TOTAL);
// Register the block with any monitored validators.
//
// Register this event *prior* to importing the block, while the block is still only
// partially verified.
self.chain.validator_monitor.read().register_gossip_block(
seen_duration,
&verified_block.block.message,
verified_block.block_root,
&self.chain.slot_clock,
);
let block = Box::new(verified_block.block.clone());
match self.chain.process_block(verified_block) {
Ok(_block_root) => {
metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL);
@@ -359,6 +397,12 @@ impl<T: BeaconChainTypes> Worker<T> {
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
// Register the exit with any monitored validators.
self.chain
.validator_monitor
.read()
.register_gossip_voluntary_exit(&exit.as_inner().message);
self.chain.import_voluntary_exit(exit);
debug!(self.log, "Successfully imported voluntary exit");
@@ -412,6 +456,12 @@ impl<T: BeaconChainTypes> Worker<T> {
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
// Register the slashing with any monitored validators.
self.chain
.validator_monitor
.read()
.register_gossip_proposer_slashing(slashing.as_inner());
self.chain.import_proposer_slashing(slashing);
debug!(self.log, "Successfully imported proposer slashing");
@@ -457,6 +507,12 @@ impl<T: BeaconChainTypes> Worker<T> {
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
// Register the slashing with any monitored validators.
self.chain
.validator_monitor
.read()
.register_gossip_attester_slashing(slashing.as_inner());
if let Err(e) = self.chain.import_attester_slashing(slashing) {
debug!(self.log, "Error importing attester slashing"; "error" => ?e);
metrics::inc_counter(&metrics::BEACON_PROCESSOR_ATTESTER_SLASHING_ERROR_TOTAL);

View File

@@ -9,6 +9,7 @@ use eth2_libp2p::{MessageId, NetworkGlobals, PeerId, PeerRequestId, Request, Res
use slog::{debug, error, o, trace, warn};
use std::cmp;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tokio::sync::mpsc;
use types::{
Attestation, AttesterSlashing, ChainSpec, EthSpec, ProposerSlashing, SignedAggregateAndProof,
@@ -230,7 +231,10 @@ impl<T: BeaconChainTypes> Processor<T> {
block: Box<SignedBeaconBlock<T::EthSpec>>,
) {
self.send_beacon_processor_work(BeaconWorkEvent::gossip_beacon_block(
message_id, peer_id, block,
message_id,
peer_id,
block,
timestamp_now(),
))
}
@@ -248,6 +252,7 @@ impl<T: BeaconChainTypes> Processor<T> {
unaggregated_attestation,
subnet_id,
should_process,
timestamp_now(),
))
}
@@ -258,7 +263,10 @@ impl<T: BeaconChainTypes> Processor<T> {
aggregate: SignedAggregateAndProof<T::EthSpec>,
) {
self.send_beacon_processor_work(BeaconWorkEvent::aggregated_attestation(
message_id, peer_id, aggregate,
message_id,
peer_id,
aggregate,
timestamp_now(),
))
}
@@ -390,3 +398,9 @@ impl<T: EthSpec> HandlerNetworkContext<T> {
})
}
}
/// Returns the current system time as a `Duration` measured from the Unix epoch.
///
/// If the system clock reports a time earlier than the Unix epoch, the error is
/// discarded and a zero-length duration is returned instead.
fn timestamp_now() -> Duration {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);

    match since_epoch {
        Ok(elapsed) => elapsed,
        // The clock is set before the epoch; fall back to zero rather than failing.
        Err(_) => Duration::from_secs(0),
    }
}