mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-07 16:55:46 +00:00
Squashed reset to unstable
This commit is contained in:
committed by
Daniel Knopik
parent
b71b5f2231
commit
f61f0b654c
@@ -8,13 +8,13 @@ authors = ["Sigma Prime <contact@sigmaprime.io>"]
|
||||
beacon_node_fallback = { workspace = true }
|
||||
environment = { workspace = true }
|
||||
eth2 = { workspace = true }
|
||||
logging = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
slog = { workspace = true }
|
||||
slot_clock = { workspace = true }
|
||||
task_executor = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
types = { workspace = true }
|
||||
validator_store = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
futures = { workspace = true }
|
||||
|
||||
@@ -32,16 +32,78 @@
|
||||
use beacon_node_fallback::BeaconNodeFallback;
|
||||
use environment::RuntimeContext;
|
||||
use eth2::types::LivenessResponseData;
|
||||
use logging::crit;
|
||||
use parking_lot::RwLock;
|
||||
use slog::{crit, error, info, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::future::Future;
|
||||
use std::sync::Arc;
|
||||
use task_executor::ShutdownReason;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{error, info};
|
||||
use types::{Epoch, EthSpec, PublicKeyBytes, Slot};
|
||||
use validator_store::{DoppelgangerStatus, ValidatorStore};
|
||||
|
||||
/// A wrapper around `PublicKeyBytes` which encodes information about the status of a validator
|
||||
/// pubkey with regards to doppelganger protection.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum DoppelgangerStatus {
|
||||
/// Doppelganger protection has approved this for signing.
|
||||
///
|
||||
/// This is because the service has waited some period of time to
|
||||
/// detect other instances of this key on the network.
|
||||
SigningEnabled(PublicKeyBytes),
|
||||
/// Doppelganger protection is still waiting to detect other instances.
|
||||
///
|
||||
/// Do not use this pubkey for signing slashable messages!!
|
||||
///
|
||||
/// However, it can safely be used for other non-slashable operations (e.g., collecting duties
|
||||
/// or subscribing to subnets).
|
||||
SigningDisabled(PublicKeyBytes),
|
||||
/// This pubkey is unknown to the doppelganger service.
|
||||
///
|
||||
/// This represents a serious internal error in the program. This validator will be permanently
|
||||
/// disabled!
|
||||
UnknownToDoppelganger(PublicKeyBytes),
|
||||
}
|
||||
|
||||
impl DoppelgangerStatus {
|
||||
/// Only return a pubkey if it is explicitly safe for doppelganger protection.
|
||||
///
|
||||
/// If `Some(pubkey)` is returned, doppelganger has declared it safe for signing.
|
||||
///
|
||||
/// ## Note
|
||||
///
|
||||
/// "Safe" is only best-effort by doppelganger. There is no guarantee that a doppelganger
|
||||
/// doesn't exist.
|
||||
pub fn only_safe(self) -> Option<PublicKeyBytes> {
|
||||
match self {
|
||||
DoppelgangerStatus::SigningEnabled(pubkey) => Some(pubkey),
|
||||
DoppelgangerStatus::SigningDisabled(_) => None,
|
||||
DoppelgangerStatus::UnknownToDoppelganger(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a key regardless of whether or not doppelganger has approved it. Such a key might be
|
||||
/// used for signing non-slashable messages, duties collection or other activities.
|
||||
///
|
||||
/// If the validator is unknown to doppelganger then `None` will be returned.
|
||||
pub fn ignored(self) -> Option<PublicKeyBytes> {
|
||||
match self {
|
||||
DoppelgangerStatus::SigningEnabled(pubkey) => Some(pubkey),
|
||||
DoppelgangerStatus::SigningDisabled(pubkey) => Some(pubkey),
|
||||
DoppelgangerStatus::UnknownToDoppelganger(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Only return a pubkey if it will not be used for signing due to doppelganger detection.
|
||||
pub fn only_unsafe(self) -> Option<PublicKeyBytes> {
|
||||
match self {
|
||||
DoppelgangerStatus::SigningEnabled(_) => None,
|
||||
DoppelgangerStatus::SigningDisabled(pubkey) => Some(pubkey),
|
||||
DoppelgangerStatus::UnknownToDoppelganger(pubkey) => Some(pubkey),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct LivenessResponses {
|
||||
current_epoch_responses: Vec<LivenessResponseData>,
|
||||
@@ -52,6 +114,13 @@ struct LivenessResponses {
|
||||
/// validators on the network.
|
||||
pub const DEFAULT_REMAINING_DETECTION_EPOCHS: u64 = 1;
|
||||
|
||||
/// This crate cannot depend on ValidatorStore as validator_store depends on this crate and
|
||||
/// initialises the doppelganger protection. For this reason, we abstract the validator store
|
||||
/// functions this service needs through the following trait
|
||||
pub trait DoppelgangerValidatorStore {
|
||||
fn get_validator_index(&self, pubkey: &PublicKeyBytes) -> Option<u64>;
|
||||
}
|
||||
|
||||
/// Store the per-validator status of doppelganger checking.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct DoppelgangerState {
|
||||
@@ -94,9 +163,8 @@ impl DoppelgangerState {
|
||||
/// If the BN fails to respond to either of these requests, simply return an empty response.
|
||||
/// This behaviour is to help prevent spurious failures on the BN from needlessly preventing
|
||||
/// doppelganger progression.
|
||||
async fn beacon_node_liveness<T: 'static + SlotClock>(
|
||||
beacon_nodes: Arc<BeaconNodeFallback<T>>,
|
||||
log: Logger,
|
||||
async fn beacon_node_liveness<T: 'static + SlotClock, E: EthSpec>(
|
||||
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
|
||||
current_epoch: Epoch,
|
||||
validator_indices: Vec<u64>,
|
||||
) -> LivenessResponses {
|
||||
@@ -135,10 +203,9 @@ async fn beacon_node_liveness<T: 'static + SlotClock>(
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
crit!(
|
||||
log,
|
||||
"Failed previous epoch liveness query";
|
||||
"error" => %e,
|
||||
"previous_epoch" => %previous_epoch,
|
||||
error = %e,
|
||||
previous_epoch = %previous_epoch,
|
||||
"Failed previous epoch liveness query"
|
||||
);
|
||||
// Return an empty vec. In effect, this means to keep trying to make doppelganger
|
||||
// progress even if some of the calls are failing.
|
||||
@@ -171,10 +238,9 @@ async fn beacon_node_liveness<T: 'static + SlotClock>(
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
crit!(
|
||||
log,
|
||||
"Failed current epoch liveness query";
|
||||
"error" => %e,
|
||||
"current_epoch" => %current_epoch,
|
||||
error = %e,
|
||||
current_epoch = %current_epoch,
|
||||
"Failed current epoch liveness query"
|
||||
);
|
||||
// Return an empty vec. In effect, this means to keep trying to make doppelganger
|
||||
// progress even if some of the calls are failing.
|
||||
@@ -189,11 +255,10 @@ async fn beacon_node_liveness<T: 'static + SlotClock>(
|
||||
|| current_epoch_responses.len() != previous_epoch_responses.len()
|
||||
{
|
||||
error!(
|
||||
log,
|
||||
"Liveness query omitted validators";
|
||||
"previous_epoch_response" => previous_epoch_responses.len(),
|
||||
"current_epoch_response" => current_epoch_responses.len(),
|
||||
"requested" => validator_indices.len(),
|
||||
previous_epoch_response = previous_epoch_responses.len(),
|
||||
current_epoch_response = current_epoch_responses.len(),
|
||||
requested = validator_indices.len(),
|
||||
"Liveness query omitted validators"
|
||||
)
|
||||
}
|
||||
|
||||
@@ -203,66 +268,49 @@ async fn beacon_node_liveness<T: 'static + SlotClock>(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DoppelgangerService {
|
||||
doppelganger_states: RwLock<HashMap<PublicKeyBytes, DoppelgangerState>>,
|
||||
log: Logger,
|
||||
}
|
||||
|
||||
impl DoppelgangerService {
|
||||
pub fn new(log: Logger) -> Self {
|
||||
Self {
|
||||
doppelganger_states: <_>::default(),
|
||||
log,
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts a reoccurring future which will try to keep the doppelganger service updated each
|
||||
/// slot.
|
||||
pub fn start_update_service<E, T, V>(
|
||||
service: Arc<Self>,
|
||||
context: RuntimeContext<E>,
|
||||
validator_store: Arc<V>,
|
||||
beacon_nodes: Arc<BeaconNodeFallback<T>>,
|
||||
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
|
||||
slot_clock: T,
|
||||
) -> Result<(), String>
|
||||
where
|
||||
E: EthSpec,
|
||||
T: 'static + SlotClock,
|
||||
V: ValidatorStore<E = E> + Send + Sync + 'static,
|
||||
V: DoppelgangerValidatorStore + Send + Sync + 'static,
|
||||
{
|
||||
// Define the `get_index` function as one that uses the validator store.
|
||||
let get_index = move |pubkey| validator_store.validator_index(&pubkey);
|
||||
let get_index = move |pubkey| validator_store.get_validator_index(&pubkey);
|
||||
|
||||
// Define the `get_liveness` function as one that queries the beacon node API.
|
||||
let log = service.log.clone();
|
||||
let get_liveness = move |current_epoch, validator_indices| {
|
||||
beacon_node_liveness::<T>(
|
||||
beacon_nodes.clone(),
|
||||
log.clone(),
|
||||
current_epoch,
|
||||
validator_indices,
|
||||
)
|
||||
beacon_node_liveness(beacon_nodes.clone(), current_epoch, validator_indices)
|
||||
};
|
||||
|
||||
let mut shutdown_sender = context.executor.shutdown_sender();
|
||||
let log = service.log.clone();
|
||||
|
||||
let mut shutdown_func = move || {
|
||||
if let Err(e) =
|
||||
shutdown_sender.try_send(ShutdownReason::Failure("Doppelganger detected."))
|
||||
{
|
||||
crit!(
|
||||
log,
|
||||
"Failed to send shutdown signal";
|
||||
"msg" => "terminate this process immediately",
|
||||
"error" => ?e
|
||||
msg = "terminate this process immediately",
|
||||
error = ?e,
|
||||
"Failed to send shutdown signal"
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
info!(
|
||||
service.log,
|
||||
"Doppelganger detection service started";
|
||||
);
|
||||
info!("Doppelganger detection service started");
|
||||
|
||||
context.executor.spawn(
|
||||
async move {
|
||||
@@ -292,9 +340,8 @@ impl DoppelgangerService {
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
service.log,
|
||||
"Error during doppelganger detection";
|
||||
"error" => ?e
|
||||
error = ?e,
|
||||
"Error during doppelganger detection"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -319,10 +366,9 @@ impl DoppelgangerService {
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
crit!(
|
||||
self.log,
|
||||
"Validator unknown to doppelganger service";
|
||||
"msg" => "preventing validator from performing duties",
|
||||
"pubkey" => ?validator
|
||||
msg = "preventing validator from performing duties",
|
||||
pubkey = ?validator,
|
||||
"Validator unknown to doppelganger service"
|
||||
);
|
||||
DoppelgangerStatus::UnknownToDoppelganger(validator)
|
||||
})
|
||||
@@ -332,18 +378,17 @@ impl DoppelgangerService {
|
||||
///
|
||||
/// Validators added during the genesis epoch will not have doppelganger protection applied to
|
||||
/// them.
|
||||
pub fn register_new_validator<T: SlotClock>(
|
||||
pub fn register_new_validator<E: EthSpec, T: SlotClock>(
|
||||
&self,
|
||||
validator: PublicKeyBytes,
|
||||
slot_clock: &T,
|
||||
slots_per_epoch: u64,
|
||||
) -> Result<(), String> {
|
||||
let current_epoch = slot_clock
|
||||
// If registering before genesis, use the genesis slot.
|
||||
.now_or_genesis()
|
||||
.ok_or_else(|| "Unable to read slot clock when registering validator".to_string())?
|
||||
.epoch(slots_per_epoch);
|
||||
let genesis_epoch = slot_clock.genesis_slot().epoch(slots_per_epoch);
|
||||
.epoch(E::slots_per_epoch());
|
||||
let genesis_epoch = slot_clock.genesis_slot().epoch(E::slots_per_epoch());
|
||||
|
||||
let remaining_epochs = if current_epoch <= genesis_epoch {
|
||||
// Disable doppelganger protection when the validator was initialized before genesis.
|
||||
@@ -485,11 +530,7 @@ impl DoppelgangerService {
|
||||
|
||||
// Resolve the index from the server response back to a public key.
|
||||
let Some(pubkey) = indices_map.get(&response.index) else {
|
||||
crit!(
|
||||
self.log,
|
||||
"Inconsistent indices map";
|
||||
"validator_index" => response.index,
|
||||
);
|
||||
crit!(validator_index = response.index, "Inconsistent indices map");
|
||||
// Skip this result if an inconsistency is detected.
|
||||
continue;
|
||||
};
|
||||
@@ -499,9 +540,8 @@ impl DoppelgangerService {
|
||||
state.next_check_epoch
|
||||
} else {
|
||||
crit!(
|
||||
self.log,
|
||||
"Inconsistent doppelganger state";
|
||||
"validator_pubkey" => ?pubkey,
|
||||
validator_pubkey = ?pubkey,
|
||||
"Inconsistent doppelganger state"
|
||||
);
|
||||
// Skip this result if an inconsistency is detected.
|
||||
continue;
|
||||
@@ -515,15 +555,14 @@ impl DoppelgangerService {
|
||||
let violators_exist = !violators.is_empty();
|
||||
if violators_exist {
|
||||
crit!(
|
||||
self.log,
|
||||
"Doppelganger(s) detected";
|
||||
"msg" => "A doppelganger occurs when two different validator clients run the \
|
||||
same public key. This validator client detected another instance of a local \
|
||||
validator on the network and is shutting down to prevent potential slashable \
|
||||
offences. Ensure that you are not running a duplicate or overlapping \
|
||||
validator client",
|
||||
"doppelganger_indices" => ?violators
|
||||
)
|
||||
msg = "A doppelganger occurs when two different validator clients run the \
|
||||
same public key. This validator client detected another instance of a local \
|
||||
validator on the network and is shutting down to prevent potential slashable \
|
||||
offences. Ensure that you are not running a duplicate or overlapping \
|
||||
validator client",
|
||||
doppelganger_indices = ?violators,
|
||||
"Doppelganger(s) detected"
|
||||
);
|
||||
}
|
||||
|
||||
// The concept of "epoch satisfaction" is that for some epoch `e` we are *satisfied* that
|
||||
@@ -598,19 +637,17 @@ impl DoppelgangerService {
|
||||
doppelganger_state.complete_detection_in_epoch(previous_epoch);
|
||||
|
||||
info!(
|
||||
self.log,
|
||||
"Found no doppelganger";
|
||||
"further_checks_remaining" => doppelganger_state.remaining_epochs,
|
||||
"epoch" => response.epoch,
|
||||
"validator_index" => response.index
|
||||
further_checks_remaining = doppelganger_state.remaining_epochs,
|
||||
epoch = %response.epoch,
|
||||
validator_index = response.index,
|
||||
"Found no doppelganger"
|
||||
);
|
||||
|
||||
if doppelganger_state.remaining_epochs == 0 {
|
||||
info!(
|
||||
self.log,
|
||||
"Doppelganger detection complete";
|
||||
"msg" => "starting validator",
|
||||
"validator_index" => response.index
|
||||
msg = "starting validator",
|
||||
validator_index = response.index,
|
||||
"Doppelganger detection complete"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -629,7 +666,6 @@ impl DoppelgangerService {
|
||||
mod test {
|
||||
use super::*;
|
||||
use futures::executor::block_on;
|
||||
use logging::test_logger;
|
||||
use slot_clock::TestingSlotClock;
|
||||
use std::future;
|
||||
use std::time::Duration;
|
||||
@@ -637,7 +673,6 @@ mod test {
|
||||
test_utils::{SeedableRng, TestRandom, XorShiftRng},
|
||||
MainnetEthSpec,
|
||||
};
|
||||
use validator_store::DoppelgangerStatus;
|
||||
|
||||
const DEFAULT_VALIDATORS: usize = 8;
|
||||
|
||||
@@ -674,13 +709,12 @@ mod test {
|
||||
fn build(self) -> TestScenario {
|
||||
let mut rng = XorShiftRng::from_seed([42; 16]);
|
||||
let slot_clock = TestingSlotClock::new(Slot::new(0), GENESIS_TIME, SLOT_DURATION);
|
||||
let log = test_logger();
|
||||
|
||||
TestScenario {
|
||||
validators: (0..self.validator_count)
|
||||
.map(|_| PublicKeyBytes::random_for_test(&mut rng))
|
||||
.collect(),
|
||||
doppelganger: DoppelgangerService::new(log),
|
||||
doppelganger: DoppelgangerService::default(),
|
||||
slot_clock,
|
||||
}
|
||||
}
|
||||
@@ -739,7 +773,7 @@ mod test {
|
||||
.expect("index should exist");
|
||||
|
||||
self.doppelganger
|
||||
.register_new_validator(pubkey, &self.slot_clock, E::slots_per_epoch())
|
||||
.register_new_validator::<E, _>(pubkey, &self.slot_clock)
|
||||
.unwrap();
|
||||
self.doppelganger
|
||||
.doppelganger_states
|
||||
|
||||
Reference in New Issue
Block a user