Split the VC into crates making it more modular (#6453)

* Starting to modularize the VC

* Revert changes to eth2

* More progress

* More progress

* Compiles

* Merge latest unstable and make it compile

* Fix some lints

* Tests compile

* Merge latest unstable

* Remove unnecessary deps

* Merge latest unstable

* Correct release tests

* Merge latest unstable

* Merge remote-tracking branch 'origin/unstable' into modularize-vc

* Merge branch 'unstable' into modularize-vc

* Revert unnecessary cargo lock changes

* Update validator_client/beacon_node_fallback/Cargo.toml

* Update validator_client/http_metrics/Cargo.toml

* Update validator_client/http_metrics/src/lib.rs

* Update validator_client/initialized_validators/Cargo.toml

* Update validator_client/signing_method/Cargo.toml

* Update validator_client/validator_metrics/Cargo.toml

* Update validator_client/validator_services/Cargo.toml

* Update validator_client/validator_store/Cargo.toml

* Update validator_client/validator_store/src/lib.rs

* Merge remote-tracking branch 'origin/unstable' into modularize-vc

* Fix format string

* Rename doppelganger trait

* Don't drop the tempdir

* Cargo fmt
This commit is contained in:
Age Manning
2024-11-08 12:01:46 +11:00
committed by GitHub
parent ae160ebf07
commit 8e95024945
59 changed files with 1021 additions and 554 deletions

View File

@@ -0,0 +1,727 @@
use crate::duties_service::{DutiesService, DutyAndProof};
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback};
use environment::RuntimeContext;
use futures::future::join_all;
use slog::{crit, debug, error, info, trace, warn};
use slot_clock::SlotClock;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use tokio::time::{sleep, sleep_until, Duration, Instant};
use tree_hash::TreeHash;
use types::{Attestation, AttestationData, ChainSpec, CommitteeIndex, EthSpec, Slot};
use validator_store::{Error as ValidatorStoreError, ValidatorStore};
/// Builds an `AttestationService`.
#[derive(Default)]
pub struct AttestationServiceBuilder<T: SlotClock + 'static, E: EthSpec> {
duties_service: Option<Arc<DutiesService<T, E>>>,
validator_store: Option<Arc<ValidatorStore<T, E>>>,
slot_clock: Option<T>,
beacon_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
context: Option<RuntimeContext<E>>,
}
impl<T: SlotClock + 'static, E: EthSpec> AttestationServiceBuilder<T, E> {
pub fn new() -> Self {
Self {
duties_service: None,
validator_store: None,
slot_clock: None,
beacon_nodes: None,
context: None,
}
}
pub fn duties_service(mut self, service: Arc<DutiesService<T, E>>) -> Self {
self.duties_service = Some(service);
self
}
pub fn validator_store(mut self, store: Arc<ValidatorStore<T, E>>) -> Self {
self.validator_store = Some(store);
self
}
pub fn slot_clock(mut self, slot_clock: T) -> Self {
self.slot_clock = Some(slot_clock);
self
}
pub fn beacon_nodes(mut self, beacon_nodes: Arc<BeaconNodeFallback<T, E>>) -> Self {
self.beacon_nodes = Some(beacon_nodes);
self
}
pub fn runtime_context(mut self, context: RuntimeContext<E>) -> Self {
self.context = Some(context);
self
}
pub fn build(self) -> Result<AttestationService<T, E>, String> {
Ok(AttestationService {
inner: Arc::new(Inner {
duties_service: self
.duties_service
.ok_or("Cannot build AttestationService without duties_service")?,
validator_store: self
.validator_store
.ok_or("Cannot build AttestationService without validator_store")?,
slot_clock: self
.slot_clock
.ok_or("Cannot build AttestationService without slot_clock")?,
beacon_nodes: self
.beacon_nodes
.ok_or("Cannot build AttestationService without beacon_nodes")?,
context: self
.context
.ok_or("Cannot build AttestationService without runtime_context")?,
}),
})
}
}
/// Helper to minimise `Arc` usage.
pub struct Inner<T, E: EthSpec> {
duties_service: Arc<DutiesService<T, E>>,
validator_store: Arc<ValidatorStore<T, E>>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
context: RuntimeContext<E>,
}
/// Attempts to produce attestations for all known validators 1/3rd of the way through each slot.
///
/// If any validators are on the same committee, a single attestation will be downloaded and
/// returned to the beacon node. This attestation will have a signature from each of the
/// validators.
pub struct AttestationService<T, E: EthSpec> {
inner: Arc<Inner<T, E>>,
}
impl<T, E: EthSpec> Clone for AttestationService<T, E> {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl<T, E: EthSpec> Deref for AttestationService<T, E> {
type Target = Inner<T, E>;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
/// Starts the service which periodically produces attestations.
pub fn start_update_service(self, spec: &ChainSpec) -> Result<(), String> {
let log = self.context.log().clone();
let slot_duration = Duration::from_secs(spec.seconds_per_slot);
let duration_to_next_slot = self
.slot_clock
.duration_to_next_slot()
.ok_or("Unable to determine duration to next slot")?;
info!(
log,
"Attestation production service started";
"next_update_millis" => duration_to_next_slot.as_millis()
);
let executor = self.context.executor.clone();
let interval_fut = async move {
loop {
if let Some(duration_to_next_slot) = self.slot_clock.duration_to_next_slot() {
sleep(duration_to_next_slot + slot_duration / 3).await;
let log = self.context.log();
if let Err(e) = self.spawn_attestation_tasks(slot_duration) {
crit!(
log,
"Failed to spawn attestation tasks";
"error" => e
)
} else {
trace!(
log,
"Spawned attestation tasks";
)
}
} else {
error!(log, "Failed to read slot clock");
// If we can't read the slot clock, just wait another slot.
sleep(slot_duration).await;
continue;
}
}
};
executor.spawn(interval_fut, "attestation_service");
Ok(())
}
/// For each each required attestation, spawn a new task that downloads, signs and uploads the
/// attestation to the beacon node.
fn spawn_attestation_tasks(&self, slot_duration: Duration) -> Result<(), String> {
let slot = self.slot_clock.now().ok_or("Failed to read slot clock")?;
let duration_to_next_slot = self
.slot_clock
.duration_to_next_slot()
.ok_or("Unable to determine duration to next slot")?;
// If a validator needs to publish an aggregate attestation, they must do so at 2/3
// through the slot. This delay triggers at this time
let aggregate_production_instant = Instant::now()
+ duration_to_next_slot
.checked_sub(slot_duration / 3)
.unwrap_or_else(|| Duration::from_secs(0));
let duties_by_committee_index: HashMap<CommitteeIndex, Vec<DutyAndProof>> = self
.duties_service
.attesters(slot)
.into_iter()
.fold(HashMap::new(), |mut map, duty_and_proof| {
map.entry(duty_and_proof.duty.committee_index)
.or_default()
.push(duty_and_proof);
map
});
// For each committee index for this slot:
//
// - Create and publish an `Attestation` for all required validators.
// - Create and publish `SignedAggregateAndProof` for all aggregating validators.
duties_by_committee_index
.into_iter()
.for_each(|(committee_index, validator_duties)| {
// Spawn a separate task for each attestation.
self.inner.context.executor.spawn_ignoring_error(
self.clone().publish_attestations_and_aggregates(
slot,
committee_index,
validator_duties,
aggregate_production_instant,
),
"attestation publish",
);
});
// Schedule pruning of the slashing protection database once all unaggregated
// attestations have (hopefully) been signed, i.e. at the same time as aggregate
// production.
self.spawn_slashing_protection_pruning_task(slot, aggregate_production_instant);
Ok(())
}
/// Performs the first step of the attesting process: downloading `Attestation` objects,
/// signing them and returning them to the validator.
///
/// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/validator.md#attesting
///
/// ## Detail
///
/// The given `validator_duties` should already be filtered to only contain those that match
/// `slot` and `committee_index`. Critical errors will be logged if this is not the case.
async fn publish_attestations_and_aggregates(
self,
slot: Slot,
committee_index: CommitteeIndex,
validator_duties: Vec<DutyAndProof>,
aggregate_production_instant: Instant,
) -> Result<(), ()> {
let log = self.context.log();
let attestations_timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::ATTESTATIONS],
);
// There's not need to produce `Attestation` or `SignedAggregateAndProof` if we do not have
// any validators for the given `slot` and `committee_index`.
if validator_duties.is_empty() {
return Ok(());
}
// Step 1.
//
// Download, sign and publish an `Attestation` for each validator.
let attestation_opt = self
.produce_and_publish_attestations(slot, committee_index, &validator_duties)
.await
.map_err(move |e| {
crit!(
log,
"Error during attestation routine";
"error" => format!("{:?}", e),
"committee_index" => committee_index,
"slot" => slot.as_u64(),
)
})?;
drop(attestations_timer);
// Step 2.
//
// If an attestation was produced, make an aggregate.
if let Some(attestation_data) = attestation_opt {
// First, wait until the `aggregation_production_instant` (2/3rds
// of the way though the slot). As verified in the
// `delay_triggers_when_in_the_past` test, this code will still run
// even if the instant has already elapsed.
sleep_until(aggregate_production_instant).await;
// Start the metrics timer *after* we've done the delay.
let _aggregates_timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::AGGREGATES],
);
// Then download, sign and publish a `SignedAggregateAndProof` for each
// validator that is elected to aggregate for this `slot` and
// `committee_index`.
self.produce_and_publish_aggregates(
&attestation_data,
committee_index,
&validator_duties,
)
.await
.map_err(move |e| {
crit!(
log,
"Error during attestation routine";
"error" => format!("{:?}", e),
"committee_index" => committee_index,
"slot" => slot.as_u64(),
)
})?;
}
Ok(())
}
/// Performs the first step of the attesting process: downloading `Attestation` objects,
/// signing them and returning them to the validator.
///
/// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/validator.md#attesting
///
/// ## Detail
///
/// The given `validator_duties` should already be filtered to only contain those that match
/// `slot` and `committee_index`. Critical errors will be logged if this is not the case.
///
/// Only one `Attestation` is downloaded from the BN. It is then cloned and signed by each
/// validator and the list of individually-signed `Attestation` objects is returned to the BN.
async fn produce_and_publish_attestations(
&self,
slot: Slot,
committee_index: CommitteeIndex,
validator_duties: &[DutyAndProof],
) -> Result<Option<AttestationData>, String> {
let log = self.context.log();
if validator_duties.is_empty() {
return Ok(None);
}
let current_epoch = self
.slot_clock
.now()
.ok_or("Unable to determine current slot from clock")?
.epoch(E::slots_per_epoch());
let attestation_data = self
.beacon_nodes
.first_success(|beacon_node| async move {
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::ATTESTATIONS_HTTP_GET],
);
beacon_node
.get_validator_attestation_data(slot, committee_index)
.await
.map_err(|e| format!("Failed to produce attestation data: {:?}", e))
.map(|result| result.data)
})
.await
.map_err(|e| e.to_string())?;
// Create futures to produce signed `Attestation` objects.
let attestation_data_ref = &attestation_data;
let signing_futures = validator_duties.iter().map(|duty_and_proof| async move {
let duty = &duty_and_proof.duty;
let attestation_data = attestation_data_ref;
// Ensure that the attestation matches the duties.
if !duty.match_attestation_data::<E>(attestation_data, &self.context.eth2_config.spec) {
crit!(
log,
"Inconsistent validator duties during signing";
"validator" => ?duty.pubkey,
"duty_slot" => duty.slot,
"attestation_slot" => attestation_data.slot,
"duty_index" => duty.committee_index,
"attestation_index" => attestation_data.index,
);
return None;
}
let mut attestation = match Attestation::<E>::empty_for_signing(
duty.committee_index,
duty.committee_length as usize,
attestation_data.slot,
attestation_data.beacon_block_root,
attestation_data.source,
attestation_data.target,
&self.context.eth2_config.spec,
) {
Ok(attestation) => attestation,
Err(err) => {
crit!(
log,
"Invalid validator duties during signing";
"validator" => ?duty.pubkey,
"duty" => ?duty,
"err" => ?err,
);
return None;
}
};
match self
.validator_store
.sign_attestation(
duty.pubkey,
duty.validator_committee_index as usize,
&mut attestation,
current_epoch,
)
.await
{
Ok(()) => Some((attestation, duty.validator_index)),
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
warn!(
log,
"Missing pubkey for attestation";
"info" => "a validator may have recently been removed from this VC",
"pubkey" => ?pubkey,
"validator" => ?duty.pubkey,
"committee_index" => committee_index,
"slot" => slot.as_u64(),
);
None
}
Err(e) => {
crit!(
log,
"Failed to sign attestation";
"error" => ?e,
"validator" => ?duty.pubkey,
"committee_index" => committee_index,
"slot" => slot.as_u64(),
);
None
}
}
});
// Execute all the futures in parallel, collecting any successful results.
let (ref attestations, ref validator_indices): (Vec<_>, Vec<_>) = join_all(signing_futures)
.await
.into_iter()
.flatten()
.unzip();
if attestations.is_empty() {
warn!(log, "No attestations were published");
return Ok(None);
}
let fork_name = self
.context
.eth2_config
.spec
.fork_name_at_slot::<E>(attestation_data.slot);
// Post the attestations to the BN.
match self
.beacon_nodes
.request(ApiTopic::Attestations, |beacon_node| async move {
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::ATTESTATIONS_HTTP_POST],
);
if fork_name.electra_enabled() {
beacon_node
.post_beacon_pool_attestations_v2(attestations, fork_name)
.await
} else {
beacon_node
.post_beacon_pool_attestations_v1(attestations)
.await
}
})
.await
{
Ok(()) => info!(
log,
"Successfully published attestations";
"count" => attestations.len(),
"validator_indices" => ?validator_indices,
"head_block" => ?attestation_data.beacon_block_root,
"committee_index" => attestation_data.index,
"slot" => attestation_data.slot.as_u64(),
"type" => "unaggregated",
),
Err(e) => error!(
log,
"Unable to publish attestations";
"error" => %e,
"committee_index" => attestation_data.index,
"slot" => slot.as_u64(),
"type" => "unaggregated",
),
}
Ok(Some(attestation_data))
}
/// Performs the second step of the attesting process: downloading an aggregated `Attestation`,
/// converting it into a `SignedAggregateAndProof` and returning it to the BN.
///
/// https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/validator.md#broadcast-aggregate
///
/// ## Detail
///
/// The given `validator_duties` should already be filtered to only contain those that match
/// `slot` and `committee_index`. Critical errors will be logged if this is not the case.
///
/// Only one aggregated `Attestation` is downloaded from the BN. It is then cloned and signed
/// by each validator and the list of individually-signed `SignedAggregateAndProof` objects is
/// returned to the BN.
async fn produce_and_publish_aggregates(
&self,
attestation_data: &AttestationData,
committee_index: CommitteeIndex,
validator_duties: &[DutyAndProof],
) -> Result<(), String> {
let log = self.context.log();
if !validator_duties
.iter()
.any(|duty_and_proof| duty_and_proof.selection_proof.is_some())
{
// Exit early if no validator is aggregator
return Ok(());
}
let fork_name = self
.context
.eth2_config
.spec
.fork_name_at_slot::<E>(attestation_data.slot);
let aggregated_attestation = &self
.beacon_nodes
.first_success(|beacon_node| async move {
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::AGGREGATES_HTTP_GET],
);
if fork_name.electra_enabled() {
beacon_node
.get_validator_aggregate_attestation_v2(
attestation_data.slot,
attestation_data.tree_hash_root(),
committee_index,
)
.await
.map_err(|e| {
format!("Failed to produce an aggregate attestation: {:?}", e)
})?
.ok_or_else(|| format!("No aggregate available for {:?}", attestation_data))
.map(|result| result.data)
} else {
beacon_node
.get_validator_aggregate_attestation_v1(
attestation_data.slot,
attestation_data.tree_hash_root(),
)
.await
.map_err(|e| {
format!("Failed to produce an aggregate attestation: {:?}", e)
})?
.ok_or_else(|| format!("No aggregate available for {:?}", attestation_data))
.map(|result| result.data)
}
})
.await
.map_err(|e| e.to_string())?;
// Create futures to produce the signed aggregated attestations.
let signing_futures = validator_duties.iter().map(|duty_and_proof| async move {
let duty = &duty_and_proof.duty;
let selection_proof = duty_and_proof.selection_proof.as_ref()?;
if !duty.match_attestation_data::<E>(attestation_data, &self.context.eth2_config.spec) {
crit!(log, "Inconsistent validator duties during signing");
return None;
}
match self
.validator_store
.produce_signed_aggregate_and_proof(
duty.pubkey,
duty.validator_index,
aggregated_attestation.clone(),
selection_proof.clone(),
)
.await
{
Ok(aggregate) => Some(aggregate),
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
debug!(
log,
"Missing pubkey for aggregate";
"pubkey" => ?pubkey,
);
None
}
Err(e) => {
crit!(
log,
"Failed to sign aggregate";
"error" => ?e,
"pubkey" => ?duty.pubkey,
);
None
}
}
});
// Execute all the futures in parallel, collecting any successful results.
let signed_aggregate_and_proofs = join_all(signing_futures)
.await
.into_iter()
.flatten()
.collect::<Vec<_>>();
if !signed_aggregate_and_proofs.is_empty() {
let signed_aggregate_and_proofs_slice = signed_aggregate_and_proofs.as_slice();
match self
.beacon_nodes
.first_success(|beacon_node| async move {
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::ATTESTATION_SERVICE_TIMES,
&[validator_metrics::AGGREGATES_HTTP_POST],
);
if fork_name.electra_enabled() {
beacon_node
.post_validator_aggregate_and_proof_v2(
signed_aggregate_and_proofs_slice,
fork_name,
)
.await
} else {
beacon_node
.post_validator_aggregate_and_proof_v1(
signed_aggregate_and_proofs_slice,
)
.await
}
})
.await
{
Ok(()) => {
for signed_aggregate_and_proof in signed_aggregate_and_proofs {
let attestation = signed_aggregate_and_proof.message().aggregate();
info!(
log,
"Successfully published attestation";
"aggregator" => signed_aggregate_and_proof.message().aggregator_index(),
"signatures" => attestation.num_set_aggregation_bits(),
"head_block" => format!("{:?}", attestation.data().beacon_block_root),
"committee_index" => attestation.committee_index(),
"slot" => attestation.data().slot.as_u64(),
"type" => "aggregated",
);
}
}
Err(e) => {
for signed_aggregate_and_proof in signed_aggregate_and_proofs {
let attestation = &signed_aggregate_and_proof.message().aggregate();
crit!(
log,
"Failed to publish attestation";
"error" => %e,
"aggregator" => signed_aggregate_and_proof.message().aggregator_index(),
"committee_index" => attestation.committee_index(),
"slot" => attestation.data().slot.as_u64(),
"type" => "aggregated",
);
}
}
}
}
Ok(())
}
/// Spawn a blocking task to run the slashing protection pruning process.
///
/// Start the task at `pruning_instant` to avoid interference with other tasks.
fn spawn_slashing_protection_pruning_task(&self, slot: Slot, pruning_instant: Instant) {
let attestation_service = self.clone();
let executor = self.inner.context.executor.clone();
let current_epoch = slot.epoch(E::slots_per_epoch());
// Wait for `pruning_instant` in a regular task, and then switch to a blocking one.
self.inner.context.executor.spawn(
async move {
sleep_until(pruning_instant).await;
executor.spawn_blocking(
move || {
attestation_service
.validator_store
.prune_slashing_protection_db(current_epoch, false)
},
"slashing_protection_pruning",
)
},
"slashing_protection_pre_pruning",
);
}
}
#[cfg(test)]
mod tests {
use super::*;
use futures::future::FutureExt;
use parking_lot::RwLock;
/// This test is to ensure that a `tokio_timer::Sleep` with an instant in the past will still
/// trigger.
#[tokio::test]
async fn delay_triggers_when_in_the_past() {
let in_the_past = Instant::now() - Duration::from_secs(2);
let state_1 = Arc::new(RwLock::new(in_the_past));
let state_2 = state_1.clone();
sleep_until(in_the_past)
.map(move |()| *state_1.write() = Instant::now())
.await;
assert!(
*state_2.read() > in_the_past,
"state should have been updated"
);
}
}

View File

@@ -0,0 +1,689 @@
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback, Error as FallbackError, Errors};
use bls::SignatureBytes;
use environment::RuntimeContext;
use eth2::types::{FullBlockContents, PublishBlockRequest};
use eth2::{BeaconNodeHttpClient, StatusCode};
use graffiti_file::{determine_graffiti, GraffitiFile};
use slog::{crit, debug, error, info, trace, warn, Logger};
use slot_clock::SlotClock;
use std::fmt::Debug;
use std::future::Future;
use std::ops::Deref;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use types::{
BlindedBeaconBlock, BlockType, EthSpec, Graffiti, PublicKeyBytes, SignedBlindedBeaconBlock,
Slot,
};
use validator_store::{Error as ValidatorStoreError, ValidatorStore};
#[derive(Debug)]
pub enum BlockError {
/// A recoverable error that can be retried, as the validator has not signed anything.
Recoverable(String),
/// An irrecoverable error has occurred during block proposal and should not be retried, as a
/// block may have already been signed.
Irrecoverable(String),
}
impl From<Errors<BlockError>> for BlockError {
fn from(e: Errors<BlockError>) -> Self {
if e.0.iter().any(|(_, error)| {
matches!(
error,
FallbackError::RequestFailed(BlockError::Irrecoverable(_))
)
}) {
BlockError::Irrecoverable(e.to_string())
} else {
BlockError::Recoverable(e.to_string())
}
}
}
/// Builds a `BlockService`.
#[derive(Default)]
pub struct BlockServiceBuilder<T, E: EthSpec> {
validator_store: Option<Arc<ValidatorStore<T, E>>>,
slot_clock: Option<Arc<T>>,
beacon_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
proposer_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
context: Option<RuntimeContext<E>>,
graffiti: Option<Graffiti>,
graffiti_file: Option<GraffitiFile>,
}
impl<T: SlotClock + 'static, E: EthSpec> BlockServiceBuilder<T, E> {
pub fn new() -> Self {
Self {
validator_store: None,
slot_clock: None,
beacon_nodes: None,
proposer_nodes: None,
context: None,
graffiti: None,
graffiti_file: None,
}
}
pub fn validator_store(mut self, store: Arc<ValidatorStore<T, E>>) -> Self {
self.validator_store = Some(store);
self
}
pub fn slot_clock(mut self, slot_clock: T) -> Self {
self.slot_clock = Some(Arc::new(slot_clock));
self
}
pub fn beacon_nodes(mut self, beacon_nodes: Arc<BeaconNodeFallback<T, E>>) -> Self {
self.beacon_nodes = Some(beacon_nodes);
self
}
pub fn proposer_nodes(mut self, proposer_nodes: Arc<BeaconNodeFallback<T, E>>) -> Self {
self.proposer_nodes = Some(proposer_nodes);
self
}
pub fn runtime_context(mut self, context: RuntimeContext<E>) -> Self {
self.context = Some(context);
self
}
pub fn graffiti(mut self, graffiti: Option<Graffiti>) -> Self {
self.graffiti = graffiti;
self
}
pub fn graffiti_file(mut self, graffiti_file: Option<GraffitiFile>) -> Self {
self.graffiti_file = graffiti_file;
self
}
pub fn build(self) -> Result<BlockService<T, E>, String> {
Ok(BlockService {
inner: Arc::new(Inner {
validator_store: self
.validator_store
.ok_or("Cannot build BlockService without validator_store")?,
slot_clock: self
.slot_clock
.ok_or("Cannot build BlockService without slot_clock")?,
beacon_nodes: self
.beacon_nodes
.ok_or("Cannot build BlockService without beacon_node")?,
context: self
.context
.ok_or("Cannot build BlockService without runtime_context")?,
proposer_nodes: self.proposer_nodes,
graffiti: self.graffiti,
graffiti_file: self.graffiti_file,
}),
})
}
}
// Combines a set of non-block-proposing `beacon_nodes` and only-block-proposing
// `proposer_nodes`.
pub struct ProposerFallback<T, E: EthSpec> {
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
proposer_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
}
impl<T: SlotClock, E: EthSpec> ProposerFallback<T, E> {
// Try `func` on `self.proposer_nodes` first. If that doesn't work, try `self.beacon_nodes`.
pub async fn request_proposers_first<F, Err, R>(&self, func: F) -> Result<(), Errors<Err>>
where
F: Fn(BeaconNodeHttpClient) -> R + Clone,
R: Future<Output = Result<(), Err>>,
Err: Debug,
{
// If there are proposer nodes, try calling `func` on them and return early if they are successful.
if let Some(proposer_nodes) = &self.proposer_nodes {
if proposer_nodes
.request(ApiTopic::Blocks, func.clone())
.await
.is_ok()
{
return Ok(());
}
}
// If the proposer nodes failed, try on the non-proposer nodes.
self.beacon_nodes.request(ApiTopic::Blocks, func).await
}
// Try `func` on `self.beacon_nodes` first. If that doesn't work, try `self.proposer_nodes`.
pub async fn request_proposers_last<F, O, Err, R>(&self, func: F) -> Result<O, Errors<Err>>
where
F: Fn(BeaconNodeHttpClient) -> R + Clone,
R: Future<Output = Result<O, Err>>,
Err: Debug,
{
// Try running `func` on the non-proposer beacon nodes.
let beacon_nodes_result = self.beacon_nodes.first_success(func.clone()).await;
match (beacon_nodes_result, &self.proposer_nodes) {
// The non-proposer node call succeed, return the result.
(Ok(success), _) => Ok(success),
// The non-proposer node call failed, but we don't have any proposer nodes. Return an error.
(Err(e), None) => Err(e),
// The non-proposer node call failed, try the same call on the proposer nodes.
(Err(_), Some(proposer_nodes)) => proposer_nodes.first_success(func).await,
}
}
}
/// Helper to minimise `Arc` usage.
pub struct Inner<T, E: EthSpec> {
validator_store: Arc<ValidatorStore<T, E>>,
slot_clock: Arc<T>,
pub beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
pub proposer_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
context: RuntimeContext<E>,
graffiti: Option<Graffiti>,
graffiti_file: Option<GraffitiFile>,
}
/// Attempts to produce attestations for any block producer(s) at the start of the epoch.
pub struct BlockService<T, E: EthSpec> {
inner: Arc<Inner<T, E>>,
}
impl<T, E: EthSpec> Clone for BlockService<T, E> {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl<T, E: EthSpec> Deref for BlockService<T, E> {
type Target = Inner<T, E>;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
/// Notification from the duties service that we should try to produce a block.
pub struct BlockServiceNotification {
pub slot: Slot,
pub block_proposers: Vec<PublicKeyBytes>,
}
impl<T: SlotClock + 'static, E: EthSpec> BlockService<T, E> {
pub fn start_update_service(
self,
mut notification_rx: mpsc::Receiver<BlockServiceNotification>,
) -> Result<(), String> {
let log = self.context.log().clone();
info!(log, "Block production service started");
let executor = self.inner.context.executor.clone();
executor.spawn(
async move {
while let Some(notif) = notification_rx.recv().await {
self.do_update(notif).await.ok();
}
debug!(log, "Block service shutting down");
},
"block_service",
);
Ok(())
}
/// Attempt to produce a block for any block producers in the `ValidatorStore`.
async fn do_update(&self, notification: BlockServiceNotification) -> Result<(), ()> {
let log = self.context.log();
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::BLOCK_SERVICE_TIMES,
&[validator_metrics::FULL_UPDATE],
);
let slot = self.slot_clock.now().ok_or_else(move || {
crit!(log, "Duties manager failed to read slot clock");
})?;
if notification.slot != slot {
warn!(
log,
"Skipping block production for expired slot";
"current_slot" => slot.as_u64(),
"notification_slot" => notification.slot.as_u64(),
"info" => "Your machine could be overloaded"
);
return Ok(());
}
if slot == self.context.eth2_config.spec.genesis_slot {
debug!(
log,
"Not producing block at genesis slot";
"proposers" => format!("{:?}", notification.block_proposers),
);
return Ok(());
}
trace!(
log,
"Block service update started";
"slot" => slot.as_u64()
);
let proposers = notification.block_proposers;
if proposers.is_empty() {
trace!(
log,
"No local block proposers for this slot";
"slot" => slot.as_u64()
)
} else if proposers.len() > 1 {
error!(
log,
"Multiple block proposers for this slot";
"action" => "producing blocks for all proposers",
"num_proposers" => proposers.len(),
"slot" => slot.as_u64(),
)
}
for validator_pubkey in proposers {
let builder_boost_factor = self.get_builder_boost_factor(&validator_pubkey);
let service = self.clone();
let log = log.clone();
self.inner.context.executor.spawn(
async move {
let result = service
.publish_block(slot, validator_pubkey, builder_boost_factor)
.await;
match result {
Ok(_) => {}
Err(BlockError::Recoverable(e)) | Err(BlockError::Irrecoverable(e)) => {
error!(
log,
"Error whilst producing block";
"error" => ?e,
"block_slot" => ?slot,
"info" => "block v3 proposal failed, this error may or may not result in a missed block"
);
}
}
},
"block service",
)
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
async fn sign_and_publish_block(
&self,
proposer_fallback: ProposerFallback<T, E>,
slot: Slot,
graffiti: Option<Graffiti>,
validator_pubkey: &PublicKeyBytes,
unsigned_block: UnsignedBlock<E>,
) -> Result<(), BlockError> {
let log = self.context.log();
let signing_timer = validator_metrics::start_timer(&validator_metrics::BLOCK_SIGNING_TIMES);
let res = match unsigned_block {
UnsignedBlock::Full(block_contents) => {
let (block, maybe_blobs) = block_contents.deconstruct();
self.validator_store
.sign_block(*validator_pubkey, block, slot)
.await
.map(|b| SignedBlock::Full(PublishBlockRequest::new(Arc::new(b), maybe_blobs)))
}
UnsignedBlock::Blinded(block) => self
.validator_store
.sign_block(*validator_pubkey, block, slot)
.await
.map(Arc::new)
.map(SignedBlock::Blinded),
};
let signed_block = match res {
Ok(block) => block,
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently removed
// via the API.
warn!(
log,
"Missing pubkey for block";
"info" => "a validator may have recently been removed from this VC",
"pubkey" => ?pubkey,
"slot" => ?slot
);
return Ok(());
}
Err(e) => {
return Err(BlockError::Recoverable(format!(
"Unable to sign block: {:?}",
e
)))
}
};
let signing_time_ms =
Duration::from_secs_f64(signing_timer.map_or(0.0, |t| t.stop_and_record())).as_millis();
info!(
log,
"Publishing signed block";
"slot" => slot.as_u64(),
"signing_time_ms" => signing_time_ms,
);
// Publish block with first available beacon node.
//
// Try the proposer nodes first, since we've likely gone to efforts to
// protect them from DoS attacks and they're most likely to successfully
// publish a block.
proposer_fallback
.request_proposers_first(|beacon_node| async {
self.publish_signed_block_contents(&signed_block, beacon_node)
.await
})
.await?;
info!(
log,
"Successfully published block";
"block_type" => ?signed_block.block_type(),
"deposits" => signed_block.num_deposits(),
"attestations" => signed_block.num_attestations(),
"graffiti" => ?graffiti.map(|g| g.as_utf8_lossy()),
"slot" => signed_block.slot().as_u64(),
);
Ok(())
}
async fn publish_block(
self,
slot: Slot,
validator_pubkey: PublicKeyBytes,
builder_boost_factor: Option<u64>,
) -> Result<(), BlockError> {
let log = self.context.log();
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::BLOCK_SERVICE_TIMES,
&[validator_metrics::BEACON_BLOCK],
);
let randao_reveal = match self
.validator_store
.randao_reveal(validator_pubkey, slot.epoch(E::slots_per_epoch()))
.await
{
Ok(signature) => signature.into(),
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently removed
// via the API.
warn!(
log,
"Missing pubkey for block randao";
"info" => "a validator may have recently been removed from this VC",
"pubkey" => ?pubkey,
"slot" => ?slot
);
return Ok(());
}
Err(e) => {
return Err(BlockError::Recoverable(format!(
"Unable to produce randao reveal signature: {:?}",
e
)))
}
};
let graffiti = determine_graffiti(
&validator_pubkey,
log,
self.graffiti_file.clone(),
self.validator_store.graffiti(&validator_pubkey),
self.graffiti,
);
let randao_reveal_ref = &randao_reveal;
let self_ref = &self;
let proposer_index = self.validator_store.validator_index(&validator_pubkey);
let proposer_fallback = ProposerFallback {
beacon_nodes: self.beacon_nodes.clone(),
proposer_nodes: self.proposer_nodes.clone(),
};
info!(
log,
"Requesting unsigned block";
"slot" => slot.as_u64(),
);
// Request block from first responsive beacon node.
//
// Try the proposer nodes last, since it's likely that they don't have a
// great view of attestations on the network.
let unsigned_block = proposer_fallback
.request_proposers_last(|beacon_node| async move {
let _get_timer = validator_metrics::start_timer_vec(
&validator_metrics::BLOCK_SERVICE_TIMES,
&[validator_metrics::BEACON_BLOCK_HTTP_GET],
);
Self::get_validator_block(
&beacon_node,
slot,
randao_reveal_ref,
graffiti,
proposer_index,
builder_boost_factor,
log,
)
.await
.map_err(|e| {
BlockError::Recoverable(format!(
"Error from beacon node when producing block: {:?}",
e
))
})
})
.await?;
self_ref
.sign_and_publish_block(
proposer_fallback,
slot,
graffiti,
&validator_pubkey,
unsigned_block,
)
.await?;
Ok(())
}
async fn publish_signed_block_contents(
&self,
signed_block: &SignedBlock<E>,
beacon_node: BeaconNodeHttpClient,
) -> Result<(), BlockError> {
let log = self.context.log();
let slot = signed_block.slot();
match signed_block {
SignedBlock::Full(signed_block) => {
let _post_timer = validator_metrics::start_timer_vec(
&validator_metrics::BLOCK_SERVICE_TIMES,
&[validator_metrics::BEACON_BLOCK_HTTP_POST],
);
beacon_node
.post_beacon_blocks_v2_ssz(signed_block, None)
.await
.or_else(|e| handle_block_post_error(e, slot, log))?
}
SignedBlock::Blinded(signed_block) => {
let _post_timer = validator_metrics::start_timer_vec(
&validator_metrics::BLOCK_SERVICE_TIMES,
&[validator_metrics::BLINDED_BEACON_BLOCK_HTTP_POST],
);
beacon_node
.post_beacon_blinded_blocks_v2_ssz(signed_block, None)
.await
.or_else(|e| handle_block_post_error(e, slot, log))?
}
}
Ok::<_, BlockError>(())
}
async fn get_validator_block(
beacon_node: &BeaconNodeHttpClient,
slot: Slot,
randao_reveal_ref: &SignatureBytes,
graffiti: Option<Graffiti>,
proposer_index: Option<u64>,
builder_boost_factor: Option<u64>,
log: &Logger,
) -> Result<UnsignedBlock<E>, BlockError> {
let (block_response, _) = beacon_node
.get_validator_blocks_v3::<E>(
slot,
randao_reveal_ref,
graffiti.as_ref(),
builder_boost_factor,
)
.await
.map_err(|e| {
BlockError::Recoverable(format!(
"Error from beacon node when producing block: {:?}",
e
))
})?;
let unsigned_block = match block_response.data {
eth2::types::ProduceBlockV3Response::Full(block) => UnsignedBlock::Full(block),
eth2::types::ProduceBlockV3Response::Blinded(block) => UnsignedBlock::Blinded(block),
};
info!(
log,
"Received unsigned block";
"slot" => slot.as_u64(),
);
if proposer_index != Some(unsigned_block.proposer_index()) {
return Err(BlockError::Recoverable(
"Proposer index does not match block proposer. Beacon chain re-orged".to_string(),
));
}
Ok::<_, BlockError>(unsigned_block)
}
/// Returns the builder boost factor of the given public key.
/// The priority order for fetching this value is:
///
/// 1. validator_definitions.yml
/// 2. process level flag
fn get_builder_boost_factor(&self, validator_pubkey: &PublicKeyBytes) -> Option<u64> {
// Apply per validator configuration first.
let validator_builder_boost_factor = self
.validator_store
.determine_validator_builder_boost_factor(validator_pubkey);
// Fallback to process-wide configuration if needed.
let maybe_builder_boost_factor = validator_builder_boost_factor.or_else(|| {
self.validator_store
.determine_default_builder_boost_factor()
});
if let Some(builder_boost_factor) = maybe_builder_boost_factor {
// if builder boost factor is set to 100 it should be treated
// as None to prevent unnecessary calculations that could
// lead to loss of information.
if builder_boost_factor == 100 {
return None;
}
return Some(builder_boost_factor);
}
None
}
}
pub enum UnsignedBlock<E: EthSpec> {
Full(FullBlockContents<E>),
Blinded(BlindedBeaconBlock<E>),
}
impl<E: EthSpec> UnsignedBlock<E> {
pub fn proposer_index(&self) -> u64 {
match self {
UnsignedBlock::Full(block) => block.block().proposer_index(),
UnsignedBlock::Blinded(block) => block.proposer_index(),
}
}
}
#[derive(Debug)]
pub enum SignedBlock<E: EthSpec> {
Full(PublishBlockRequest<E>),
Blinded(Arc<SignedBlindedBeaconBlock<E>>),
}
impl<E: EthSpec> SignedBlock<E> {
pub fn block_type(&self) -> BlockType {
match self {
SignedBlock::Full(_) => BlockType::Full,
SignedBlock::Blinded(_) => BlockType::Blinded,
}
}
pub fn slot(&self) -> Slot {
match self {
SignedBlock::Full(block) => block.signed_block().message().slot(),
SignedBlock::Blinded(block) => block.message().slot(),
}
}
pub fn num_deposits(&self) -> usize {
match self {
SignedBlock::Full(block) => block.signed_block().message().body().deposits().len(),
SignedBlock::Blinded(block) => block.message().body().deposits().len(),
}
}
pub fn num_attestations(&self) -> usize {
match self {
SignedBlock::Full(block) => block.signed_block().message().body().attestations_len(),
SignedBlock::Blinded(block) => block.message().body().attestations_len(),
}
}
}
fn handle_block_post_error(err: eth2::Error, slot: Slot, log: &Logger) -> Result<(), BlockError> {
// Handle non-200 success codes.
if let Some(status) = err.status() {
if status == StatusCode::ACCEPTED {
info!(
log,
"Block is already known to BN or might be invalid";
"slot" => slot,
"status_code" => status.as_u16(),
);
return Ok(());
} else if status.is_success() {
debug!(
log,
"Block published with non-standard success code";
"slot" => slot,
"status_code" => status.as_u16(),
);
return Ok(());
}
}
Err(BlockError::Irrecoverable(format!(
"Error from beacon node when publishing block: {err:?}",
)))
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
pub mod attestation_service;
pub mod block_service;
pub mod duties_service;
pub mod preparation_service;
pub mod sync;
pub mod sync_committee_service;

View File

@@ -0,0 +1,496 @@
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback};
use bls::PublicKeyBytes;
use doppelganger_service::DoppelgangerStatus;
use environment::RuntimeContext;
use parking_lot::RwLock;
use slog::{debug, error, info, warn};
use slot_clock::SlotClock;
use std::collections::HashMap;
use std::hash::Hash;
use std::ops::Deref;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::time::{sleep, Duration};
use types::{
Address, ChainSpec, EthSpec, ProposerPreparationData, SignedValidatorRegistrationData,
ValidatorRegistrationData,
};
use validator_store::{Error as ValidatorStoreError, ProposalData, ValidatorStore};
/// Number of epochs before the Bellatrix hard fork to begin posting proposer preparations.
const PROPOSER_PREPARATION_LOOKAHEAD_EPOCHS: u64 = 2;
/// Number of epochs to wait before re-submitting validator registration.
const EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION: u64 = 1;
/// Builds an `PreparationService`.
#[derive(Default)]
pub struct PreparationServiceBuilder<T: SlotClock + 'static, E: EthSpec> {
validator_store: Option<Arc<ValidatorStore<T, E>>>,
slot_clock: Option<T>,
beacon_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
context: Option<RuntimeContext<E>>,
builder_registration_timestamp_override: Option<u64>,
validator_registration_batch_size: Option<usize>,
}
impl<T: SlotClock + 'static, E: EthSpec> PreparationServiceBuilder<T, E> {
pub fn new() -> Self {
Self {
validator_store: None,
slot_clock: None,
beacon_nodes: None,
context: None,
builder_registration_timestamp_override: None,
validator_registration_batch_size: None,
}
}
pub fn validator_store(mut self, store: Arc<ValidatorStore<T, E>>) -> Self {
self.validator_store = Some(store);
self
}
pub fn slot_clock(mut self, slot_clock: T) -> Self {
self.slot_clock = Some(slot_clock);
self
}
pub fn beacon_nodes(mut self, beacon_nodes: Arc<BeaconNodeFallback<T, E>>) -> Self {
self.beacon_nodes = Some(beacon_nodes);
self
}
pub fn runtime_context(mut self, context: RuntimeContext<E>) -> Self {
self.context = Some(context);
self
}
pub fn builder_registration_timestamp_override(
mut self,
builder_registration_timestamp_override: Option<u64>,
) -> Self {
self.builder_registration_timestamp_override = builder_registration_timestamp_override;
self
}
pub fn validator_registration_batch_size(
mut self,
validator_registration_batch_size: usize,
) -> Self {
self.validator_registration_batch_size = Some(validator_registration_batch_size);
self
}
pub fn build(self) -> Result<PreparationService<T, E>, String> {
Ok(PreparationService {
inner: Arc::new(Inner {
validator_store: self
.validator_store
.ok_or("Cannot build PreparationService without validator_store")?,
slot_clock: self
.slot_clock
.ok_or("Cannot build PreparationService without slot_clock")?,
beacon_nodes: self
.beacon_nodes
.ok_or("Cannot build PreparationService without beacon_nodes")?,
context: self
.context
.ok_or("Cannot build PreparationService without runtime_context")?,
builder_registration_timestamp_override: self
.builder_registration_timestamp_override,
validator_registration_batch_size: self.validator_registration_batch_size.ok_or(
"Cannot build PreparationService without validator_registration_batch_size",
)?,
validator_registration_cache: RwLock::new(HashMap::new()),
}),
})
}
}
/// Helper to minimise `Arc` usage.
pub struct Inner<T, E: EthSpec> {
validator_store: Arc<ValidatorStore<T, E>>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
context: RuntimeContext<E>,
builder_registration_timestamp_override: Option<u64>,
// Used to track unpublished validator registration changes.
validator_registration_cache:
RwLock<HashMap<ValidatorRegistrationKey, SignedValidatorRegistrationData>>,
validator_registration_batch_size: usize,
}
#[derive(Hash, Eq, PartialEq, Debug, Clone)]
pub struct ValidatorRegistrationKey {
pub fee_recipient: Address,
pub gas_limit: u64,
pub pubkey: PublicKeyBytes,
}
impl From<ValidatorRegistrationData> for ValidatorRegistrationKey {
fn from(data: ValidatorRegistrationData) -> Self {
let ValidatorRegistrationData {
fee_recipient,
gas_limit,
timestamp: _,
pubkey,
} = data;
Self {
fee_recipient,
gas_limit,
pubkey,
}
}
}
/// Attempts to produce proposer preparations for all known validators at the beginning of each epoch.
pub struct PreparationService<T, E: EthSpec> {
inner: Arc<Inner<T, E>>,
}
impl<T, E: EthSpec> Clone for PreparationService<T, E> {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl<T, E: EthSpec> Deref for PreparationService<T, E> {
type Target = Inner<T, E>;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<T: SlotClock + 'static, E: EthSpec> PreparationService<T, E> {
pub fn start_update_service(self, spec: &ChainSpec) -> Result<(), String> {
self.clone().start_validator_registration_service(spec)?;
self.start_proposer_prepare_service(spec)
}
/// Starts the service which periodically produces proposer preparations.
pub fn start_proposer_prepare_service(self, spec: &ChainSpec) -> Result<(), String> {
let log = self.context.log().clone();
let slot_duration = Duration::from_secs(spec.seconds_per_slot);
info!(
log,
"Proposer preparation service started";
);
let executor = self.context.executor.clone();
let spec = spec.clone();
let interval_fut = async move {
loop {
if self.should_publish_at_current_slot(&spec) {
// Poll the endpoint immediately to ensure fee recipients are received.
self.prepare_proposers_and_publish(&spec)
.await
.map_err(|e| {
error!(
log,
"Error during proposer preparation";
"error" => ?e,
)
})
.unwrap_or(());
}
if let Some(duration_to_next_slot) = self.slot_clock.duration_to_next_slot() {
sleep(duration_to_next_slot).await;
} else {
error!(log, "Failed to read slot clock");
// If we can't read the slot clock, just wait another slot.
sleep(slot_duration).await;
}
}
};
executor.spawn(interval_fut, "preparation_service");
Ok(())
}
/// Starts the service which periodically sends connected beacon nodes validator registration information.
pub fn start_validator_registration_service(self, spec: &ChainSpec) -> Result<(), String> {
let log = self.context.log().clone();
info!(
log,
"Validator registration service started";
);
let spec = spec.clone();
let slot_duration = Duration::from_secs(spec.seconds_per_slot);
let executor = self.context.executor.clone();
let validator_registration_fut = async move {
loop {
// Poll the endpoint immediately to ensure fee recipients are received.
if let Err(e) = self.register_validators().await {
error!(log,"Error during validator registration";"error" => ?e);
}
// Wait one slot if the register validator request fails or if we should not publish at the current slot.
if let Some(duration_to_next_slot) = self.slot_clock.duration_to_next_slot() {
sleep(duration_to_next_slot).await;
} else {
error!(log, "Failed to read slot clock");
// If we can't read the slot clock, just wait another slot.
sleep(slot_duration).await;
}
}
};
executor.spawn(validator_registration_fut, "validator_registration_service");
Ok(())
}
/// Return `true` if the current slot is close to or past the Bellatrix fork epoch.
///
/// This avoids spamming the BN with preparations before the Bellatrix fork epoch, which may
/// cause errors if it doesn't support the preparation API.
fn should_publish_at_current_slot(&self, spec: &ChainSpec) -> bool {
let current_epoch = self
.slot_clock
.now()
.map_or(E::genesis_epoch(), |slot| slot.epoch(E::slots_per_epoch()));
spec.bellatrix_fork_epoch.map_or(false, |fork_epoch| {
current_epoch + PROPOSER_PREPARATION_LOOKAHEAD_EPOCHS >= fork_epoch
})
}
/// Prepare proposer preparations and send to beacon node
async fn prepare_proposers_and_publish(&self, spec: &ChainSpec) -> Result<(), String> {
let preparation_data = self.collect_preparation_data(spec);
if !preparation_data.is_empty() {
self.publish_preparation_data(preparation_data).await?;
}
Ok(())
}
fn collect_preparation_data(&self, spec: &ChainSpec) -> Vec<ProposerPreparationData> {
let log = self.context.log();
self.collect_proposal_data(|pubkey, proposal_data| {
if let Some(fee_recipient) = proposal_data.fee_recipient {
Some(ProposerPreparationData {
// Ignore fee recipients for keys without indices, they are inactive.
validator_index: proposal_data.validator_index?,
fee_recipient,
})
} else {
if spec.bellatrix_fork_epoch.is_some() {
error!(
log,
"Validator is missing fee recipient";
"msg" => "update validator_definitions.yml",
"pubkey" => ?pubkey
);
}
None
}
})
}
fn collect_validator_registration_keys(&self) -> Vec<ValidatorRegistrationKey> {
self.collect_proposal_data(|pubkey, proposal_data| {
// Ignore fee recipients for keys without indices, they are inactive.
proposal_data.validator_index?;
// We don't log for missing fee recipients here because this will be logged more
// frequently in `collect_preparation_data`.
proposal_data.fee_recipient.and_then(|fee_recipient| {
proposal_data
.builder_proposals
.then_some(ValidatorRegistrationKey {
fee_recipient,
gas_limit: proposal_data.gas_limit,
pubkey,
})
})
})
}
fn collect_proposal_data<G, U>(&self, map_fn: G) -> Vec<U>
where
G: Fn(PublicKeyBytes, ProposalData) -> Option<U>,
{
let all_pubkeys: Vec<_> = self
.validator_store
.voting_pubkeys(DoppelgangerStatus::ignored);
all_pubkeys
.into_iter()
.filter_map(|pubkey| {
let proposal_data = self.validator_store.proposal_data(&pubkey)?;
map_fn(pubkey, proposal_data)
})
.collect()
}
async fn publish_preparation_data(
&self,
preparation_data: Vec<ProposerPreparationData>,
) -> Result<(), String> {
let log = self.context.log();
// Post the proposer preparations to the BN.
let preparation_data_len = preparation_data.len();
let preparation_entries = preparation_data.as_slice();
match self
.beacon_nodes
.request(ApiTopic::Subscriptions, |beacon_node| async move {
beacon_node
.post_validator_prepare_beacon_proposer(preparation_entries)
.await
})
.await
{
Ok(()) => debug!(
log,
"Published proposer preparation";
"count" => preparation_data_len,
),
Err(e) => error!(
log,
"Unable to publish proposer preparation to all beacon nodes";
"error" => %e,
),
}
Ok(())
}
/// Register validators with builders, used in the blinded block proposal flow.
async fn register_validators(&self) -> Result<(), String> {
let registration_keys = self.collect_validator_registration_keys();
let mut changed_keys = vec![];
// Need to scope this so the read lock is not held across an await point (I don't know why
// but the explicit `drop` is not enough).
{
let guard = self.validator_registration_cache.read();
for key in registration_keys.iter() {
if !guard.contains_key(key) {
changed_keys.push(key.clone());
}
}
drop(guard);
}
// Check if any have changed or it's been `EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION`.
if let Some(slot) = self.slot_clock.now() {
if slot % (E::slots_per_epoch() * EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION) == 0 {
self.publish_validator_registration_data(registration_keys)
.await?;
} else if !changed_keys.is_empty() {
self.publish_validator_registration_data(changed_keys)
.await?;
}
}
Ok(())
}
async fn publish_validator_registration_data(
&self,
registration_keys: Vec<ValidatorRegistrationKey>,
) -> Result<(), String> {
let log = self.context.log();
let registration_data_len = registration_keys.len();
let mut signed = Vec::with_capacity(registration_data_len);
for key in registration_keys {
let cached_registration_opt =
self.validator_registration_cache.read().get(&key).cloned();
let signed_data = if let Some(signed_data) = cached_registration_opt {
signed_data
} else {
let timestamp =
if let Some(timestamp) = self.builder_registration_timestamp_override {
timestamp
} else {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|e| format!("{e:?}"))?
.as_secs()
};
let ValidatorRegistrationKey {
fee_recipient,
gas_limit,
pubkey,
} = key.clone();
let signed_data = match self
.validator_store
.sign_validator_registration_data(ValidatorRegistrationData {
fee_recipient,
gas_limit,
timestamp,
pubkey,
})
.await
{
Ok(data) => data,
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
debug!(
log,
"Missing pubkey for registration data";
"pubkey" => ?pubkey,
);
continue;
}
Err(e) => {
error!(
log,
"Unable to sign validator registration data";
"error" => ?e,
"pubkey" => ?pubkey
);
continue;
}
};
self.validator_registration_cache
.write()
.insert(key, signed_data.clone());
signed_data
};
signed.push(signed_data);
}
if !signed.is_empty() {
for batch in signed.chunks(self.validator_registration_batch_size) {
match self
.beacon_nodes
.broadcast(|beacon_node| async move {
beacon_node.post_validator_register_validator(batch).await
})
.await
{
Ok(()) => info!(
log,
"Published validator registrations to the builder network";
"count" => batch.len(),
),
Err(e) => warn!(
log,
"Unable to publish validator registrations to the builder network";
"error" => %e,
),
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,649 @@
use crate::duties_service::{DutiesService, Error};
use doppelganger_service::DoppelgangerStatus;
use futures::future::join_all;
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
use slog::{crit, debug, info, warn};
use slot_clock::SlotClock;
use std::collections::{HashMap, HashSet};
use std::marker::PhantomData;
use std::sync::Arc;
use types::{ChainSpec, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId};
use validator_store::Error as ValidatorStoreError;
/// Number of epochs in advance to compute selection proofs when not in `distributed` mode.
pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2;
/// Number of slots in advance to compute selection proofs when in `distributed` mode.
pub const AGGREGATION_PRE_COMPUTE_SLOTS_DISTRIBUTED: u64 = 1;
/// Top-level data-structure containing sync duty information.
///
/// This data is structured as a series of nested `HashMap`s wrapped in `RwLock`s. Fine-grained
/// locking is used to provide maximum concurrency for the different services reading and writing.
///
/// Deadlocks are prevented by:
///
/// 1. Hierarchical locking. It is impossible to lock an inner lock (e.g. `validators`) without
/// first locking its parent.
/// 2. One-at-a-time locking. For the innermost locks on the aggregator duties, all of the functions
/// in this file take care to only lock one validator at a time. We never hold a lock while
/// trying to obtain another one (hence no lock ordering issues).
pub struct SyncDutiesMap<E: EthSpec> {
/// Map from sync committee period to duties for members of that sync committee.
committees: RwLock<HashMap<u64, CommitteeDuties>>,
/// Whether we are in `distributed` mode and using reduced lookahead for aggregate pre-compute.
distributed: bool,
_phantom: PhantomData<E>,
}
/// Duties for a single sync committee period.
#[derive(Default)]
pub struct CommitteeDuties {
/// Map from validator index to validator duties.
///
/// A `None` value indicates that the validator index is known *not* to be a member of the sync
/// committee, while a `Some` indicates a known member. An absent value indicates that the
/// validator index was not part of the set of local validators when the duties were fetched.
/// This allows us to track changes to the set of local validators.
validators: RwLock<HashMap<u64, Option<ValidatorDuties>>>,
}
/// Duties for a single validator.
pub struct ValidatorDuties {
/// The sync duty: including validator sync committee indices & pubkey.
duty: SyncDuty,
/// The aggregator duties: cached selection proofs for upcoming epochs.
aggregation_duties: AggregatorDuties,
}
/// Aggregator duties for a single validator.
pub struct AggregatorDuties {
/// The slot up to which aggregation proofs have already been computed (inclusive).
pre_compute_slot: RwLock<Option<Slot>>,
/// Map from slot & subnet ID to proof that this validator is an aggregator.
///
/// The slot is the slot at which the signed contribution and proof should be broadcast,
/// which is 1 less than the slot for which the `duty` was computed.
proofs: RwLock<HashMap<(Slot, SyncSubnetId), SyncSelectionProof>>,
}
/// Duties for multiple validators, for a single slot.
///
/// This type is returned to the sync service.
pub struct SlotDuties {
/// List of duties for all sync committee members at this slot.
///
/// Note: this is intentionally NOT split by subnet so that we only sign
/// one `SyncCommitteeMessage` per validator (recall a validator may be part of multiple
/// subnets).
pub duties: Vec<SyncDuty>,
/// Map from subnet ID to validator index and selection proof of each aggregator.
pub aggregators: HashMap<SyncSubnetId, Vec<(u64, PublicKeyBytes, SyncSelectionProof)>>,
}
impl<E: EthSpec> SyncDutiesMap<E> {
pub fn new(distributed: bool) -> Self {
Self {
committees: RwLock::new(HashMap::new()),
distributed,
_phantom: PhantomData,
}
}
/// Check if duties are already known for all of the given validators for `committee_period`.
fn all_duties_known(&self, committee_period: u64, validator_indices: &[u64]) -> bool {
self.committees
.read()
.get(&committee_period)
.map_or(false, |committee_duties| {
let validator_duties = committee_duties.validators.read();
validator_indices
.iter()
.all(|index| validator_duties.contains_key(index))
})
}
/// Number of slots in advance to compute selection proofs
fn aggregation_pre_compute_slots(&self) -> u64 {
if self.distributed {
AGGREGATION_PRE_COMPUTE_SLOTS_DISTRIBUTED
} else {
E::slots_per_epoch() * AGGREGATION_PRE_COMPUTE_EPOCHS
}
}
/// Prepare for pre-computation of selection proofs for `committee_period`.
///
/// Return the slot up to which proofs should be pre-computed, as well as a vec of
/// `(previous_pre_compute_slot, sync_duty)` pairs for all validators which need to have proofs
/// computed. See `fill_in_aggregation_proofs` for the actual calculation.
fn prepare_for_aggregator_pre_compute(
&self,
committee_period: u64,
current_slot: Slot,
spec: &ChainSpec,
) -> (Slot, Vec<(Slot, SyncDuty)>) {
let default_start_slot = std::cmp::max(
current_slot,
first_slot_of_period::<E>(committee_period, spec),
);
let pre_compute_lookahead_slots = self.aggregation_pre_compute_slots();
let pre_compute_slot = std::cmp::min(
current_slot + pre_compute_lookahead_slots,
last_slot_of_period::<E>(committee_period, spec),
);
let pre_compute_duties = self.committees.read().get(&committee_period).map_or_else(
Vec::new,
|committee_duties| {
let validator_duties = committee_duties.validators.read();
validator_duties
.values()
.filter_map(|maybe_duty| {
let duty = maybe_duty.as_ref()?;
let old_pre_compute_slot = duty
.aggregation_duties
.pre_compute_slot
.write()
.replace(pre_compute_slot);
match old_pre_compute_slot {
// No proofs pre-computed previously, compute all from the start of
// the period or the current slot (whichever is later).
None => Some((default_start_slot, duty.duty.clone())),
// Proofs computed up to `prev`, start from the subsequent epoch.
Some(prev) if prev < pre_compute_slot => {
Some((prev + 1, duty.duty.clone()))
}
// Proofs already known, no need to compute.
_ => None,
}
})
.collect()
},
);
(pre_compute_slot, pre_compute_duties)
}
fn get_or_create_committee_duties<'a, 'b>(
&'a self,
committee_period: u64,
validator_indices: impl IntoIterator<Item = &'b u64>,
) -> MappedRwLockReadGuard<'a, CommitteeDuties> {
let mut committees_writer = self.committees.write();
committees_writer
.entry(committee_period)
.or_default()
.init(validator_indices);
// Return shared reference
RwLockReadGuard::map(
RwLockWriteGuard::downgrade(committees_writer),
|committees_reader| &committees_reader[&committee_period],
)
}
/// Get duties for all validators for the given `wall_clock_slot`.
///
/// This is the entry-point for the sync committee service.
pub fn get_duties_for_slot(
&self,
wall_clock_slot: Slot,
spec: &ChainSpec,
) -> Option<SlotDuties> {
// Sync duties lag their assigned slot by 1
let duty_slot = wall_clock_slot + 1;
let sync_committee_period = duty_slot
.epoch(E::slots_per_epoch())
.sync_committee_period(spec)
.ok()?;
let committees_reader = self.committees.read();
let committee_duties = committees_reader.get(&sync_committee_period)?;
let mut duties = vec![];
let mut aggregators = HashMap::new();
committee_duties
.validators
.read()
.values()
// Filter out non-members & failed subnet IDs.
.filter_map(|opt_duties| {
let duty = opt_duties.as_ref()?;
let subnet_ids = duty.duty.subnet_ids::<E>().ok()?;
Some((duty, subnet_ids))
})
// Add duties for members to the vec of all duties, and aggregators to the
// aggregators map.
.for_each(|(validator_duty, subnet_ids)| {
duties.push(validator_duty.duty.clone());
let proofs = validator_duty.aggregation_duties.proofs.read();
for subnet_id in subnet_ids {
if let Some(proof) = proofs.get(&(wall_clock_slot, subnet_id)) {
aggregators.entry(subnet_id).or_insert_with(Vec::new).push((
validator_duty.duty.validator_index,
validator_duty.duty.pubkey,
proof.clone(),
));
}
}
});
Some(SlotDuties {
duties,
aggregators,
})
}
/// Prune duties for past sync committee periods from the map.
fn prune(&self, current_sync_committee_period: u64) {
self.committees
.write()
.retain(|period, _| *period >= current_sync_committee_period)
}
}
impl CommitteeDuties {
fn init<'b>(&mut self, validator_indices: impl IntoIterator<Item = &'b u64>) {
validator_indices.into_iter().for_each(|validator_index| {
self.validators
.get_mut()
.entry(*validator_index)
.or_insert(None);
})
}
}
impl ValidatorDuties {
fn new(duty: SyncDuty) -> Self {
Self {
duty,
aggregation_duties: AggregatorDuties {
pre_compute_slot: RwLock::new(None),
proofs: RwLock::new(HashMap::new()),
},
}
}
}
/// Number of epochs to wait from the start of the period before actually fetching duties.
fn epoch_offset(spec: &ChainSpec) -> u64 {
spec.epochs_per_sync_committee_period.as_u64() / 2
}
fn first_slot_of_period<E: EthSpec>(sync_committee_period: u64, spec: &ChainSpec) -> Slot {
(spec.epochs_per_sync_committee_period * sync_committee_period).start_slot(E::slots_per_epoch())
}
fn last_slot_of_period<E: EthSpec>(sync_committee_period: u64, spec: &ChainSpec) -> Slot {
first_slot_of_period::<E>(sync_committee_period + 1, spec) - 1
}
pub async fn poll_sync_committee_duties<T: SlotClock + 'static, E: EthSpec>(
duties_service: &Arc<DutiesService<T, E>>,
) -> Result<(), Error> {
let sync_duties = &duties_service.sync_duties;
let spec = &duties_service.spec;
let current_slot = duties_service
.slot_clock
.now()
.ok_or(Error::UnableToReadSlotClock)?;
let current_epoch = current_slot.epoch(E::slots_per_epoch());
// If the Altair fork is yet to be activated, do not attempt to poll for duties.
if spec
.altair_fork_epoch
.map_or(true, |altair_epoch| current_epoch < altair_epoch)
{
return Ok(());
}
let current_sync_committee_period = current_epoch.sync_committee_period(spec)?;
let next_sync_committee_period = current_sync_committee_period + 1;
// Collect *all* pubkeys, even those undergoing doppelganger protection.
//
// Sync committee messages are not slashable and are currently excluded from doppelganger
// protection.
let local_pubkeys: HashSet<_> = duties_service
.validator_store
.voting_pubkeys(DoppelgangerStatus::ignored);
let local_indices = {
let mut local_indices = Vec::with_capacity(local_pubkeys.len());
let vals_ref = duties_service.validator_store.initialized_validators();
let vals = vals_ref.read();
for &pubkey in &local_pubkeys {
if let Some(validator_index) = vals.get_index(&pubkey) {
local_indices.push(validator_index)
}
}
local_indices
};
// If duties aren't known for the current period, poll for them.
if !sync_duties.all_duties_known(current_sync_committee_period, &local_indices) {
poll_sync_committee_duties_for_period(
duties_service,
&local_indices,
current_sync_committee_period,
)
.await?;
// Prune previous duties (we avoid doing this too often as it locks the whole map).
sync_duties.prune(current_sync_committee_period);
}
// Pre-compute aggregator selection proofs for the current period.
let (current_pre_compute_slot, new_pre_compute_duties) = sync_duties
.prepare_for_aggregator_pre_compute(current_sync_committee_period, current_slot, spec);
if !new_pre_compute_duties.is_empty() {
let sub_duties_service = duties_service.clone();
duties_service.context.executor.spawn(
async move {
fill_in_aggregation_proofs(
sub_duties_service,
&new_pre_compute_duties,
current_sync_committee_period,
current_slot,
current_pre_compute_slot,
)
.await
},
"duties_service_sync_selection_proofs",
);
}
// If we're past the point in the current period where we should determine duties for the next
// period and they are not yet known, then poll.
if current_epoch.as_u64() % spec.epochs_per_sync_committee_period.as_u64() >= epoch_offset(spec)
&& !sync_duties.all_duties_known(next_sync_committee_period, &local_indices)
{
poll_sync_committee_duties_for_period(
duties_service,
&local_indices,
next_sync_committee_period,
)
.await?;
// Prune (this is the main code path for updating duties, so we should almost always hit
// this prune).
sync_duties.prune(current_sync_committee_period);
}
// Pre-compute aggregator selection proofs for the next period.
let aggregate_pre_compute_lookahead_slots = sync_duties.aggregation_pre_compute_slots();
if (current_slot + aggregate_pre_compute_lookahead_slots)
.epoch(E::slots_per_epoch())
.sync_committee_period(spec)?
== next_sync_committee_period
{
let (pre_compute_slot, new_pre_compute_duties) = sync_duties
.prepare_for_aggregator_pre_compute(next_sync_committee_period, current_slot, spec);
if !new_pre_compute_duties.is_empty() {
let sub_duties_service = duties_service.clone();
duties_service.context.executor.spawn(
async move {
fill_in_aggregation_proofs(
sub_duties_service,
&new_pre_compute_duties,
next_sync_committee_period,
current_slot,
pre_compute_slot,
)
.await
},
"duties_service_sync_selection_proofs",
);
}
}
Ok(())
}
pub async fn poll_sync_committee_duties_for_period<T: SlotClock + 'static, E: EthSpec>(
duties_service: &Arc<DutiesService<T, E>>,
local_indices: &[u64],
sync_committee_period: u64,
) -> Result<(), Error> {
let spec = &duties_service.spec;
let log = duties_service.context.log();
// no local validators don't need to poll for sync committee
if local_indices.is_empty() {
debug!(
duties_service.context.log(),
"No validators, not polling for sync committee duties";
"sync_committee_period" => sync_committee_period,
);
return Ok(());
}
debug!(
log,
"Fetching sync committee duties";
"sync_committee_period" => sync_committee_period,
"num_validators" => local_indices.len(),
);
let period_start_epoch = spec.epochs_per_sync_committee_period * sync_committee_period;
let duties_response = duties_service
.beacon_nodes
.first_success(|beacon_node| async move {
let _timer = validator_metrics::start_timer_vec(
&validator_metrics::DUTIES_SERVICE_TIMES,
&[validator_metrics::VALIDATOR_DUTIES_SYNC_HTTP_POST],
);
beacon_node
.post_validator_duties_sync(period_start_epoch, local_indices)
.await
})
.await;
let duties = match duties_response {
Ok(res) => res.data,
Err(e) => {
warn!(
log,
"Failed to download sync committee duties";
"sync_committee_period" => sync_committee_period,
"error" => %e,
);
return Ok(());
}
};
debug!(log, "Fetched sync duties from BN"; "count" => duties.len());
// Add duties to map.
let committee_duties = duties_service
.sync_duties
.get_or_create_committee_duties(sync_committee_period, local_indices);
let mut validator_writer = committee_duties.validators.write();
for duty in duties {
let validator_duties = validator_writer
.get_mut(&duty.validator_index)
.ok_or(Error::SyncDutiesNotFound(duty.validator_index))?;
let updated = validator_duties.as_ref().map_or(true, |existing_duties| {
let updated_due_to_reorg = existing_duties.duty.validator_sync_committee_indices
!= duty.validator_sync_committee_indices;
if updated_due_to_reorg {
warn!(
log,
"Sync committee duties changed";
"message" => "this could be due to a really long re-org, or a bug"
);
}
updated_due_to_reorg
});
if updated {
info!(
log,
"Validator in sync committee";
"validator_index" => duty.validator_index,
"sync_committee_period" => sync_committee_period,
);
*validator_duties = Some(ValidatorDuties::new(duty));
}
}
Ok(())
}
pub async fn fill_in_aggregation_proofs<T: SlotClock + 'static, E: EthSpec>(
duties_service: Arc<DutiesService<T, E>>,
pre_compute_duties: &[(Slot, SyncDuty)],
sync_committee_period: u64,
current_slot: Slot,
pre_compute_slot: Slot,
) {
let log = duties_service.context.log();
debug!(
log,
"Calculating sync selection proofs";
"period" => sync_committee_period,
"current_slot" => current_slot,
"pre_compute_slot" => pre_compute_slot
);
// Generate selection proofs for each validator at each slot, one slot at a time.
for slot in (current_slot.as_u64()..=pre_compute_slot.as_u64()).map(Slot::new) {
let mut validator_proofs = vec![];
for (validator_start_slot, duty) in pre_compute_duties {
// Proofs are already known at this slot for this validator.
if slot < *validator_start_slot {
continue;
}
let subnet_ids = match duty.subnet_ids::<E>() {
Ok(subnet_ids) => subnet_ids,
Err(e) => {
crit!(
log,
"Arithmetic error computing subnet IDs";
"error" => ?e,
);
continue;
}
};
// Create futures to produce proofs.
let duties_service_ref = &duties_service;
let futures = subnet_ids.iter().map(|subnet_id| async move {
// Construct proof for prior slot.
let proof_slot = slot - 1;
let proof = match duties_service_ref
.validator_store
.produce_sync_selection_proof(&duty.pubkey, proof_slot, *subnet_id)
.await
{
Ok(proof) => proof,
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
debug!(
log,
"Missing pubkey for sync selection proof";
"pubkey" => ?pubkey,
"pubkey" => ?duty.pubkey,
"slot" => proof_slot,
);
return None;
}
Err(e) => {
warn!(
log,
"Unable to sign selection proof";
"error" => ?e,
"pubkey" => ?duty.pubkey,
"slot" => proof_slot,
);
return None;
}
};
match proof.is_aggregator::<E>() {
Ok(true) => {
debug!(
log,
"Validator is sync aggregator";
"validator_index" => duty.validator_index,
"slot" => proof_slot,
"subnet_id" => %subnet_id,
);
Some(((proof_slot, *subnet_id), proof))
}
Ok(false) => None,
Err(e) => {
warn!(
log,
"Error determining is_aggregator";
"pubkey" => ?duty.pubkey,
"slot" => proof_slot,
"error" => ?e,
);
None
}
}
});
// Execute all the futures in parallel, collecting any successful results.
let proofs = join_all(futures)
.await
.into_iter()
.flatten()
.collect::<Vec<_>>();
validator_proofs.push((duty.validator_index, proofs));
}
// Add to global storage (we add regularly so the proofs can be used ASAP).
let sync_map = duties_service.sync_duties.committees.read();
let Some(committee_duties) = sync_map.get(&sync_committee_period) else {
debug!(
log,
"Missing sync duties";
"period" => sync_committee_period,
);
continue;
};
let validators = committee_duties.validators.read();
let num_validators_updated = validator_proofs.len();
for (validator_index, proofs) in validator_proofs {
if let Some(Some(duty)) = validators.get(&validator_index) {
duty.aggregation_duties.proofs.write().extend(proofs);
} else {
debug!(
log,
"Missing sync duty to update";
"validator_index" => validator_index,
"period" => sync_committee_period,
);
}
}
if num_validators_updated > 0 {
debug!(
log,
"Finished computing sync selection proofs";
"slot" => slot,
"updated_validators" => num_validators_updated,
);
}
}
}

View File

@@ -0,0 +1,624 @@
use crate::duties_service::DutiesService;
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback};
use environment::RuntimeContext;
use eth2::types::BlockId;
use futures::future::join_all;
use futures::future::FutureExt;
use slog::{crit, debug, error, info, trace, warn};
use slot_clock::SlotClock;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use tokio::time::{sleep, sleep_until, Duration, Instant};
use types::{
ChainSpec, EthSpec, Hash256, PublicKeyBytes, Slot, SyncCommitteeSubscription,
SyncContributionData, SyncDuty, SyncSelectionProof, SyncSubnetId,
};
use validator_store::{Error as ValidatorStoreError, ValidatorStore};
pub const SUBSCRIPTION_LOOKAHEAD_EPOCHS: u64 = 4;
pub struct SyncCommitteeService<T: SlotClock + 'static, E: EthSpec> {
inner: Arc<Inner<T, E>>,
}
impl<T: SlotClock + 'static, E: EthSpec> Clone for SyncCommitteeService<T, E> {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl<T: SlotClock + 'static, E: EthSpec> Deref for SyncCommitteeService<T, E> {
type Target = Inner<T, E>;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
pub struct Inner<T: SlotClock + 'static, E: EthSpec> {
duties_service: Arc<DutiesService<T, E>>,
validator_store: Arc<ValidatorStore<T, E>>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
context: RuntimeContext<E>,
/// Boolean to track whether the service has posted subscriptions to the BN at least once.
///
/// This acts as a latch that fires once upon start-up, and then never again.
first_subscription_done: AtomicBool,
}
impl<T: SlotClock + 'static, E: EthSpec> SyncCommitteeService<T, E> {
pub fn new(
duties_service: Arc<DutiesService<T, E>>,
validator_store: Arc<ValidatorStore<T, E>>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
context: RuntimeContext<E>,
) -> Self {
Self {
inner: Arc::new(Inner {
duties_service,
validator_store,
slot_clock,
beacon_nodes,
context,
first_subscription_done: AtomicBool::new(false),
}),
}
}
/// Check if the Altair fork has been activated and therefore sync duties should be performed.
///
/// Slot clock errors are mapped to `false`.
fn altair_fork_activated(&self) -> bool {
self.duties_service
.spec
.altair_fork_epoch
.and_then(|fork_epoch| {
let current_epoch = self.slot_clock.now()?.epoch(E::slots_per_epoch());
Some(current_epoch >= fork_epoch)
})
.unwrap_or(false)
}
pub fn start_update_service(self, spec: &ChainSpec) -> Result<(), String> {
let log = self.context.log().clone();
let slot_duration = Duration::from_secs(spec.seconds_per_slot);
let duration_to_next_slot = self
.slot_clock
.duration_to_next_slot()
.ok_or("Unable to determine duration to next slot")?;
info!(
log,
"Sync committee service started";
"next_update_millis" => duration_to_next_slot.as_millis()
);
let executor = self.context.executor.clone();
let interval_fut = async move {
loop {
if let Some(duration_to_next_slot) = self.slot_clock.duration_to_next_slot() {
// Wait for contribution broadcast interval 1/3 of the way through the slot.
let log = self.context.log();
sleep(duration_to_next_slot + slot_duration / 3).await;
// Do nothing if the Altair fork has not yet occurred.
if !self.altair_fork_activated() {
continue;
}
if let Err(e) = self.spawn_contribution_tasks(slot_duration).await {
crit!(
log,
"Failed to spawn sync contribution tasks";
"error" => e
)
} else {
trace!(
log,
"Spawned sync contribution tasks";
)
}
// Do subscriptions for future slots/epochs.
self.spawn_subscription_tasks();
} else {
error!(log, "Failed to read slot clock");
// If we can't read the slot clock, just wait another slot.
sleep(slot_duration).await;
}
}
};
executor.spawn(interval_fut, "sync_committee_service");
Ok(())
}
async fn spawn_contribution_tasks(&self, slot_duration: Duration) -> Result<(), String> {
let log = self.context.log().clone();
let slot = self.slot_clock.now().ok_or("Failed to read slot clock")?;
let duration_to_next_slot = self
.slot_clock
.duration_to_next_slot()
.ok_or("Unable to determine duration to next slot")?;
// If a validator needs to publish a sync aggregate, they must do so at 2/3
// through the slot. This delay triggers at this time
let aggregate_production_instant = Instant::now()
+ duration_to_next_slot
.checked_sub(slot_duration / 3)
.unwrap_or_else(|| Duration::from_secs(0));
let Some(slot_duties) = self
.duties_service
.sync_duties
.get_duties_for_slot(slot, &self.duties_service.spec)
else {
debug!(log, "No duties known for slot {}", slot);
return Ok(());
};
if slot_duties.duties.is_empty() {
debug!(
log,
"No local validators in current sync committee";
"slot" => slot,
);
return Ok(());
}
// Fetch `block_root` with non optimistic execution for `SyncCommitteeContribution`.
let response = self
.beacon_nodes
.first_success(
|beacon_node| async move {
match beacon_node.get_beacon_blocks_root(BlockId::Head).await {
Ok(Some(block)) if block.execution_optimistic == Some(false) => {
Ok(block)
}
Ok(Some(_)) => {
Err(format!("To sign sync committee messages for slot {slot} a non-optimistic head block is required"))
}
Ok(None) => Err(format!("No block root found for slot {}", slot)),
Err(e) => Err(e.to_string()),
}
},
)
.await;
let block_root = match response {
Ok(block) => block.data.root,
Err(errs) => {
warn!(
log,
"Refusing to sign sync committee messages for an optimistic head block or \
a block head with unknown optimistic status";
"errors" => errs.to_string(),
"slot" => slot,
);
return Ok(());
}
};
// Spawn one task to publish all of the sync committee signatures.
let validator_duties = slot_duties.duties;
let service = self.clone();
self.inner.context.executor.spawn(
async move {
service
.publish_sync_committee_signatures(slot, block_root, validator_duties)
.map(|_| ())
.await
},
"sync_committee_signature_publish",
);
let aggregators = slot_duties.aggregators;
let service = self.clone();
self.inner.context.executor.spawn(
async move {
service
.publish_sync_committee_aggregates(
slot,
block_root,
aggregators,
aggregate_production_instant,
)
.map(|_| ())
.await
},
"sync_committee_aggregate_publish",
);
Ok(())
}
/// Publish sync committee signatures.
async fn publish_sync_committee_signatures(
&self,
slot: Slot,
beacon_block_root: Hash256,
validator_duties: Vec<SyncDuty>,
) -> Result<(), ()> {
let log = self.context.log();
// Create futures to produce sync committee signatures.
let signature_futures = validator_duties.iter().map(|duty| async move {
match self
.validator_store
.produce_sync_committee_signature(
slot,
beacon_block_root,
duty.validator_index,
&duty.pubkey,
)
.await
{
Ok(signature) => Some(signature),
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
debug!(
log,
"Missing pubkey for sync committee signature";
"pubkey" => ?pubkey,
"validator_index" => duty.validator_index,
"slot" => slot,
);
None
}
Err(e) => {
crit!(
log,
"Failed to sign sync committee signature";
"validator_index" => duty.validator_index,
"slot" => slot,
"error" => ?e,
);
None
}
}
});
// Execute all the futures in parallel, collecting any successful results.
let committee_signatures = &join_all(signature_futures)
.await
.into_iter()
.flatten()
.collect::<Vec<_>>();
self.beacon_nodes
.request(ApiTopic::SyncCommittee, |beacon_node| async move {
beacon_node
.post_beacon_pool_sync_committee_signatures(committee_signatures)
.await
})
.await
.map_err(|e| {
error!(
log,
"Unable to publish sync committee messages";
"slot" => slot,
"error" => %e,
);
})?;
info!(
log,
"Successfully published sync committee messages";
"count" => committee_signatures.len(),
"head_block" => ?beacon_block_root,
"slot" => slot,
);
Ok(())
}
async fn publish_sync_committee_aggregates(
&self,
slot: Slot,
beacon_block_root: Hash256,
aggregators: HashMap<SyncSubnetId, Vec<(u64, PublicKeyBytes, SyncSelectionProof)>>,
aggregate_instant: Instant,
) {
for (subnet_id, subnet_aggregators) in aggregators {
let service = self.clone();
self.inner.context.executor.spawn(
async move {
service
.publish_sync_committee_aggregate_for_subnet(
slot,
beacon_block_root,
subnet_id,
subnet_aggregators,
aggregate_instant,
)
.map(|_| ())
.await
},
"sync_committee_aggregate_publish_subnet",
);
}
}
async fn publish_sync_committee_aggregate_for_subnet(
&self,
slot: Slot,
beacon_block_root: Hash256,
subnet_id: SyncSubnetId,
subnet_aggregators: Vec<(u64, PublicKeyBytes, SyncSelectionProof)>,
aggregate_instant: Instant,
) -> Result<(), ()> {
sleep_until(aggregate_instant).await;
let log = self.context.log();
let contribution = &self
.beacon_nodes
.first_success(|beacon_node| async move {
let sync_contribution_data = SyncContributionData {
slot,
beacon_block_root,
subcommittee_index: subnet_id.into(),
};
beacon_node
.get_validator_sync_committee_contribution::<E>(&sync_contribution_data)
.await
})
.await
.map_err(|e| {
crit!(
log,
"Failed to produce sync contribution";
"slot" => slot,
"beacon_block_root" => ?beacon_block_root,
"error" => %e,
)
})?
.ok_or_else(|| {
crit!(
log,
"No aggregate contribution found";
"slot" => slot,
"beacon_block_root" => ?beacon_block_root,
);
})?
.data;
// Create futures to produce signed contributions.
let signature_futures = subnet_aggregators.into_iter().map(
|(aggregator_index, aggregator_pk, selection_proof)| async move {
match self
.validator_store
.produce_signed_contribution_and_proof(
aggregator_index,
aggregator_pk,
contribution.clone(),
selection_proof,
)
.await
{
Ok(signed_contribution) => Some(signed_contribution),
Err(ValidatorStoreError::UnknownPubkey(pubkey)) => {
// A pubkey can be missing when a validator was recently
// removed via the API.
debug!(
log,
"Missing pubkey for sync contribution";
"pubkey" => ?pubkey,
"slot" => slot,
);
None
}
Err(e) => {
crit!(
log,
"Unable to sign sync committee contribution";
"slot" => slot,
"error" => ?e,
);
None
}
}
},
);
// Execute all the futures in parallel, collecting any successful results.
let signed_contributions = &join_all(signature_futures)
.await
.into_iter()
.flatten()
.collect::<Vec<_>>();
// Publish to the beacon node.
self.beacon_nodes
.first_success(|beacon_node| async move {
beacon_node
.post_validator_contribution_and_proofs(signed_contributions)
.await
})
.await
.map_err(|e| {
error!(
log,
"Unable to publish signed contributions and proofs";
"slot" => slot,
"error" => %e,
);
})?;
info!(
log,
"Successfully published sync contributions";
"subnet" => %subnet_id,
"beacon_block_root" => %beacon_block_root,
"num_signers" => contribution.aggregation_bits.num_set_bits(),
"slot" => slot,
);
Ok(())
}
fn spawn_subscription_tasks(&self) {
let service = self.clone();
let log = self.context.log().clone();
self.inner.context.executor.spawn(
async move {
service.publish_subscriptions().await.unwrap_or_else(|e| {
error!(
log,
"Error publishing subscriptions";
"error" => ?e,
)
});
},
"sync_committee_subscription_publish",
);
}
async fn publish_subscriptions(self) -> Result<(), String> {
let log = self.context.log().clone();
let spec = &self.duties_service.spec;
let slot = self.slot_clock.now().ok_or("Failed to read slot clock")?;
let mut duty_slots = vec![];
let mut all_succeeded = true;
// At the start of every epoch during the current period, re-post the subscriptions
// to the beacon node. This covers the case where the BN has forgotten the subscriptions
// due to a restart, or where the VC has switched to a fallback BN.
let current_period = sync_period_of_slot::<E>(slot, spec)?;
if !self.first_subscription_done.load(Ordering::Relaxed)
|| slot.as_u64() % E::slots_per_epoch() == 0
{
duty_slots.push((slot, current_period));
}
// Near the end of the current period, push subscriptions for the next period to the
// beacon node. We aggressively push every slot in the lead-up, as this is the main way
// that we want to ensure that the BN is subscribed (well in advance).
let lookahead_slot = slot + SUBSCRIPTION_LOOKAHEAD_EPOCHS * E::slots_per_epoch();
let lookahead_period = sync_period_of_slot::<E>(lookahead_slot, spec)?;
if lookahead_period > current_period {
duty_slots.push((lookahead_slot, lookahead_period));
}
if duty_slots.is_empty() {
return Ok(());
}
// Collect subscriptions.
let mut subscriptions = vec![];
for (duty_slot, sync_committee_period) in duty_slots {
debug!(
log,
"Fetching subscription duties";
"duty_slot" => duty_slot,
"current_slot" => slot,
);
match self
.duties_service
.sync_duties
.get_duties_for_slot(duty_slot, spec)
{
Some(duties) => subscriptions.extend(subscriptions_from_sync_duties(
duties.duties,
sync_committee_period,
spec,
)),
None => {
debug!(
log,
"No duties for subscription";
"slot" => duty_slot,
);
all_succeeded = false;
}
}
}
if subscriptions.is_empty() {
debug!(
log,
"No sync subscriptions to send";
"slot" => slot,
);
return Ok(());
}
// Post subscriptions to BN.
debug!(
log,
"Posting sync subscriptions to BN";
"count" => subscriptions.len(),
);
let subscriptions_slice = &subscriptions;
for subscription in subscriptions_slice {
debug!(
log,
"Subscription";
"validator_index" => subscription.validator_index,
"validator_sync_committee_indices" => ?subscription.sync_committee_indices,
"until_epoch" => subscription.until_epoch,
);
}
if let Err(e) = self
.beacon_nodes
.request(ApiTopic::Subscriptions, |beacon_node| async move {
beacon_node
.post_validator_sync_committee_subscriptions(subscriptions_slice)
.await
})
.await
{
error!(
log,
"Unable to post sync committee subscriptions";
"slot" => slot,
"error" => %e,
);
all_succeeded = false;
}
// Disable first-subscription latch once all duties have succeeded once.
if all_succeeded {
self.first_subscription_done.store(true, Ordering::Relaxed);
}
Ok(())
}
}
fn sync_period_of_slot<E: EthSpec>(slot: Slot, spec: &ChainSpec) -> Result<u64, String> {
slot.epoch(E::slots_per_epoch())
.sync_committee_period(spec)
.map_err(|e| format!("Error computing sync period: {:?}", e))
}
fn subscriptions_from_sync_duties(
duties: Vec<SyncDuty>,
sync_committee_period: u64,
spec: &ChainSpec,
) -> impl Iterator<Item = SyncCommitteeSubscription> {
let until_epoch = spec.epochs_per_sync_committee_period * (sync_committee_period + 1);
duties
.into_iter()
.map(move |duty| SyncCommitteeSubscription {
validator_index: duty.validator_index,
sync_committee_indices: duty.validator_sync_committee_indices,
until_epoch,
})
}