Optimize validator duties (#2243)

## Issue Addressed

Closes #2052

## Proposed Changes

- Refactor the attester/proposer duties endpoints in the BN
    - Performance improvements
    - Fixes some potential inconsistencies with the dependent root fields.
    - Removes `http_api::beacon_proposer_cache` and just uses the one on the `BeaconChain` instead.
    - Move the code for the proposer/attester duties endpoints into separate files, for readability.
- Refactor the `DutiesService` in the VC
    - Required to reduce the delay on broadcasting new blocks.
    - Gets rid of the `ValidatorDuty` shim struct that came about when we adopted the standard API.
    - Separate block/attestation duty tasks so that they don't block each other when one is slow.
- In the VC, use `PublicKeyBytes` to represent validators instead of `PublicKey`. `PublicKey` is a legit crypto object whilst `PublicKeyBytes` is just a byte-array, it's much faster to clone/hash `PublicKeyBytes` and this change has had a significant impact on runtimes.
    - Unfortunately this has created lots of dust changes.
 - In the BN, store `PublicKeyBytes` in the `beacon_proposer_cache` and allow access to them. The HTTP API always sends `PublicKeyBytes` over the wire and the conversion from `PublicKey` -> `PublickeyBytes` is non-trivial, especially when queries have 100s/1000s of validators (like Pyrmont).
 - Add the `state_processing::state_advance` mod which dedups a lot of the "apply `n` skip slots to the state" code.
    - This also fixes a bug with some functions which were failing to include a state root as per [this comment](072695284f/consensus/state_processing/src/state_advance.rs (L69-L74)). I couldn't find any instance of this bug that resulted in anything more severe than keying a shuffling cache by the wrong block root.
 - Swap the VC block service to use `mpsc` from `tokio` instead of `futures`. This is consistent with the rest of the code base.
    
~~This PR *reduces* the size of the codebase 🎉~~ It *used* to reduce the size of the code base before I added more comments. 

## Observations on Prymont

- Proposer duties times down from peaks of 450ms to consistent <1ms.
- Current epoch attester duties times down from >1s peaks to a consistent 20-30ms.
- Block production down from +600ms to 100-200ms.

## Additional Info

- ~~Blocked on #2241~~
- ~~Blocked on #2234~~

## TODO

- [x] ~~Refactor this into some smaller PRs?~~ Leaving this as-is for now.
- [x] Address `per_slot_processing` roots.
- [x] Investigate slow next epoch times. Not getting added to cache on block processing?
- [x] Consider [this](072695284f/beacon_node/store/src/hot_cold_store.rs (L811-L812)) in the scenario of replacing the state roots


Co-authored-by: pawan <pawandhananjay@gmail.com>
Co-authored-by: Michael Sproul <michael@sigmaprime.io>
This commit is contained in:
Paul Hauner
2021-03-17 05:09:57 +00:00
parent 6a69b20be1
commit 015ab7d0a7
49 changed files with 2201 additions and 1833 deletions

View File

@@ -11,7 +11,7 @@ use slot_clock::SlotClock;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use tokio::time::{interval_at, sleep_until, Duration, Instant};
use tokio::time::{sleep, sleep_until, Duration, Instant};
use tree_hash::TreeHash;
use types::{
AggregateSignature, Attestation, AttestationData, BitList, ChainSpec, CommitteeIndex, EthSpec,
@@ -20,7 +20,7 @@ use types::{
/// Builds an `AttestationService`.
pub struct AttestationServiceBuilder<T, E: EthSpec> {
duties_service: Option<DutiesService<T, E>>,
duties_service: Option<Arc<DutiesService<T, E>>>,
validator_store: Option<ValidatorStore<T, E>>,
slot_clock: Option<T>,
beacon_nodes: Option<Arc<BeaconNodeFallback<T, E>>>,
@@ -38,7 +38,7 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationServiceBuilder<T, E> {
}
}
pub fn duties_service(mut self, service: DutiesService<T, E>) -> Self {
pub fn duties_service(mut self, service: Arc<DutiesService<T, E>>) -> Self {
self.duties_service = Some(service);
self
}
@@ -88,7 +88,7 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationServiceBuilder<T, E> {
/// Helper to minimise `Arc` usage.
pub struct Inner<T, E: EthSpec> {
duties_service: DutiesService<T, E>,
duties_service: Arc<DutiesService<T, E>>,
validator_store: ValidatorStore<T, E>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
@@ -137,32 +137,31 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
"next_update_millis" => duration_to_next_slot.as_millis()
);
let mut interval = {
// Note: `interval_at` panics if `slot_duration` is 0
interval_at(
Instant::now() + duration_to_next_slot + slot_duration / 3,
slot_duration,
)
};
let executor = self.context.executor.clone();
let interval_fut = async move {
loop {
interval.tick().await;
let log = self.context.log();
if let Some(duration_to_next_slot) = self.slot_clock.duration_to_next_slot() {
sleep(duration_to_next_slot + slot_duration / 3).await;
let log = self.context.log();
if let Err(e) = self.spawn_attestation_tasks(slot_duration) {
crit!(
log,
"Failed to spawn attestation tasks";
"error" => e
)
if let Err(e) = self.spawn_attestation_tasks(slot_duration) {
crit!(
log,
"Failed to spawn attestation tasks";
"error" => e
)
} else {
trace!(
log,
"Spawned attestation tasks";
)
}
} else {
trace!(
log,
"Spawned attestation tasks";
)
error!(log, "Failed to read slot clock");
// If we can't read the slot clock, just wait another slot.
sleep(slot_duration).await;
continue;
}
}
};
@@ -192,12 +191,9 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
.attesters(slot)
.into_iter()
.fold(HashMap::new(), |mut map, duty_and_proof| {
if let Some(committee_index) = duty_and_proof.duty.attestation_committee_index {
let validator_duties = map.entry(committee_index).or_insert_with(Vec::new);
validator_duties.push(duty_and_proof);
}
map.entry(duty_and_proof.duty.committee_index)
.or_insert_with(Vec::new)
.push(duty_and_proof);
map
});
@@ -355,43 +351,27 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
let mut attestations = Vec::with_capacity(validator_duties.len());
for duty in validator_duties {
// Ensure that all required fields are present in the validator duty.
let (
duty_slot,
duty_committee_index,
validator_committee_position,
_,
_,
committee_length,
) = if let Some(tuple) = duty.attestation_duties() {
tuple
} else {
crit!(
log,
"Missing validator duties when signing";
"duties" => format!("{:?}", duty)
);
continue;
};
for duty_and_proof in validator_duties {
let duty = &duty_and_proof.duty;
// Ensure that the attestation matches the duties.
if duty_slot != attestation_data.slot || duty_committee_index != attestation_data.index
#[allow(clippy::suspicious_operation_groupings)]
if duty.slot != attestation_data.slot || duty.committee_index != attestation_data.index
{
crit!(
log,
"Inconsistent validator duties during signing";
"validator" => format!("{:?}", duty.validator_pubkey()),
"duty_slot" => duty_slot,
"validator" => ?duty.pubkey,
"duty_slot" => duty.slot,
"attestation_slot" => attestation_data.slot,
"duty_index" => duty_committee_index,
"duty_index" => duty.committee_index,
"attestation_index" => attestation_data.index,
);
continue;
}
let mut attestation = Attestation {
aggregation_bits: BitList::with_capacity(committee_length as usize).unwrap(),
aggregation_bits: BitList::with_capacity(duty.committee_length as usize).unwrap(),
data: attestation_data.clone(),
signature: AggregateSignature::infinity(),
};
@@ -399,8 +379,8 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
if self
.validator_store
.sign_attestation(
duty.validator_pubkey(),
validator_committee_position,
&duty.pubkey,
duty.validator_committee_index as usize,
&mut attestation,
current_epoch,
)
@@ -490,6 +470,8 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
let mut signed_aggregate_and_proofs = Vec::new();
for duty_and_proof in validator_duties {
let duty = &duty_and_proof.duty;
let selection_proof = if let Some(proof) = duty_and_proof.selection_proof.as_ref() {
proof
} else {
@@ -497,26 +479,18 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
// subscribed aggregators.
continue;
};
let (duty_slot, duty_committee_index, _, validator_index, _, _) =
if let Some(tuple) = duty_and_proof.attestation_duties() {
tuple
} else {
crit!(log, "Missing duties when signing aggregate");
continue;
};
let pubkey = &duty_and_proof.duty.validator_pubkey;
let slot = attestation_data.slot;
let committee_index = attestation_data.index;
if duty_slot != slot || duty_committee_index != committee_index {
if duty.slot != slot || duty.committee_index != committee_index {
crit!(log, "Inconsistent validator duties during signing");
continue;
}
if let Some(aggregate) = self.validator_store.produce_signed_aggregate_and_proof(
pubkey,
validator_index,
&duty.pubkey,
duty.validator_index,
aggregated_attestation.clone(),
selection_proof.clone(),
) {