mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-19 21:04:41 +00:00
Optimize validator duties (#2243)
## Issue Addressed Closes #2052 ## Proposed Changes - Refactor the attester/proposer duties endpoints in the BN - Performance improvements - Fixes some potential inconsistencies with the dependent root fields. - Removes `http_api::beacon_proposer_cache` and just uses the one on the `BeaconChain` instead. - Move the code for the proposer/attester duties endpoints into separate files, for readability. - Refactor the `DutiesService` in the VC - Required to reduce the delay on broadcasting new blocks. - Gets rid of the `ValidatorDuty` shim struct that came about when we adopted the standard API. - Separate block/attestation duty tasks so that they don't block each other when one is slow. - In the VC, use `PublicKeyBytes` to represent validators instead of `PublicKey`. `PublicKey` is a legit crypto object whilst `PublicKeyBytes` is just a byte-array; it's much faster to clone/hash `PublicKeyBytes` and this change has had a significant impact on runtimes. - Unfortunately this has created lots of dust changes. - In the BN, store `PublicKeyBytes` in the `beacon_proposer_cache` and allow access to them. The HTTP API always sends `PublicKeyBytes` over the wire and the conversion from `PublicKey` -> `PublicKeyBytes` is non-trivial, especially when queries have 100s/1000s of validators (like Pyrmont). - Add the `state_processing::state_advance` mod which dedups a lot of the "apply `n` skip slots to the state" code. - This also fixes a bug with some functions which were failing to include a state root as per [this comment](072695284f/consensus/state_processing/src/state_advance.rs (L69-L74)). I couldn't find any instance of this bug that resulted in anything more severe than keying a shuffling cache by the wrong block root. - Swap the VC block service to use `mpsc` from `tokio` instead of `futures`. This is consistent with the rest of the code base. ~~This PR *reduces* the size of the codebase 🎉~~ It *used* to reduce the size of the code base before I added more comments. 
## Observations on Pyrmont - Proposer duties times down from peaks of 450ms to consistent <1ms. - Current epoch attester duties times down from >1s peaks to a consistent 20-30ms. - Block production down from +600ms to 100-200ms. ## Additional Info - ~~Blocked on #2241~~ - ~~Blocked on #2234~~ ## TODO - [x] ~~Refactor this into some smaller PRs?~~ Leaving this as-is for now. - [x] Address `per_slot_processing` roots. - [x] Investigate slow next epoch times. Not getting added to cache on block processing? - [x] Consider [this](072695284f/beacon_node/store/src/hot_cold_store.rs (L811-L812)) in the scenario of replacing the state roots Co-authored-by: pawan <pawandhananjay@gmail.com> Co-authored-by: Michael Sproul <michael@sigmaprime.io>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use crate::ProductionValidatorClient;
|
||||
use slog::{error, info};
|
||||
use crate::{DutiesService, ProductionValidatorClient};
|
||||
use slog::{error, info, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
use tokio::time::{interval_at, Duration, Instant};
|
||||
use tokio::time::{sleep, Duration};
|
||||
use types::EthSpec;
|
||||
|
||||
/// Spawns a notifier service which periodically logs information about the node.
|
||||
@@ -11,86 +11,19 @@ pub fn spawn_notifier<T: EthSpec>(client: &ProductionValidatorClient<T>) -> Resu
|
||||
let duties_service = client.duties_service.clone();
|
||||
|
||||
let slot_duration = Duration::from_secs(context.eth2_config.spec.seconds_per_slot);
|
||||
let duration_to_next_slot = duties_service
|
||||
.slot_clock
|
||||
.duration_to_next_slot()
|
||||
.ok_or("slot_notifier unable to determine time to next slot")?;
|
||||
|
||||
// Run the notifier half way through each slot.
|
||||
let start_instant = Instant::now() + duration_to_next_slot + (slot_duration / 2);
|
||||
let mut interval = interval_at(start_instant, slot_duration);
|
||||
|
||||
let interval_fut = async move {
|
||||
let log = context.log();
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let num_available = duties_service.beacon_nodes.num_available().await;
|
||||
let num_synced = duties_service.beacon_nodes.num_synced().await;
|
||||
let num_total = duties_service.beacon_nodes.num_total().await;
|
||||
if num_synced > 0 {
|
||||
info!(
|
||||
log,
|
||||
"Connected to beacon node(s)";
|
||||
"total" => num_total,
|
||||
"available" => num_available,
|
||||
"synced" => num_synced,
|
||||
)
|
||||
if let Some(duration_to_next_slot) = duties_service.slot_clock.duration_to_next_slot() {
|
||||
sleep(duration_to_next_slot + slot_duration / 2).await;
|
||||
notify(&duties_service, &log).await;
|
||||
} else {
|
||||
error!(
|
||||
log,
|
||||
"No synced beacon nodes";
|
||||
"total" => num_total,
|
||||
"available" => num_available,
|
||||
"synced" => num_synced,
|
||||
)
|
||||
}
|
||||
|
||||
if let Some(slot) = duties_service.slot_clock.now() {
|
||||
let epoch = slot.epoch(T::slots_per_epoch());
|
||||
|
||||
let total_validators = duties_service.total_validator_count();
|
||||
let proposing_validators = duties_service.proposer_count(epoch);
|
||||
let attesting_validators = duties_service.attester_count(epoch);
|
||||
|
||||
if total_validators == 0 {
|
||||
info!(
|
||||
log,
|
||||
"No validators present";
|
||||
"msg" => "see `lighthouse account validator create --help` \
|
||||
or the HTTP API documentation"
|
||||
)
|
||||
} else if total_validators == attesting_validators {
|
||||
info!(
|
||||
log,
|
||||
"All validators active";
|
||||
"proposers" => proposing_validators,
|
||||
"active_validators" => attesting_validators,
|
||||
"total_validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
} else if attesting_validators > 0 {
|
||||
info!(
|
||||
log,
|
||||
"Some validators active";
|
||||
"proposers" => proposing_validators,
|
||||
"active_validators" => attesting_validators,
|
||||
"total_validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
log,
|
||||
"Awaiting activation";
|
||||
"validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
error!(log, "Unable to read slot clock");
|
||||
error!(log, "Failed to read slot clock");
|
||||
// If we can't read the slot clock, just wait another slot.
|
||||
sleep(slot_duration).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -98,3 +31,77 @@ pub fn spawn_notifier<T: EthSpec>(client: &ProductionValidatorClient<T>) -> Resu
|
||||
executor.spawn(interval_fut, "validator_notifier");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs a single notification routine.
|
||||
async fn notify<T: SlotClock + 'static, E: EthSpec>(
|
||||
duties_service: &DutiesService<T, E>,
|
||||
log: &Logger,
|
||||
) {
|
||||
let num_available = duties_service.beacon_nodes.num_available().await;
|
||||
let num_synced = duties_service.beacon_nodes.num_synced().await;
|
||||
let num_total = duties_service.beacon_nodes.num_total().await;
|
||||
if num_synced > 0 {
|
||||
info!(
|
||||
log,
|
||||
"Connected to beacon node(s)";
|
||||
"total" => num_total,
|
||||
"available" => num_available,
|
||||
"synced" => num_synced,
|
||||
)
|
||||
} else {
|
||||
error!(
|
||||
log,
|
||||
"No synced beacon nodes";
|
||||
"total" => num_total,
|
||||
"available" => num_available,
|
||||
"synced" => num_synced,
|
||||
)
|
||||
}
|
||||
|
||||
if let Some(slot) = duties_service.slot_clock.now() {
|
||||
let epoch = slot.epoch(E::slots_per_epoch());
|
||||
|
||||
let total_validators = duties_service.total_validator_count();
|
||||
let proposing_validators = duties_service.proposer_count(epoch);
|
||||
let attesting_validators = duties_service.attester_count(epoch);
|
||||
|
||||
if total_validators == 0 {
|
||||
info!(
|
||||
log,
|
||||
"No validators present";
|
||||
"msg" => "see `lighthouse account validator create --help` \
|
||||
or the HTTP API documentation"
|
||||
)
|
||||
} else if total_validators == attesting_validators {
|
||||
info!(
|
||||
log,
|
||||
"All validators active";
|
||||
"proposers" => proposing_validators,
|
||||
"active_validators" => attesting_validators,
|
||||
"total_validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
} else if attesting_validators > 0 {
|
||||
info!(
|
||||
log,
|
||||
"Some validators active";
|
||||
"proposers" => proposing_validators,
|
||||
"active_validators" => attesting_validators,
|
||||
"total_validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
log,
|
||||
"Awaiting activation";
|
||||
"validators" => total_validators,
|
||||
"epoch" => format!("{}", epoch),
|
||||
"slot" => format!("{}", slot),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
error!(log, "Unable to read slot clock");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user