mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-02 04:03:35 +00:00
Fix race condition in VC block proposal service (#1282)
Closes #918 Closes #923
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
use crate::{is_synced::is_synced, validator_store::ValidatorStore};
|
||||
use crate::{
|
||||
block_service::BlockServiceNotification, is_synced::is_synced, validator_store::ValidatorStore,
|
||||
};
|
||||
use environment::RuntimeContext;
|
||||
use futures::StreamExt;
|
||||
use futures::channel::mpsc::Sender;
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use parking_lot::RwLock;
|
||||
use remote_beacon_node::{PublishStatus, RemoteBeaconNode};
|
||||
use rest_types::{ValidatorDuty, ValidatorDutyBytes, ValidatorSubscription};
|
||||
@@ -137,6 +140,8 @@ enum InsertOutcome {
|
||||
NewEpoch,
|
||||
/// The duties were identical to some already in the store.
|
||||
Identical,
|
||||
/// The duties informed us of new proposal slots but were otherwise identical.
|
||||
NewProposalSlots,
|
||||
/// There were duties for this validator and epoch in the store that were different to the ones
|
||||
/// provided. The existing duties were replaced.
|
||||
Replaced { should_resubscribe: bool },
|
||||
@@ -149,10 +154,10 @@ impl InsertOutcome {
|
||||
pub fn is_subscription_candidate(self) -> bool {
|
||||
match self {
|
||||
InsertOutcome::Replaced { should_resubscribe } => should_resubscribe,
|
||||
InsertOutcome::NewValidator => true,
|
||||
InsertOutcome::NewEpoch => true,
|
||||
InsertOutcome::Identical => false,
|
||||
InsertOutcome::Invalid => false,
|
||||
InsertOutcome::NewValidator | InsertOutcome::NewEpoch => true,
|
||||
InsertOutcome::Identical | InsertOutcome::Invalid | InsertOutcome::NewProposalSlots => {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -171,8 +176,14 @@ impl DutiesStore {
|
||||
.filter(|(_validator_pubkey, validator_map)| {
|
||||
validator_map
|
||||
.get(&epoch)
|
||||
.map(|duties| !duties.duty.block_proposal_slots.is_empty())
|
||||
.unwrap_or_else(|| false)
|
||||
.map(|duties| {
|
||||
duties
|
||||
.duty
|
||||
.block_proposal_slots
|
||||
.as_ref()
|
||||
.map_or(false, |proposal_slots| !proposal_slots.is_empty())
|
||||
})
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.count()
|
||||
}
|
||||
@@ -191,7 +202,7 @@ impl DutiesStore {
|
||||
.count()
|
||||
}
|
||||
|
||||
fn block_producers(&self, slot: Slot, slots_per_epoch: u64) -> Vec<PublicKey> {
|
||||
fn block_proposers(&self, slot: Slot, slots_per_epoch: u64) -> Vec<PublicKey> {
|
||||
self.store
|
||||
.read()
|
||||
.iter()
|
||||
@@ -201,7 +212,7 @@ impl DutiesStore {
|
||||
let epoch = slot.epoch(slots_per_epoch);
|
||||
|
||||
validator_map.get(&epoch).and_then(|duties| {
|
||||
if duties.duty.block_proposal_slots.contains(&slot) {
|
||||
if duties.duty.block_proposal_slots.as_ref()?.contains(&slot) {
|
||||
Some(duties.duty.validator_pubkey.clone())
|
||||
} else {
|
||||
None
|
||||
@@ -260,8 +271,15 @@ impl DutiesStore {
|
||||
|
||||
if let Some(validator_map) = store.get_mut(&duties.duty.validator_pubkey) {
|
||||
if let Some(known_duties) = validator_map.get_mut(&epoch) {
|
||||
if known_duties.duty == duties.duty {
|
||||
Ok(InsertOutcome::Identical)
|
||||
if known_duties.duty.eq_ignoring_proposal_slots(&duties.duty) {
|
||||
if known_duties.duty.block_proposal_slots == duties.duty.block_proposal_slots {
|
||||
Ok(InsertOutcome::Identical)
|
||||
} else if duties.duty.block_proposal_slots.is_some() {
|
||||
known_duties.duty.block_proposal_slots = duties.duty.block_proposal_slots;
|
||||
Ok(InsertOutcome::NewProposalSlots)
|
||||
} else {
|
||||
Ok(InsertOutcome::Invalid)
|
||||
}
|
||||
} else {
|
||||
// Compute the selection proof.
|
||||
duties.compute_selection_proof(validator_store)?;
|
||||
@@ -388,8 +406,9 @@ pub struct Inner<T, E: EthSpec> {
|
||||
|
||||
/// Maintains a store of the duties for all voting validators in the `validator_store`.
|
||||
///
|
||||
/// Polls the beacon node at the start of each epoch, collecting duties for the current and next
|
||||
/// epoch.
|
||||
/// Polls the beacon node at the start of each slot, collecting duties for the current and next
|
||||
/// epoch. The duties service notifies the block production service to run each time it completes,
|
||||
/// so it *must* be run every slot.
|
||||
pub struct DutiesService<T, E: EthSpec> {
|
||||
inner: Arc<Inner<T, E>>,
|
||||
}
|
||||
@@ -430,8 +449,8 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
///
|
||||
/// It is possible that multiple validators have an identical proposal slot, however that is
|
||||
/// likely the result of heavy forking (lol) or inconsistent beacon node connections.
|
||||
pub fn block_producers(&self, slot: Slot) -> Vec<PublicKey> {
|
||||
self.store.block_producers(slot, E::slots_per_epoch())
|
||||
pub fn block_proposers(&self, slot: Slot) -> Vec<PublicKey> {
|
||||
self.store.block_proposers(slot, E::slots_per_epoch())
|
||||
}
|
||||
|
||||
/// Returns all `ValidatorDuty` for the given `slot`.
|
||||
@@ -440,7 +459,11 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
}
|
||||
|
||||
/// Start the service that periodically polls the beacon node for validator duties.
|
||||
pub fn start_update_service(self, spec: &ChainSpec) -> Result<(), String> {
|
||||
pub fn start_update_service(
|
||||
self,
|
||||
mut block_service_tx: Sender<BlockServiceNotification>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<(), String> {
|
||||
let duration_to_next_slot = self
|
||||
.slot_clock
|
||||
.duration_to_next_slot()
|
||||
@@ -456,17 +479,19 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
};
|
||||
|
||||
// Run an immediate update before starting the updater service.
|
||||
let duties_service = self.clone();
|
||||
let mut block_service_tx_clone = block_service_tx.clone();
|
||||
self.inner
|
||||
.context
|
||||
.executor
|
||||
.runtime_handle()
|
||||
.spawn(self.clone().do_update());
|
||||
.spawn(async move { duties_service.do_update(&mut block_service_tx_clone).await });
|
||||
|
||||
let executor = self.inner.context.executor.clone();
|
||||
|
||||
let interval_fut = async move {
|
||||
while interval.next().await.is_some() {
|
||||
self.clone().do_update().await.ok();
|
||||
self.clone().do_update(&mut block_service_tx).await;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -476,42 +501,40 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
}
|
||||
|
||||
/// Attempt to download the duties of all managed validators for this epoch and the next.
|
||||
async fn do_update(self) -> Result<(), ()> {
|
||||
async fn do_update(self, block_service_tx: &mut Sender<BlockServiceNotification>) {
|
||||
let log = self.context.log();
|
||||
|
||||
if !is_synced(&self.beacon_node, &self.slot_clock, None).await
|
||||
&& !self.allow_unsynced_beacon_node
|
||||
{
|
||||
return Ok(());
|
||||
return;
|
||||
}
|
||||
|
||||
let current_epoch = self
|
||||
.slot_clock
|
||||
.now()
|
||||
.ok_or_else(|| {
|
||||
error!(log, "Duties manager failed to read slot clock");
|
||||
})
|
||||
.map(|slot| {
|
||||
let epoch = slot.epoch(E::slots_per_epoch());
|
||||
let slot = if let Some(slot) = self.slot_clock.now() {
|
||||
slot
|
||||
} else {
|
||||
error!(log, "Duties manager failed to read slot clock");
|
||||
return;
|
||||
};
|
||||
|
||||
if slot % E::slots_per_epoch() == 0 {
|
||||
let prune_below = epoch - PRUNE_DEPTH;
|
||||
let current_epoch = slot.epoch(E::slots_per_epoch());
|
||||
|
||||
trace!(
|
||||
log,
|
||||
"Pruning duties cache";
|
||||
"pruning_below" => prune_below.as_u64(),
|
||||
"current_epoch" => epoch.as_u64(),
|
||||
);
|
||||
if slot % E::slots_per_epoch() == 0 {
|
||||
let prune_below = current_epoch - PRUNE_DEPTH;
|
||||
|
||||
self.store.prune(prune_below);
|
||||
}
|
||||
trace!(
|
||||
log,
|
||||
"Pruning duties cache";
|
||||
"pruning_below" => prune_below.as_u64(),
|
||||
"current_epoch" => current_epoch.as_u64(),
|
||||
);
|
||||
|
||||
epoch
|
||||
})?;
|
||||
self.store.prune(prune_below);
|
||||
}
|
||||
|
||||
let result = self.clone().update_epoch(current_epoch).await;
|
||||
if let Err(e) = result {
|
||||
// Update duties for the current epoch, but keep running if there's an error:
|
||||
// block production or the next epoch update could still succeed.
|
||||
if let Err(e) = self.clone().update_epoch(current_epoch).await {
|
||||
error!(
|
||||
log,
|
||||
"Failed to get current epoch duties";
|
||||
@@ -519,18 +542,29 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
);
|
||||
}
|
||||
|
||||
self.clone()
|
||||
.update_epoch(current_epoch + 1)
|
||||
// Notify the block service to produce a block.
|
||||
if let Err(e) = block_service_tx
|
||||
.send(BlockServiceNotification {
|
||||
slot,
|
||||
block_proposers: self.block_proposers(slot),
|
||||
})
|
||||
.await
|
||||
.map_err(move |e| {
|
||||
error!(
|
||||
log,
|
||||
"Failed to get next epoch duties";
|
||||
"http_error" => format!("{:?}", e)
|
||||
);
|
||||
})?;
|
||||
{
|
||||
error!(
|
||||
log,
|
||||
"Failed to notify block service";
|
||||
"error" => format!("{:?}", e)
|
||||
);
|
||||
};
|
||||
|
||||
Ok(())
|
||||
// Update duties for the next epoch.
|
||||
if let Err(e) = self.clone().update_epoch(current_epoch + 1).await {
|
||||
error!(
|
||||
log,
|
||||
"Failed to get next epoch duties";
|
||||
"http_error" => format!("{:?}", e)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to download the duties of all managed validators for the given `epoch`.
|
||||
@@ -548,6 +582,7 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
|
||||
let mut new_validator = 0;
|
||||
let mut new_epoch = 0;
|
||||
let mut new_proposal_slots = 0;
|
||||
let mut identical = 0;
|
||||
let mut replaced = 0;
|
||||
let mut invalid = 0;
|
||||
@@ -596,6 +631,7 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
);
|
||||
new_validator += 1;
|
||||
}
|
||||
InsertOutcome::NewProposalSlots => new_proposal_slots += 1,
|
||||
InsertOutcome::NewEpoch => new_epoch += 1,
|
||||
InsertOutcome::Identical => identical += 1,
|
||||
InsertOutcome::Replaced { .. } => replaced += 1,
|
||||
@@ -634,6 +670,7 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
"Performed duties update";
|
||||
"identical" => identical,
|
||||
"new_epoch" => new_epoch,
|
||||
"new_proposal_slots" => new_proposal_slots,
|
||||
"new_validator" => new_validator,
|
||||
"replaced" => replaced,
|
||||
"epoch" => format!("{}", epoch)
|
||||
@@ -643,7 +680,8 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
|
||||
warn!(
|
||||
log,
|
||||
"Duties changed during routine update";
|
||||
"info" => "Chain re-org likely occurred"
|
||||
"info" => "Chain re-org likely occurred",
|
||||
"replaced" => replaced,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -688,8 +726,9 @@ fn duties_match_epoch(duties: &ValidatorDuty, epoch: Epoch, slots_per_epoch: u64
|
||||
duties
|
||||
.attestation_slot
|
||||
.map_or(true, |slot| slot.epoch(slots_per_epoch) == epoch)
|
||||
&& duties
|
||||
.block_proposal_slots
|
||||
.iter()
|
||||
.all(|slot| slot.epoch(slots_per_epoch) == epoch)
|
||||
&& duties.block_proposal_slots.as_ref().map_or(true, |slots| {
|
||||
slots
|
||||
.iter()
|
||||
.all(|slot| slot.epoch(slots_per_epoch) == epoch)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user