Avoid penalizing peers for delays during processing (#2894)

## Issue Addressed

NA

## Proposed Changes

We have observed occasions where under-resourced nodes will receive messages that were valid *at the time*, but later become invalidated due to long waits for a `BeaconProcessor` worker.

In this PR, we will check to see if the message was valid *at the time of receipt*. If it was initially valid but invalid now, we just ignore the message without penalizing the peer.

## Additional Info

NA
This commit is contained in:
Paul Hauner
2022-01-12 02:36:24 +00:00
parent b656007963
commit 61f60bdf03
4 changed files with 95 additions and 36 deletions

View File

@@ -2,9 +2,9 @@ use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
use beacon_chain::store::Error;
use beacon_chain::{
attestation_verification::{Error as AttnError, VerifiedAttestation},
attestation_verification::{self, Error as AttnError, VerifiedAttestation},
observed_operations::ObservationOutcome,
sync_committee_verification::Error as SyncCommitteeError,
sync_committee_verification::{self, Error as SyncCommitteeError},
validator_monitor::get_block_delay_ms,
BeaconChainError, BeaconChainTypes, BlockError, ExecutionPayloadError, ForkChoiceError,
GossipVerifiedBlock,
@@ -19,7 +19,7 @@ use tokio::sync::mpsc;
use types::{
Attestation, AttesterSlashing, EthSpec, Hash256, IndexedAttestation, ProposerSlashing,
SignedAggregateAndProof, SignedBeaconBlock, SignedContributionAndProof, SignedVoluntaryExit,
SubnetId, SyncCommitteeMessage, SyncSubnetId,
Slot, SubnetId, SyncCommitteeMessage, SyncSubnetId,
};
use super::{
@@ -100,12 +100,7 @@ enum FailedAtt<T: EthSpec> {
impl<T: EthSpec> FailedAtt<T> {
pub fn beacon_block_root(&self) -> &Hash256 {
match self {
FailedAtt::Unaggregate { attestation, .. } => &attestation.data.beacon_block_root,
FailedAtt::Aggregate { attestation, .. } => {
&attestation.message.aggregate.data.beacon_block_root
}
}
&self.attestation().data.beacon_block_root
}
pub fn kind(&self) -> &'static str {
@@ -114,6 +109,13 @@ impl<T: EthSpec> FailedAtt<T> {
FailedAtt::Aggregate { .. } => "aggregated",
}
}
pub fn attestation(&self) -> &Attestation<T> {
match self {
FailedAtt::Unaggregate { attestation, .. } => attestation,
FailedAtt::Aggregate { attestation, .. } => &attestation.message.aggregate,
}
}
}
/// Items required to verify a batch of unaggregated gossip attestations.
@@ -410,6 +412,7 @@ impl<T: BeaconChainTypes> Worker<T> {
},
reprocess_tx,
error,
seen_timestamp,
);
}
}
@@ -608,6 +611,7 @@ impl<T: BeaconChainTypes> Worker<T> {
},
reprocess_tx,
error,
seen_timestamp,
);
}
}
@@ -1117,6 +1121,7 @@ impl<T: BeaconChainTypes> Worker<T> {
subnet_id: SyncSubnetId,
seen_timestamp: Duration,
) {
let message_slot = sync_signature.slot;
let sync_signature = match self
.chain
.verify_sync_committee_message_for_gossip(sync_signature, subnet_id)
@@ -1128,6 +1133,8 @@ impl<T: BeaconChainTypes> Worker<T> {
message_id,
"sync_signature",
e,
message_slot,
seen_timestamp,
);
return;
}
@@ -1177,6 +1184,7 @@ impl<T: BeaconChainTypes> Worker<T> {
sync_contribution: SignedContributionAndProof<T::EthSpec>,
seen_timestamp: Duration,
) {
let contribution_slot = sync_contribution.message.contribution.slot;
let sync_contribution = match self
.chain
.verify_sync_contribution_for_gossip(sync_contribution)
@@ -1189,6 +1197,8 @@ impl<T: BeaconChainTypes> Worker<T> {
message_id,
"sync_contribution",
e,
contribution_slot,
seen_timestamp,
);
return;
}
@@ -1232,6 +1242,7 @@ impl<T: BeaconChainTypes> Worker<T> {
failed_att: FailedAtt<T::EthSpec>,
reprocess_tx: Option<mpsc::Sender<ReprocessQueueMessage<T>>>,
error: AttnError,
seen_timestamp: Duration,
) {
let beacon_block_root = failed_att.beacon_block_root();
let attestation_type = failed_att.kind();
@@ -1239,8 +1250,7 @@ impl<T: BeaconChainTypes> Worker<T> {
match &error {
AttnError::FutureEpoch { .. }
| AttnError::PastEpoch { .. }
| AttnError::FutureSlot { .. }
| AttnError::PastSlot { .. } => {
| AttnError::FutureSlot { .. } => {
/*
* These errors can be triggered by a mismatch between our slot and the peer.
*
@@ -1262,6 +1272,24 @@ impl<T: BeaconChainTypes> Worker<T> {
// Do not propagate these messages.
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
}
AttnError::PastSlot { .. } => {
// Produce a slot clock frozen at the time we received the message from the
// network.
let seen_clock = &self.chain.slot_clock.freeze_at(seen_timestamp);
let hindsight_verification =
attestation_verification::verify_propagation_slot_range(
seen_clock,
failed_att.attestation(),
);
// Only penalize the peer if it would have been invalid at the moment we received
// it.
if hindsight_verification.is_err() {
self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError);
}
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
}
AttnError::InvalidSelectionProof { .. } | AttnError::InvalidSignature => {
/*
* These errors are caused by invalid signatures.
@@ -1625,6 +1653,8 @@ impl<T: BeaconChainTypes> Worker<T> {
message_id: MessageId,
message_type: &str,
error: SyncCommitteeError,
sync_committee_message_slot: Slot,
seen_timestamp: Duration,
) {
metrics::register_sync_committee_error(&error);
@@ -1650,10 +1680,7 @@ impl<T: BeaconChainTypes> Worker<T> {
// Do not propagate these messages.
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
}
SyncCommitteeError::PastSlot {
message_slot,
earliest_permissible_slot,
} => {
SyncCommitteeError::PastSlot { .. } => {
/*
* This error can be triggered by a mismatch between our slot and the peer.
*
@@ -1667,12 +1694,34 @@ impl<T: BeaconChainTypes> Worker<T> {
"type" => ?message_type,
);
// We tolerate messages that were just one slot late.
if *message_slot + 1 < *earliest_permissible_slot {
// Compute the slot when we received the message.
let received_slot = self
.chain
.slot_clock
.slot_of(seen_timestamp)
.unwrap_or_else(|| self.chain.slot_clock.genesis_slot());
// The message is "excessively" late if it was more than one slot late.
let excessively_late = received_slot > sync_committee_message_slot + 1;
// This closure will lazily produce a slot clock frozen at the time we received the
// message from the network and return a bool indicating if the message was invalid
// at the time of receipt too.
let invalid_in_hindsight = || {
let seen_clock = &self.chain.slot_clock.freeze_at(seen_timestamp);
let hindsight_verification =
sync_committee_verification::verify_propagation_slot_range(
seen_clock,
&sync_committee_message_slot,
);
hindsight_verification.is_err()
};
// Penalize the peer if the message was more than one slot late
if excessively_late && invalid_in_hindsight() {
self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError);
}
// Do not propagate these messages.
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
}
SyncCommitteeError::EmptyAggregationBitfield => {