mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-15 19:02:42 +00:00
More metrics + RPC tweaks (#2041)
## Issue Addressed
NA
## Proposed Changes
This was mostly done to find the reason why LH was dropping peers from Nimbus. It proved to be useful, so I think it's worth keeping. There are also some functional changes here:
- Add metrics for rpc errors per client, error type and direction
- Add metrics for downscoring events per source type, client and penalty type
- Add metrics for gossip validation results per client for non-accepted messages
- Make the RPC handler return errors and requests/responses in the order we see them
- Allow a small burst for the Ping rate limit, from 1 every 5 seconds to 2 every 10 seconds
- Send rate limiting errors with a particular code and use that same code to identify them. I picked something different from 128 since that is most likely what other clients are using for their own errors
- Remove some unused code in the `PeerAction` and the rpc handler
- Remove the unused variant `RateLimited`. This was never produced directly, since the only way to get the request's protocol is via the handler. The handler, upon receiving from LH a response with an error (rate limited in this case), emits this event with the missing info. (It was always like this; just pointing out that we do downscore rate limiting errors regardless of the change.)
Metrics for Nimbus looked like this:
Downscoring events: `increase(libp2p_peer_actions_per_client{client="Nimbus"}[5m])`

RPC Errors: `increase(libp2p_rpc_errors_per_client{client="Nimbus"}[5m])`

Unaccepted gossip message: `increase(gossipsub_unaccepted_messages_per_client{client="Nimbus"}[5m])`

This commit is contained in:
@@ -4,7 +4,7 @@ use beacon_chain::{
|
||||
attestation_verification::Error as AttnError, observed_operations::ObservationOutcome,
|
||||
BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
|
||||
};
|
||||
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId};
|
||||
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
|
||||
use slog::{debug, error, info, trace, warn};
|
||||
use ssz::Encode;
|
||||
use types::{
|
||||
@@ -18,8 +18,12 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
/* Auxiliary functions */
|
||||
|
||||
/// Penalizes a peer for misbehaviour.
|
||||
fn penalize_peer(&self, peer_id: PeerId, action: PeerAction) {
|
||||
self.send_network_message(NetworkMessage::ReportPeer { peer_id, action })
|
||||
fn gossip_penalize_peer(&self, peer_id: PeerId, action: PeerAction) {
|
||||
self.send_network_message(NetworkMessage::ReportPeer {
|
||||
peer_id,
|
||||
action,
|
||||
source: ReportSource::Gossipsub,
|
||||
})
|
||||
}
|
||||
|
||||
/// Send a message on `message_tx` that the `message_id` sent by `peer_id` should be propagated on
|
||||
@@ -235,7 +239,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
debug!(self.log, "Could not verify block for gossip, ignoring the block";
|
||||
"error" => %e);
|
||||
// Prevent recurring behaviour by penalizing the peer slightly.
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||
return;
|
||||
}
|
||||
@@ -259,7 +263,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id, PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -312,7 +316,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
"block root" => %block.canonical_root(),
|
||||
"block slot" => block.slot()
|
||||
);
|
||||
self.penalize_peer(peer_id, PeerAction::MidToleranceError);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError);
|
||||
trace!(
|
||||
self.log,
|
||||
"Invalid gossip beacon block ssz";
|
||||
@@ -362,7 +366,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
MessageAcceptance::Ignore,
|
||||
);
|
||||
// We still penalize a peer slightly to prevent overuse of invalids.
|
||||
self.penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -419,7 +423,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
);
|
||||
|
||||
// Penalize peer slightly for invalids.
|
||||
self.penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -468,7 +472,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
MessageAcceptance::Ignore,
|
||||
);
|
||||
// Penalize peer slightly for invalids.
|
||||
self.penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -518,7 +522,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
|
||||
// Peers that are slow or not to spec can spam us with these messages draining our
|
||||
// bandwidth. We therefore penalize these peers when they do this.
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
|
||||
// Do not propagate these messages.
|
||||
self.propagate_validation_result(
|
||||
@@ -538,7 +542,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::EmptyAggregationBitfield => {
|
||||
/*
|
||||
@@ -553,7 +557,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::AggregatorPubkeyUnknown(_) => {
|
||||
/*
|
||||
@@ -574,7 +578,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::AggregatorNotInCommittee { .. } => {
|
||||
/*
|
||||
@@ -595,7 +599,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::AttestationAlreadyKnown { .. } => {
|
||||
/*
|
||||
@@ -630,7 +634,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
);
|
||||
// We still penalize the peer slightly. We don't want this to be a recurring
|
||||
// behaviour.
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||
|
||||
@@ -651,7 +655,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
);
|
||||
// We still penalize the peer slightly. We don't want this to be a recurring
|
||||
// behaviour.
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||
|
||||
@@ -669,7 +673,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::UnknownHeadBlock { beacon_block_root } => {
|
||||
// Note: it's a little bit unclear as to whether or not this block is unknown or
|
||||
@@ -699,7 +703,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
});
|
||||
// We still penalize the peer slightly. We don't want this to be a recurring
|
||||
// behaviour.
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||
return;
|
||||
@@ -726,7 +730,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::BadTargetEpoch => {
|
||||
/*
|
||||
@@ -740,7 +744,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::NoCommitteeForSlotAndIndex { .. } => {
|
||||
/*
|
||||
@@ -753,7 +757,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::NotExactlyOneAggregationBitSet(_) => {
|
||||
/*
|
||||
@@ -766,7 +770,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::AttestsToFutureBlock { .. } => {
|
||||
/*
|
||||
@@ -779,7 +783,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
|
||||
AttnError::InvalidSubnetId { received, expected } => {
|
||||
@@ -797,7 +801,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::Invalid(_) => {
|
||||
/*
|
||||
@@ -810,7 +814,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::InvalidTargetEpoch { .. } => {
|
||||
/*
|
||||
@@ -823,7 +827,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::InvalidTargetRoot { .. } => {
|
||||
/*
|
||||
@@ -836,7 +840,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::LowToleranceError);
|
||||
}
|
||||
AttnError::TooManySkippedSlots {
|
||||
head_block_slot,
|
||||
@@ -860,7 +864,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
peer_id.clone(),
|
||||
MessageAcceptance::Reject,
|
||||
);
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::MidToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::MidToleranceError);
|
||||
}
|
||||
AttnError::BeaconChainError(e) => {
|
||||
/*
|
||||
@@ -882,7 +886,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
MessageAcceptance::Ignore,
|
||||
);
|
||||
// Penalize the peer slightly
|
||||
self.penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
self.gossip_penalize_peer(peer_id.clone(), PeerAction::HighToleranceError);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ use crate::sync::SyncMessage;
|
||||
use beacon_chain::{BeaconChainError, BeaconChainTypes};
|
||||
use eth2_libp2p::rpc::StatusMessage;
|
||||
use eth2_libp2p::rpc::*;
|
||||
use eth2_libp2p::{PeerId, PeerRequestId, Response, SyncInfo};
|
||||
use eth2_libp2p::{PeerId, PeerRequestId, ReportSource, Response, SyncInfo};
|
||||
use itertools::process_results;
|
||||
use slog::{debug, error, warn};
|
||||
use slot_clock::SlotClock;
|
||||
@@ -18,7 +18,11 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
|
||||
/// Disconnects and bans a peer, sending a Goodbye request with the associated reason.
|
||||
pub fn goodbye_peer(&self, peer_id: PeerId, reason: GoodbyeReason) {
|
||||
self.send_network_message(NetworkMessage::GoodbyePeer { peer_id, reason });
|
||||
self.send_network_message(NetworkMessage::GoodbyePeer {
|
||||
peer_id,
|
||||
reason,
|
||||
source: ReportSource::Processor,
|
||||
});
|
||||
}
|
||||
|
||||
pub fn send_response(
|
||||
|
||||
Reference in New Issue
Block a user