mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-09 11:41:51 +00:00
closes https://github.com/sigp/lighthouse/issues/5785 The diagram below shows the differences in how the receiver (responder) behaves before and after this PR. The following sentences will detail the changes. ```mermaid flowchart TD subgraph "*** After ***" Start2([START]) --> AA[Receive request] AA --> COND1{Is there already an active request <br> with the same protocol?} COND1 --> |Yes| CC[Send error response] CC --> End2([END]) %% COND1 --> |No| COND2{Request is too large?} %% COND2 --> |Yes| CC COND1 --> |No| DD[Process request] DD --> EE{Rate limit reached?} EE --> |Yes| FF[Wait until tokens are regenerated] FF --> EE EE --> |No| GG[Send response] GG --> End2 end subgraph "*** Before ***" Start([START]) --> A[Receive request] A --> B{Rate limit reached <br> or <br> request is too large?} B -->|Yes| C[Send error response] C --> End([END]) B -->|No| E[Process request] E --> F[Send response] F --> End end ``` ### `Is there already an active request with the same protocol?` This check is not performed in `Before`. This is taken from the PR in the consensus-spec, which proposes updates regarding rate limiting and response timeout. https://github.com/ethereum/consensus-specs/pull/3767/files > The requester MUST NOT make more than two concurrent requests with the same ID. The PR mentions the requester side. In this PR, I introduced the `ActiveRequestsLimiter` for the `responder` side to restrict more than two requests from running simultaneously on the same protocol per peer. If the limiter disallows a request, the responder sends a rate-limited error and penalizes the requester. ### `Rate limit reached?` and `Wait until tokens are regenerated` UPDATE: I moved the limiter logic to the behaviour side. https://github.com/sigp/lighthouse/pull/5923#issuecomment-2379535927 ~~The rate limiter is shared between the behaviour and the handler. (`Arc<Mutex<RateLimiter>>>`) The handler checks the rate limit and queues the response if the limit is reached. The behaviour handles pruning.~~ ~~I considered not sharing the rate limiter between the behaviour and the handler, and performing all of these either within the behaviour or handler. However, I decided against this for the following reasons:~~ - ~~Regarding performing everything within the behaviour: The behaviour is unable to recognize the response protocol when `RPC::send_response()` is called, especially when the response is `RPCCodedResponse::Error`. Therefore, the behaviour can't rate limit responses based on the response protocol.~~ - ~~Regarding performing everything within the handler: When multiple connections are established with a peer, there could be multiple handlers interacting with that peer. Thus, we cannot enforce rate limiting per peer solely within the handler. (Any ideas? 🤔 )~~
233 lines
8.1 KiB
Rust
233 lines
8.1 KiB
Rust
pub use metrics::*;
|
|
use std::sync::LazyLock;
|
|
|
|
pub static NAT_OPEN: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"nat_open",
|
|
"An estimate indicating if the local node is reachable from external nodes",
|
|
&["protocol"],
|
|
)
|
|
});
|
|
|
|
pub static ADDRESS_UPDATE_COUNT: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
|
try_create_int_counter(
|
|
"libp2p_address_update_total",
|
|
"Count of libp2p socked updated events (when our view of our IP address has changed)",
|
|
)
|
|
});
|
|
pub static PEERS_CONNECTED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
|
try_create_int_gauge("libp2p_peers", "Count of libp2p peers currently connected")
|
|
});
|
|
|
|
pub static PEERS_CONNECTED_MULTI: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"libp2p_peers_multi",
|
|
"Count of libp2p peers currently connected",
|
|
&["direction", "transport"],
|
|
)
|
|
});
|
|
|
|
pub static TCP_PEERS_CONNECTED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
|
try_create_int_gauge(
|
|
"libp2p_tcp_peers",
|
|
"Count of libp2p peers currently connected via TCP",
|
|
)
|
|
});
|
|
|
|
pub static QUIC_PEERS_CONNECTED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
|
try_create_int_gauge(
|
|
"libp2p_quic_peers",
|
|
"Count of libp2p peers currently connected via QUIC",
|
|
)
|
|
});
|
|
|
|
pub static PEER_CONNECT_EVENT_COUNT: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
|
try_create_int_counter(
|
|
"libp2p_peer_connect_event_total",
|
|
"Count of libp2p peer connect events (not the current number of connected peers)",
|
|
)
|
|
});
|
|
pub static PEER_DISCONNECT_EVENT_COUNT: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
|
try_create_int_counter(
|
|
"libp2p_peer_disconnect_event_total",
|
|
"Count of libp2p peer disconnect events",
|
|
)
|
|
});
|
|
pub static DISCOVERY_BYTES: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"discovery_bytes",
|
|
"The number of bytes sent and received in discovery",
|
|
&["direction"],
|
|
)
|
|
});
|
|
pub static DISCOVERY_QUEUE: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
|
try_create_int_gauge(
|
|
"discovery_queue_size",
|
|
"The number of discovery queries awaiting execution",
|
|
)
|
|
});
|
|
pub static DISCOVERY_REQS: LazyLock<Result<Gauge>> = LazyLock::new(|| {
|
|
try_create_float_gauge(
|
|
"discovery_requests",
|
|
"The number of unsolicited discovery requests per second",
|
|
)
|
|
});
|
|
pub static DISCOVERY_SESSIONS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
|
|
try_create_int_gauge(
|
|
"discovery_sessions",
|
|
"The number of active discovery sessions with peers",
|
|
)
|
|
});
|
|
pub static DISCOVERY_NO_USEFUL_ENRS: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
|
try_create_int_counter(
|
|
"discovery_no_useful_enrs_found",
|
|
"Total number of counts a query returned no useful ENRs to dial",
|
|
)
|
|
});
|
|
|
|
pub static PEERS_PER_CLIENT: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"libp2p_peers_per_client",
|
|
"The connected peers via client implementation",
|
|
&["Client"],
|
|
)
|
|
});
|
|
|
|
pub static PEERS_PER_CUSTODY_GROUP_COUNT: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"peers_per_custody_group_count",
|
|
"The current count of peers by custody group count",
|
|
&["custody_group_count"],
|
|
)
|
|
});
|
|
|
|
pub static FAILED_ATTESTATION_PUBLISHES_PER_SUBNET: LazyLock<Result<IntGaugeVec>> =
|
|
LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"gossipsub_failed_attestation_publishes_per_subnet",
|
|
"Failed attestation publishes per subnet",
|
|
&["subnet"],
|
|
)
|
|
});
|
|
pub static FAILED_PUBLISHES_PER_MAIN_TOPIC: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"gossipsub_failed_publishes_per_main_topic",
|
|
"Failed gossip publishes",
|
|
&["topic_hash"],
|
|
)
|
|
});
|
|
pub static TOTAL_RPC_ERRORS_PER_CLIENT: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"libp2p_rpc_errors_per_client",
|
|
"RPC errors per client",
|
|
&["client", "rpc_error", "direction"],
|
|
)
|
|
});
|
|
pub static TOTAL_RPC_REQUESTS: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec("libp2p_rpc_requests_total", "RPC requests total", &["type"])
|
|
});
|
|
pub static PEER_ACTION_EVENTS_PER_CLIENT: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"libp2p_peer_actions_per_client",
|
|
"Score reports per client",
|
|
&["client", "action", "source"],
|
|
)
|
|
});
|
|
pub static GOSSIP_UNACCEPTED_MESSAGES_PER_CLIENT: LazyLock<Result<IntCounterVec>> =
|
|
LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"gossipsub_unaccepted_messages_per_client",
|
|
"Gossipsub messages that we did not accept, per client",
|
|
&["client", "validation_result"],
|
|
)
|
|
});
|
|
pub static GOSSIP_LATE_PUBLISH_PER_TOPIC_KIND: LazyLock<Result<IntCounterVec>> =
|
|
LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"gossipsub_late_publish_per_topic_kind",
|
|
"Messages published late to gossipsub per topic kind.",
|
|
&["topic_kind"],
|
|
)
|
|
});
|
|
pub static GOSSIP_EXPIRED_LATE_PUBLISH_PER_TOPIC_KIND: LazyLock<Result<IntCounterVec>> =
|
|
LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"gossipsub_expired_late_publish_per_topic_kind",
|
|
"Messages that expired waiting to be published on retry to gossipsub per topic kind.",
|
|
&["topic_kind"],
|
|
)
|
|
});
|
|
pub static GOSSIP_FAILED_LATE_PUBLISH_PER_TOPIC_KIND: LazyLock<Result<IntCounterVec>> =
|
|
LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"gossipsub_failed_late_publish_per_topic_kind",
|
|
"Messages that failed to be published on retry to gossipsub per topic kind.",
|
|
&["topic_kind"],
|
|
)
|
|
});
|
|
pub static PEER_SCORE_DISTRIBUTION: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
|
|
try_create_int_gauge_vec(
|
|
"peer_score_distribution",
|
|
"The distribution of connected peer scores",
|
|
&["position"],
|
|
)
|
|
});
|
|
pub static PEER_SCORE_PER_CLIENT: LazyLock<Result<GaugeVec>> = LazyLock::new(|| {
|
|
try_create_float_gauge_vec(
|
|
"peer_score_per_client",
|
|
"Average score per client",
|
|
&["client"],
|
|
)
|
|
});
|
|
|
|
pub static SUBNET_PEERS_FOUND: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"discovery_query_peers_found",
|
|
"Total number of peers found in attestation subnets and sync subnets",
|
|
&["type"],
|
|
)
|
|
});
|
|
pub static TOTAL_SUBNET_QUERIES: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"discovery_total_queries",
|
|
"Total number of discovery subnet queries",
|
|
&["type"],
|
|
)
|
|
});
|
|
|
|
/*
|
|
* Peer Reporting
|
|
*/
|
|
pub static REPORT_PEER_MSGS: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
|
|
try_create_int_counter_vec(
|
|
"libp2p_report_peer_msgs_total",
|
|
"Number of peer reports per msg",
|
|
&["msg"],
|
|
)
|
|
});
|
|
|
|
pub static OUTBOUND_REQUEST_IDLING: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
|
try_create_histogram(
|
|
"outbound_request_idling_seconds",
|
|
"The time our own request remained idle in the self-limiter",
|
|
)
|
|
});
|
|
|
|
pub static RESPONSE_IDLING: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
|
try_create_histogram(
|
|
"response_idling_seconds",
|
|
"The time our response remained idle in the response limiter",
|
|
)
|
|
});
|
|
|
|
pub fn scrape_discovery_metrics() {
|
|
let metrics =
|
|
discv5::metrics::Metrics::from(discv5::Discv5::<discv5::DefaultProtocolId>::raw_metrics());
|
|
set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second);
|
|
set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64);
|
|
set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64);
|
|
set_gauge_vec(&DISCOVERY_BYTES, &["outbound"], metrics.bytes_sent as i64);
|
|
set_gauge_vec(&NAT_OPEN, &["discv5_ipv4"], metrics.ipv4_contactable as i64);
|
|
set_gauge_vec(&NAT_OPEN, &["discv5_ipv6"], metrics.ipv6_contactable as i64);
|
|
}
|