Files
lighthouse/beacon_node/lighthouse_network/src/rpc/self_limiter.rs
Age Manning d6cd049a45 RPC RequestId Cleanup (#7238)
I've been working at updating another library to latest Lighthouse and got very confused with RPC request Ids.

There were types that had fields called `request_id` and `id`. And interchangeably could have types `PeerRequestId`, `rpc::RequestId`, `AppRequestId`, `api_types::RequestId` or even `Request.id`.

I couldn't keep track of which Id was linked to what and what each type meant.

So this PR mainly does a few things:
- Changes the field naming to match the actual type. So any field that has an  `AppRequestId` will be named `app_request_id` rather than `id` or `request_id` for example.
- I simplified the types. I removed the two different `RequestId` types (one in Lighthouse_network the other in the rpc) and grouped them into one. It has one downside though. I had to add a few unreachable lines of code in the beacon processor, which the extra type would prevent, but I feel like it might be worth it. Happy to add an extra type to avoid those few lines.
- I also removed the concept of `PeerRequestId` which sometimes went alongside a `request_id`. There were times where we had a `PeerRequest` and a `Request` being returned, both of which contain a `RequestId`, so we had redundant information. I've simplified the logic by removing `PeerRequestId` and made a `ResponseId`. I think if you look at the code changes, it simplifies things a bit and removes the redundant extra info.

I think with this PR it is a little bit easier to reason about what is going on with all these RPC Ids.

NOTE: I did this with the help of AI, so probably should be checked
2025-04-03 10:10:15 +00:00

294 lines
12 KiB
Rust

use std::{
collections::{hash_map::Entry, HashMap, VecDeque},
sync::Arc,
task::{Context, Poll},
time::Duration,
};
use futures::FutureExt;
use libp2p::{swarm::NotifyHandler, PeerId};
use logging::crit;
use smallvec::SmallVec;
use tokio_util::time::DelayQueue;
use tracing::debug;
use types::{EthSpec, ForkContext};
use super::{
config::OutboundRateLimiterConfig,
rate_limiter::{RPCRateLimiter as RateLimiter, RateLimitedErr},
BehaviourAction, Protocol, RPCSend, ReqId, RequestType,
};
/// A request that was rate limited or waiting on rate limited requests for the same peer and
/// protocol.
struct QueuedRequest<Id: ReqId, E: EthSpec> {
    /// The request payload, retried once the limiter allows it.
    req: RequestType<E>,
    /// Caller-supplied id; reported back (e.g. on peer disconnection) so the
    /// caller can correlate the failed request.
    request_id: Id,
}
/// Rate limiter applied to our own (outbound) RPC requests. Requests that exceed
/// the configured quota are queued per (peer, protocol) pair and released once
/// the limiter allows them again.
pub(crate) struct SelfRateLimiter<Id: ReqId, E: EthSpec> {
    /// Requests queued for sending per peer. These requests are stored when the self rate
    /// limiter rejects them. Rate limiting is applied on a (Peer, Protocol) basis, therefore
    /// the requests are keyed in the same way.
    delayed_requests: HashMap<(PeerId, Protocol), VecDeque<QueuedRequest<Id, E>>>,
    /// The delay required to allow a peer's outbound request per protocol.
    next_peer_request: DelayQueue<(PeerId, Protocol)>,
    /// Rate limiter for our own requests.
    limiter: RateLimiter,
    /// Requests that are ready to be sent, drained by `poll_ready`.
    ready_requests: SmallVec<[(PeerId, RPCSend<Id, E>); 3]>,
}
/// Error returned when the rate limiter does not accept a request.
// NOTE: this is currently not used, but might be useful for debugging.
pub enum Error {
    /// There are queued requests for this same peer and protocol; the new
    /// request was appended behind them to preserve ordering.
    PendingRequests,
    /// Request was tried but rate limited; it has been queued for retry.
    RateLimited,
}
impl<Id: ReqId, E: EthSpec> SelfRateLimiter<Id, E> {
    /// Creates a new [`SelfRateLimiter`] based on configuration values.
    ///
    /// Fails with a static error string if the underlying [`RateLimiter`]
    /// rejects the quota configuration.
    pub fn new(
        config: OutboundRateLimiterConfig,
        fork_context: Arc<ForkContext>,
    ) -> Result<Self, &'static str> {
        debug!(?config, "Using self rate limiting params");
        let limiter = RateLimiter::new_with_config(config.0, fork_context)?;

        Ok(SelfRateLimiter {
            delayed_requests: Default::default(),
            next_peer_request: Default::default(),
            limiter,
            ready_requests: Default::default(),
        })
    }

    /// Checks if the rate limiter allows the request. If it's allowed, returns the
    /// [`ToSwarm`] event that should be emitted. When not allowed, the request is delayed
    /// until it can be sent.
    pub fn allows(
        &mut self,
        peer_id: PeerId,
        request_id: Id,
        req: RequestType<E>,
    ) -> Result<RPCSend<Id, E>, Error> {
        let protocol = req.versioned_protocol().protocol();
        // First check that there are not already other requests waiting to be sent.
        // Queuing behind them preserves FIFO ordering per (peer, protocol).
        if let Some(queued_requests) = self.delayed_requests.get_mut(&(peer_id, protocol)) {
            queued_requests.push_back(QueuedRequest { req, request_id });
            return Err(Error::PendingRequests);
        }
        match Self::try_send_request(&mut self.limiter, peer_id, request_id, req) {
            Err((rate_limited_req, wait_time)) => {
                let key = (peer_id, protocol);
                // Schedule a wake-up for when the limiter should next allow this
                // (peer, protocol) pair, and park the request until then.
                self.next_peer_request.insert(key, wait_time);
                self.delayed_requests
                    .entry(key)
                    .or_default()
                    .push_back(rate_limited_req);
                Err(Error::RateLimited)
            }
            Ok(event) => Ok(event),
        }
    }

    /// Auxiliary function to deal with self rate limiting outcomes. If the rate limiter allows the
    /// request, the [`ToSwarm`] event that should be emitted is returned. If the request
    /// should be delayed, it's returned together with the duration to wait.
    fn try_send_request(
        limiter: &mut RateLimiter,
        peer_id: PeerId,
        request_id: Id,
        req: RequestType<E>,
    ) -> Result<RPCSend<Id, E>, (QueuedRequest<Id, E>, Duration)> {
        match limiter.allows(&peer_id, &req) {
            Ok(()) => Ok(RPCSend::Request(request_id, req)),
            Err(e) => {
                let protocol = req.versioned_protocol();
                match e {
                    RateLimitedErr::TooLarge => {
                        // This should never happen with default parameters. Let's just send the
                        // request anyway rather than drop it.
                        // Log a crit since this is a config issue.
                        crit!(
                            protocol = %req.versioned_protocol().protocol(),
                            "Self rate limiting error for a batch that will never fit. Sending request anyway. Check configuration parameters."
                        );
                        Ok(RPCSend::Request(request_id, req))
                    }
                    RateLimitedErr::TooSoon(wait_time) => {
                        debug!(protocol = %protocol.protocol(), wait_time_ms = wait_time.as_millis(), %peer_id, "Self rate limiting");
                        Err((QueuedRequest { req, request_id }, wait_time))
                    }
                }
            }
        }
    }

    /// When a peer and protocol are allowed to send a next request, this function checks the
    /// queued requests and attempts marking as ready as many as the limiter allows.
    fn next_peer_request_ready(&mut self, peer_id: PeerId, protocol: Protocol) {
        if let Entry::Occupied(mut entry) = self.delayed_requests.entry((peer_id, protocol)) {
            let queued_requests = entry.get_mut();
            while let Some(QueuedRequest { req, request_id }) = queued_requests.pop_front() {
                match Self::try_send_request(&mut self.limiter, peer_id, request_id, req) {
                    Err((rate_limited_req, wait_time)) => {
                        let key = (peer_id, protocol);
                        self.next_peer_request.insert(key, wait_time);
                        // Put the request back at the front so FIFO order is preserved.
                        queued_requests.push_front(rate_limited_req);
                        // If one fails just wait for the next window that allows sending requests.
                        return;
                    }
                    Ok(event) => self.ready_requests.push((peer_id, event)),
                }
            }
            // Drop the map entry once its queue is drained so `allows` takes the
            // fast path again for this (peer, protocol).
            if queued_requests.is_empty() {
                entry.remove();
            }
        }
        // NOTE: There can be entries that have been removed due to peer disconnections, we simply
        // ignore these messages here.
    }

    /// Informs the limiter that a peer has disconnected. This removes any pending requests and
    /// returns their IDs.
    pub fn peer_disconnected(&mut self, peer_id: PeerId) -> Vec<(Id, Protocol)> {
        // It's not ideal to iterate this map, but the key is (PeerId, Protocol) and this map
        // should never really be large. So we iterate for simplicity.
        let mut failed_requests = Vec::new();
        self.delayed_requests
            .retain(|(map_peer_id, protocol), queue| {
                if map_peer_id == &peer_id {
                    // NOTE: Currently cannot remove entries from the DelayQueue, we will just let
                    // them expire and ignore them.
                    for message in queue {
                        failed_requests.push((message.request_id, *protocol))
                    }
                    // Remove the entry
                    false
                } else {
                    // Keep the entry
                    true
                }
            });
        failed_requests
    }

    /// Drives the limiter forward, returning any request that has become ready
    /// to be sent to a peer's handler.
    pub fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<BehaviourAction<Id, E>> {
        // First check the requests that were self rate limited, since those might add events to
        // the queue. Also do this before rate limiter pruning to avoid removing and
        // immediately adding rate limiting keys.
        if let Poll::Ready(Some(expired)) = self.next_peer_request.poll_expired(cx) {
            let (peer_id, protocol) = expired.into_inner();
            self.next_peer_request_ready(peer_id, protocol);
        }
        // Prune the rate limiter.
        let _ = self.limiter.poll_unpin(cx);
        // Finally return any queued events.
        if let Some((peer_id, event)) = self.ready_requests.pop() {
            return Poll::Ready(BehaviourAction::NotifyHandler {
                peer_id,
                handler: NotifyHandler::Any,
                event,
            });
        }
        Poll::Pending
    }
}
#[cfg(test)]
mod tests {
    use crate::rpc::config::{OutboundRateLimiterConfig, RateLimiterConfig};
    use crate::rpc::rate_limiter::Quota;
    use crate::rpc::self_limiter::SelfRateLimiter;
    use crate::rpc::{Ping, Protocol, RequestType};
    use crate::service::api_types::{AppRequestId, SingleLookupReqId, SyncRequestId};
    use libp2p::PeerId;
    use logging::create_test_tracing_subscriber;
    use std::time::Duration;
    use types::{EthSpec, ForkContext, Hash256, MainnetEthSpec, Slot};

    /// Checks that `next_peer_request_ready` releases only what the limiter
    /// allows while keeping the per-peer queue in FIFO order.
    #[tokio::test]
    async fn test_next_peer_request_ready() {
        create_test_tracing_subscriber();
        // One ping every two seconds: the first request goes through, the
        // remaining ones must be queued.
        let config = OutboundRateLimiterConfig(RateLimiterConfig {
            ping_quota: Quota::n_every(1, 2),
            ..Default::default()
        });
        let fork_context = std::sync::Arc::new(ForkContext::new::<MainnetEthSpec>(
            Slot::new(0),
            Hash256::ZERO,
            &MainnetEthSpec::default_spec(),
        ));
        let mut limiter: SelfRateLimiter<AppRequestId, MainnetEthSpec> =
            SelfRateLimiter::new(config, fork_context).unwrap();
        let peer_id = PeerId::random();
        let lookup_id = 0;

        // Submit five pings in sequence; only the first fits the quota window.
        for n in 1..=5u32 {
            let _ = limiter.allows(
                peer_id,
                AppRequestId::Sync(SyncRequestId::SingleBlock {
                    id: SingleLookupReqId {
                        lookup_id,
                        req_id: n,
                    },
                }),
                RequestType::Ping(Ping { data: n as u64 }),
            );
        }

        {
            let queue = limiter
                .delayed_requests
                .get(&(peer_id, Protocol::Ping))
                .unwrap();
            assert_eq!(4, queue.len());
            // Requests 2, 3, 4, 5 must still be queued, in submission order.
            let mut iter = queue.iter();
            for n in 2..=5u32 {
                assert!(matches!(
                    iter.next().unwrap().request_id,
                    AppRequestId::Sync(SyncRequestId::SingleBlock {
                        id: SingleLookupReqId { req_id, .. },
                    }) if req_id == n,
                ));
            }
            assert_eq!(limiter.ready_requests.len(), 0);
        }

        // Let the limiter regenerate a token, then release whatever it allows.
        tokio::time::sleep(Duration::from_secs(3)).await;
        limiter.next_peer_request_ready(peer_id, Protocol::Ping);

        {
            let queue = limiter
                .delayed_requests
                .get(&(peer_id, Protocol::Ping))
                .unwrap();
            assert_eq!(3, queue.len());
            // Exactly one request was released; 3, 4, 5 remain in order.
            let mut iter = queue.iter();
            for n in 3..=5 {
                assert!(matches!(
                    iter.next().unwrap().request_id,
                    AppRequestId::Sync(SyncRequestId::SingleBlock {
                        id: SingleLookupReqId { req_id, .. },
                    }) if req_id == n,
                ));
            }
            assert_eq!(limiter.ready_requests.len(), 1);
        }
    }
}