RPC RequestId Cleanup (#7238)

I've been working at updating another library to latest Lighthouse and got very confused with RPC request Ids.

There were types that had fields called `request_id` and `id`. And interchangeably could have types `PeerRequestId`, `rpc::RequestId`, `AppRequestId`, `api_types::RequestId` or even `Request.id`.

I couldn't keep track of which Id was linked to what and what each type meant.

So this PR mainly does a few things:
- Changes the field naming to match the actual type. So any field that has an `AppRequestId` will be named `app_request_id` rather than `id` or `request_id`, for example.
- I simplified the types. I removed the two different `RequestId` types (one in Lighthouse_network, the other in the rpc) and grouped them into one. It has one downside, though: I had to add a few unreachable lines of code in the beacon processor, which the extra type would prevent, but I feel like it might be worth it. Happy to add an extra type to avoid those few lines.
- I also removed the concept of `PeerRequestId`, which sometimes went alongside a `request_id`. There were times where we had a `PeerRequest` and a `Request` being returned, both of which contain a `RequestId`, so we had redundant information. I've simplified the logic by removing `PeerRequestId` and made a `ResponseId`. I think if you look at the code changes, it simplifies things a bit and removes the redundant extra info.

I think with this PR it is a little bit easier to reason about what is going on with all these RPC Ids.

NOTE: I did this with the help of AI, so probably should be checked
This commit is contained in:
Age Manning
2025-04-03 21:10:15 +11:00
committed by GitHub
parent 80626e58d2
commit d6cd049a45
16 changed files with 438 additions and 739 deletions

View File

@@ -4,8 +4,7 @@
use super::methods::{GoodbyeReason, RpcErrorResponse, RpcResponse};
use super::outbound::OutboundRequestContainer;
use super::protocol::{InboundOutput, Protocol, RPCError, RPCProtocol, RequestType};
use super::RequestId;
use super::{RPCReceived, RPCSend, ReqId, Request};
use super::{RPCReceived, RPCSend, ReqId};
use crate::rpc::outbound::OutboundFramed;
use crate::rpc::protocol::InboundFramed;
use fnv::FnvHashMap;
@@ -91,6 +90,11 @@ pub struct RPCHandler<Id, E>
where
E: EthSpec,
{
/// The PeerId matching this `ConnectionHandler`.
peer_id: PeerId,
/// The ConnectionId matching this `ConnectionHandler`.
connection_id: ConnectionId,
/// The upgrade for inbound substreams.
listen_protocol: SubstreamProtocol<RPCProtocol<E>, ()>,
@@ -139,9 +143,6 @@ where
/// Timeout that will me used for inbound and outbound responses.
resp_timeout: Duration,
/// Information about this handler for logging purposes.
log_info: (PeerId, ConnectionId),
}
enum HandlerState {
@@ -228,6 +229,8 @@ where
connection_id: ConnectionId,
) -> Self {
RPCHandler {
connection_id,
peer_id,
listen_protocol,
events_out: SmallVec::new(),
dial_queue: SmallVec::new(),
@@ -244,7 +247,6 @@ where
fork_context,
waker: None,
resp_timeout,
log_info: (peer_id, connection_id),
}
}
@@ -255,8 +257,8 @@ where
if !self.dial_queue.is_empty() {
debug!(
unsent_queued_requests = self.dial_queue.len(),
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Starting handler shutdown"
);
}
@@ -306,8 +308,8 @@ where
if !matches!(response, RpcResponse::StreamTermination(..)) {
// the stream is closed after sending the expected number of responses
trace!(%response, id = ?inbound_id,
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Inbound stream has expired. Response not sent");
}
return;
@@ -324,8 +326,8 @@ where
if matches!(self.state, HandlerState::Deactivated) {
// we no longer send responses after the handler is deactivated
debug!(%response, id = ?inbound_id,
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Response not sent. Deactivated handler");
return;
}
@@ -394,8 +396,8 @@ where
Poll::Ready(_) => {
self.state = HandlerState::Deactivated;
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Shutdown timeout elapsed, Handler deactivated"
);
return Poll::Ready(ConnectionHandlerEvent::NotifyBehaviour(
@@ -445,8 +447,8 @@ where
)));
} else {
crit!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
stream_id = ?outbound_id.get_ref(), "timed out substream not in the books");
}
}
@@ -577,8 +579,8 @@ where
// Its useful to log when the request was completed.
if matches!(info.protocol, Protocol::BlocksByRange) {
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
duration = Instant::now()
.duration_since(info.request_start_time)
.as_secs(),
@@ -587,8 +589,8 @@ where
}
if matches!(info.protocol, Protocol::BlobsByRange) {
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
duration = Instant::now()
.duration_since(info.request_start_time)
.as_secs(),
@@ -617,16 +619,16 @@ where
if matches!(info.protocol, Protocol::BlocksByRange) {
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
duration = info.request_start_time.elapsed().as_secs(),
"BlocksByRange Response failed"
);
}
if matches!(info.protocol, Protocol::BlobsByRange) {
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
duration = info.request_start_time.elapsed().as_secs(),
"BlobsByRange Response failed"
);
@@ -816,8 +818,8 @@ where
}
OutboundSubstreamState::Poisoned => {
crit!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Poisoned outbound substream"
);
unreachable!("Coding Error: Outbound substream is poisoned")
@@ -852,8 +854,8 @@ where
&& self.dial_negotiated == 0
{
debug!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
"Goodbye sent, Handler deactivated"
);
self.state = HandlerState::Deactivated;
@@ -986,12 +988,13 @@ where
self.shutdown(None);
}
self.events_out
.push(HandlerEvent::Ok(RPCReceived::Request(Request {
id: RequestId::next(),
self.events_out.push(HandlerEvent::Ok(RPCReceived::Request(
super::InboundRequestId {
connection_id: self.connection_id,
substream_id: self.current_inbound_substream_id,
r#type: req,
})));
},
req,
)));
self.current_inbound_substream_id.0 += 1;
}
@@ -1049,9 +1052,8 @@ where
.is_some()
{
crit!(
peer_id = %self.log_info.0,
connection_id = %self.log_info.1,
peer_id = %self.peer_id,
connection_id = %self.connection_id,
id = ?self.current_outbound_substream_id, "Duplicate outbound substream id");
}
self.current_outbound_substream_id.0 += 1;

View File

@@ -16,7 +16,6 @@ use libp2p::PeerId;
use logging::crit;
use rate_limiter::{RPCRateLimiter as RateLimiter, RateLimitedErr};
use std::marker::PhantomData;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration;
@@ -49,8 +48,6 @@ mod protocol;
mod rate_limiter;
mod self_limiter;
static NEXT_REQUEST_ID: AtomicUsize = AtomicUsize::new(1);
/// Composite trait for a request id.
pub trait ReqId: Send + 'static + std::fmt::Debug + Copy + Clone {}
impl<T> ReqId for T where T: Send + 'static + std::fmt::Debug + Copy + Clone {}
@@ -80,7 +77,7 @@ pub enum RPCReceived<Id, E: EthSpec> {
///
/// The `SubstreamId` is given by the `RPCHandler` as it identifies this request with the
/// *inbound* substream over which it is managed.
Request(Request<E>),
Request(InboundRequestId, RequestType<E>),
/// A response received from the outside.
///
/// The `Id` corresponds to the application given ID of the original request sent to the
@@ -91,35 +88,30 @@ pub enum RPCReceived<Id, E: EthSpec> {
EndOfStream(Id, ResponseTermination),
}
/// Rpc `Request` identifier.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct RequestId(usize);
// An identifier for the inbound requests received via Rpc.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub struct InboundRequestId {
/// The connection ID of the peer that sent the request.
connection_id: ConnectionId,
/// The ID of the substream that sent the request.
substream_id: SubstreamId,
}
impl RequestId {
/// Returns the next available [`RequestId`].
pub fn next() -> Self {
Self(NEXT_REQUEST_ID.fetch_add(1, Ordering::SeqCst))
}
/// Creates an _unchecked_ [`RequestId`].
impl InboundRequestId {
/// Creates an _unchecked_ [`InboundRequestId`].
///
/// [`Rpc`] enforces that [`RequestId`]s are unique and not reused.
/// [`Rpc`] enforces that [`InboundRequestId`]s are unique and not reused.
/// This constructor does not, hence the _unchecked_.
///
/// It is primarily meant for allowing manual tests.
pub fn new_unchecked(id: usize) -> Self {
Self(id)
pub fn new_unchecked(connection_id: usize, substream_id: usize) -> Self {
Self {
connection_id: ConnectionId::new_unchecked(connection_id),
substream_id: SubstreamId::new(substream_id),
}
}
}
/// An Rpc Request.
#[derive(Debug, Clone)]
pub struct Request<E: EthSpec> {
pub id: RequestId,
pub substream_id: SubstreamId,
pub r#type: RequestType<E>,
}
impl<E: EthSpec, Id: std::fmt::Debug> std::fmt::Display for RPCSend<Id, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -136,7 +128,7 @@ pub struct RPCMessage<Id, E: EthSpec> {
/// The peer that sent the message.
pub peer_id: PeerId,
/// Handler managing this message.
pub conn_id: ConnectionId,
pub connection_id: ConnectionId,
/// The message that was sent.
pub message: Result<RPCReceived<Id, E>, HandlerErr<Id>>,
}
@@ -215,14 +207,13 @@ impl<Id: ReqId, E: EthSpec> RPC<Id, E> {
pub fn send_response(
&mut self,
peer_id: PeerId,
id: (ConnectionId, SubstreamId),
_request_id: RequestId,
event: RpcResponse<E>,
request_id: InboundRequestId,
response: RpcResponse<E>,
) {
self.events.push(ToSwarm::NotifyHandler {
peer_id,
handler: NotifyHandler::One(id.0),
event: RPCSend::Response(id.1, event),
handler: NotifyHandler::One(request_id.connection_id),
event: RPCSend::Response(request_id.substream_id, response),
});
}
@@ -387,7 +378,7 @@ where
for (id, proto) in limiter.peer_disconnected(peer_id) {
let error_msg = ToSwarm::GenerateEvent(RPCMessage {
peer_id,
conn_id: connection_id,
connection_id,
message: Err(HandlerErr::Outbound {
id,
proto,
@@ -408,7 +399,7 @@ where
} if *p == peer_id => {
*event = ToSwarm::GenerateEvent(RPCMessage {
peer_id,
conn_id: connection_id,
connection_id,
message: Err(HandlerErr::Outbound {
id: *request_id,
proto: req.versioned_protocol().protocol(),
@@ -424,21 +415,17 @@ where
fn on_connection_handler_event(
&mut self,
peer_id: PeerId,
conn_id: ConnectionId,
connection_id: ConnectionId,
event: <Self::ConnectionHandler as ConnectionHandler>::ToBehaviour,
) {
match event {
HandlerEvent::Ok(RPCReceived::Request(Request {
id,
substream_id,
r#type,
})) => {
HandlerEvent::Ok(RPCReceived::Request(request_id, request_type)) => {
if let Some(limiter) = self.limiter.as_mut() {
// check if the request is conformant to the quota
match limiter.allows(&peer_id, &r#type) {
match limiter.allows(&peer_id, &request_type) {
Err(RateLimitedErr::TooLarge) => {
// we set the batch sizes, so this is a coding/config err for most protocols
let protocol = r#type.versioned_protocol().protocol();
let protocol = request_type.versioned_protocol().protocol();
if matches!(
protocol,
Protocol::BlocksByRange
@@ -448,7 +435,7 @@ where
| Protocol::BlobsByRoot
| Protocol::DataColumnsByRoot
) {
debug!(request = %r#type, %protocol, "Request too large to process");
debug!(request = %request_type, %protocol, "Request too large to process");
} else {
// Other protocols shouldn't be sending large messages, we should flag the peer kind
crit!(%protocol, "Request size too large to ever be processed");
@@ -457,8 +444,7 @@ where
// the handler upon receiving the error code will send it back to the behaviour
self.send_response(
peer_id,
(conn_id, substream_id),
id,
request_id,
RpcResponse::Error(
RpcErrorResponse::RateLimited,
"Rate limited. Request too large".into(),
@@ -467,13 +453,12 @@ where
return;
}
Err(RateLimitedErr::TooSoon(wait_time)) => {
debug!(request = %r#type, %peer_id, wait_time_ms = wait_time.as_millis(), "Request exceeds the rate limit");
debug!(request = %request_type, %peer_id, wait_time_ms = wait_time.as_millis(), "Request exceeds the rate limit");
// send an error code to the peer.
// the handler upon receiving the error code will send it back to the behaviour
self.send_response(
peer_id,
(conn_id, substream_id),
id,
request_id,
RpcResponse::Error(
RpcErrorResponse::RateLimited,
format!("Wait {:?}", wait_time).into(),
@@ -487,12 +472,11 @@ where
}
// If we received a Ping, we queue a Pong response.
if let RequestType::Ping(_) = r#type {
trace!(connection_id = %conn_id, %peer_id, "Received Ping, queueing Pong");
if let RequestType::Ping(_) = request_type {
trace!(connection_id = %connection_id, %peer_id, "Received Ping, queueing Pong");
self.send_response(
peer_id,
(conn_id, substream_id),
id,
request_id,
RpcResponse::Success(RpcSuccessResponse::Pong(Ping {
data: self.seq_number,
})),
@@ -501,25 +485,21 @@ where
self.events.push(ToSwarm::GenerateEvent(RPCMessage {
peer_id,
conn_id,
message: Ok(RPCReceived::Request(Request {
id,
substream_id,
r#type,
})),
connection_id,
message: Ok(RPCReceived::Request(request_id, request_type)),
}));
}
HandlerEvent::Ok(rpc) => {
self.events.push(ToSwarm::GenerateEvent(RPCMessage {
peer_id,
conn_id,
connection_id,
message: Ok(rpc),
}));
}
HandlerEvent::Err(err) => {
self.events.push(ToSwarm::GenerateEvent(RPCMessage {
peer_id,
conn_id,
connection_id,
message: Err(err),
}));
}

View File

@@ -207,7 +207,7 @@ mod tests {
use crate::rpc::rate_limiter::Quota;
use crate::rpc::self_limiter::SelfRateLimiter;
use crate::rpc::{Ping, Protocol, RequestType};
use crate::service::api_types::{AppRequestId, RequestId, SingleLookupReqId, SyncRequestId};
use crate::service::api_types::{AppRequestId, SingleLookupReqId, SyncRequestId};
use libp2p::PeerId;
use logging::create_test_tracing_subscriber;
use std::time::Duration;
@@ -226,7 +226,7 @@ mod tests {
Hash256::ZERO,
&MainnetEthSpec::default_spec(),
));
let mut limiter: SelfRateLimiter<RequestId, MainnetEthSpec> =
let mut limiter: SelfRateLimiter<AppRequestId, MainnetEthSpec> =
SelfRateLimiter::new(config, fork_context).unwrap();
let peer_id = PeerId::random();
let lookup_id = 0;
@@ -234,12 +234,12 @@ mod tests {
for i in 1..=5u32 {
let _ = limiter.allows(
peer_id,
RequestId::Application(AppRequestId::Sync(SyncRequestId::SingleBlock {
AppRequestId::Sync(SyncRequestId::SingleBlock {
id: SingleLookupReqId {
lookup_id,
req_id: i,
},
})),
}),
RequestType::Ping(Ping { data: i as u64 }),
);
}
@@ -256,9 +256,9 @@ mod tests {
for i in 2..=5u32 {
assert!(matches!(
iter.next().unwrap().request_id,
RequestId::Application(AppRequestId::Sync(SyncRequestId::SingleBlock {
AppRequestId::Sync(SyncRequestId::SingleBlock {
id: SingleLookupReqId { req_id, .. },
})) if req_id == i,
}) if req_id == i,
));
}
@@ -281,9 +281,9 @@ mod tests {
for i in 3..=5 {
assert!(matches!(
iter.next().unwrap().request_id,
RequestId::Application(AppRequestId::Sync(SyncRequestId::SingleBlock {
AppRequestId::Sync(SyncRequestId::SingleBlock {
id: SingleLookupReqId { req_id, .. },
})) if req_id == i,
}) if req_id == i,
));
}