Delayed RPC Send Using Tokens (#5923)

closes https://github.com/sigp/lighthouse/issues/5785


The diagram below shows the differences in how the receiver (responder) behaves before and after this PR. The sections below detail the changes.

```mermaid
flowchart TD

subgraph "*** After ***"
Start2([START]) --> AA[Receive request]
AA --> COND1{Is there already an active request <br> with the same protocol?}
COND1 --> |Yes| CC[Send error response]
CC --> End2([END])
%% COND1 --> |No| COND2{Request is too large?}
%% COND2 --> |Yes| CC
COND1 --> |No| DD[Process request]
DD --> EE{Rate limit reached?}
EE --> |Yes| FF[Wait until tokens are regenerated]
FF --> EE
EE --> |No| GG[Send response]
GG --> End2
end

subgraph "*** Before ***"
Start([START]) --> A[Receive request]
A --> B{Rate limit reached <br> or <br> request is too large?}
B -->|Yes| C[Send error response]
C --> End([END])
B -->|No| E[Process request]
E --> F[Send response]
F --> End
end
```

### `Is there already an active request with the same protocol?`

This check is not performed in `Before`. It comes from a consensus-specs PR that proposes updates regarding rate limiting and response timeouts:
https://github.com/ethereum/consensus-specs/pull/3767/files
> The requester MUST NOT make more than two concurrent requests with the same ID.

The spec PR addresses the requester side. In this PR, I introduced an `ActiveRequestsLimiter` on the responder side to prevent more than two requests from running simultaneously on the same protocol per peer. If the limiter disallows a request, the responder sends a rate-limited error and penalizes the requester.
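Below is a minimal sketch of the idea behind a responder-side limiter keyed by peer and protocol. The stand-in `PeerId`/`Protocol` types, the method names, and the limit constant are illustrative only (the diagram above checks for *any* already-active request on the protocol), not the exact code introduced in this PR:

```rust
use std::collections::HashMap;

// Illustrative stand-ins; the real code uses libp2p's `PeerId` and the RPC `Protocol` enum.
type PeerId = u64;
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Protocol {
    Status,
    BlocksByRange,
    BlocksByRoot,
}

/// Maximum number of requests a single peer may have in flight on one protocol
/// before the responder answers with a rate-limited error. The diagram above
/// checks for any already-active request, i.e. a limit of 1 (illustrative).
const MAX_ACTIVE_PER_PROTOCOL: usize = 1;

/// Tracks how many requests are currently being processed per (peer, protocol).
#[derive(Default)]
struct ActiveRequestsLimiter {
    active: HashMap<(PeerId, Protocol), usize>,
}

impl ActiveRequestsLimiter {
    /// Returns `true` if the request may start processing; `false` means the
    /// responder should send a rate-limited error and penalize the requester.
    fn allows(&mut self, peer: PeerId, protocol: Protocol) -> bool {
        let count = self.active.entry((peer, protocol)).or_insert(0);
        if *count >= MAX_ACTIVE_PER_PROTOCOL {
            false
        } else {
            *count += 1;
            true
        }
    }

    /// Called once the response for this request has been fully sent,
    /// freeing the slot for the next request from that peer.
    fn request_completed(&mut self, peer: PeerId, protocol: Protocol) {
        if let Some(count) = self.active.get_mut(&(peer, protocol)) {
            *count = count.saturating_sub(1);
        }
    }
}
```

The behaviour would call something like `request_completed` when the response stream for a request finishes, so the next request from that peer on the same protocol is allowed again.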



### `Rate limit reached?` and `Wait until tokens are regenerated`

UPDATE: I moved the limiter logic to the behaviour side. https://github.com/sigp/lighthouse/pull/5923#issuecomment-2379535927
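As a rough illustration of the `Rate limit reached?` and `Wait until tokens are regenerated` steps, here is a self-contained sketch of the token-bucket behaviour; the `ResponseLimiter` type and its methods are hypothetical and not the actual limiter in this PR. The point is that a response exceeding the quota is queued together with the instant at which the next token regenerates, instead of being rejected with an error:

```rust
use std::collections::VecDeque;
use std::time::{Duration, Instant};

/// Illustrative limiter: `max_tokens` tokens, one regenerated every `period`.
/// A quota string like "status:1/3" (used in the new tests) corresponds to
/// 1 token per 3 seconds for STATUS responses.
struct ResponseLimiter<Response> {
    max_tokens: u64,
    tokens: u64,
    period: Duration,
    last_refill: Instant,
    /// Responses waiting for a token instead of being dropped with an error.
    queued: VecDeque<Response>,
}

impl<Response> ResponseLimiter<Response> {
    fn new(max_tokens: u64, period: Duration) -> Self {
        Self {
            max_tokens,
            tokens: max_tokens,
            period,
            last_refill: Instant::now(),
            queued: VecDeque::new(),
        }
    }

    /// Regenerate one token per full `period` elapsed, up to `max_tokens`.
    fn refill(&mut self) {
        if self.tokens == self.max_tokens {
            // A full bucket does not accumulate extra tokens.
            self.last_refill = Instant::now();
            return;
        }
        while self.tokens < self.max_tokens && self.last_refill.elapsed() >= self.period {
            self.tokens += 1;
            self.last_refill += self.period;
        }
    }

    /// Try to send now. If no token is available, queue the response and return
    /// the instant at which the behaviour should wake up and retry.
    fn allow_or_queue(&mut self, response: Response) -> Result<Response, Instant> {
        self.refill();
        if self.tokens > 0 {
            self.tokens -= 1;
            Ok(response)
        } else {
            self.queued.push_back(response);
            Err(self.last_refill + self.period)
        }
    }

    /// Called when the wake-up fires: release one queued response per new token.
    fn next_ready(&mut self) -> Option<Response> {
        self.refill();
        if self.tokens > 0 && !self.queued.is_empty() {
            self.tokens -= 1;
            self.queued.pop_front()
        } else {
            None
        }
    }
}
```

With the `status:1/3` quota used in the new `test_delayed_rpc_response` test below (1 token per 3 seconds for STATUS responses), the first response goes out immediately and each subsequent one is released roughly three seconds later, which is what the test asserts.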

~~The rate limiter is shared between the behaviour and the handler (`Arc<Mutex<RateLimiter>>`). The handler checks the rate limit and queues the response if the limit is reached. The behaviour handles pruning.~~

~~I considered not sharing the rate limiter between the behaviour and the handler, and instead performing everything within either the behaviour or the handler. However, I decided against this for the following reasons:~~

- ~~Regarding performing everything within the behaviour: The behaviour is unable to recognize the response protocol when `RPC::send_response()` is called, especially when the response is `RPCCodedResponse::Error`. Therefore, the behaviour can't rate limit responses based on the response protocol.~~
- ~~Regarding performing everything within the handler: When multiple connections are established with a peer, there could be multiple handlers interacting with that peer. Thus, we cannot enforce rate limiting per peer solely within the handler. (Any ideas? 🤔 )~~

```diff
@@ -16,6 +16,7 @@ use types::{
type E = MinimalEthSpec;
+use lighthouse_network::rpc::config::InboundRateLimiterConfig;
use tempfile::Builder as TempBuilder;
/// Returns a dummy fork context
@@ -77,7 +78,11 @@ pub fn build_tracing_subscriber(level: &str, enabled: bool) {
}
}
-pub fn build_config(mut boot_nodes: Vec<Enr>) -> Arc<NetworkConfig> {
+pub fn build_config(
+mut boot_nodes: Vec<Enr>,
+disable_peer_scoring: bool,
+inbound_rate_limiter: Option<InboundRateLimiterConfig>,
+) -> Arc<NetworkConfig> {
let mut config = NetworkConfig::default();
// Find unused ports by using the 0 port.
@@ -93,6 +98,8 @@ pub fn build_config(mut boot_nodes: Vec<Enr>) -> Arc<NetworkConfig> {
config.enr_address = (Some(std::net::Ipv4Addr::LOCALHOST), None);
config.boot_nodes_enr.append(&mut boot_nodes);
config.network_dir = path.into_path();
+config.disable_peer_scoring = disable_peer_scoring;
+config.inbound_rate_limiter_config = inbound_rate_limiter;
Arc::new(config)
}
@@ -102,8 +109,10 @@ pub async fn build_libp2p_instance(
fork_name: ForkName,
chain_spec: Arc<ChainSpec>,
service_name: String,
+disable_peer_scoring: bool,
+inbound_rate_limiter: Option<InboundRateLimiterConfig>,
) -> Libp2pInstance {
-let config = build_config(boot_nodes);
+let config = build_config(boot_nodes, disable_peer_scoring, inbound_rate_limiter);
// launch libp2p service
let (signal, exit) = async_channel::bounded(1);
@@ -144,6 +153,8 @@ pub async fn build_node_pair(
fork_name: ForkName,
spec: Arc<ChainSpec>,
protocol: Protocol,
+disable_peer_scoring: bool,
+inbound_rate_limiter: Option<InboundRateLimiterConfig>,
) -> (Libp2pInstance, Libp2pInstance) {
let mut sender = build_libp2p_instance(
rt.clone(),
@@ -151,10 +162,20 @@ pub async fn build_node_pair(
fork_name,
spec.clone(),
"sender".to_string(),
+disable_peer_scoring,
+inbound_rate_limiter.clone(),
)
.await;
-let mut receiver =
-build_libp2p_instance(rt, vec![], fork_name, spec.clone(), "receiver".to_string()).await;
+let mut receiver = build_libp2p_instance(
+rt,
+vec![],
+fork_name,
+spec.clone(),
+"receiver".to_string(),
+disable_peer_scoring,
+inbound_rate_limiter,
+)
+.await;
// let the two nodes set up listeners
let sender_fut = async {
@@ -235,6 +256,8 @@ pub async fn build_linear(
fork_name,
spec.clone(),
"linear".to_string(),
+false,
+None,
)
.await,
);

```

```diff
@@ -9,10 +9,10 @@ use lighthouse_network::{NetworkEvent, ReportSource, Response};
use ssz::Encode;
use ssz_types::VariableList;
use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, Instant};
use tokio::runtime::Runtime;
use tokio::time::sleep;
-use tracing::{debug, warn};
+use tracing::{debug, error, warn};
use types::{
BeaconBlock, BeaconBlockAltair, BeaconBlockBase, BeaconBlockBellatrix, BlobSidecar, ChainSpec,
EmptyBlock, Epoch, EthSpec, FixedBytesExtended, ForkName, Hash256, MinimalEthSpec,
@@ -64,8 +64,15 @@ fn test_tcp_status_rpc() {
rt.block_on(async {
// get sender/receiver
-let (mut sender, mut receiver) =
-common::build_node_pair(Arc::downgrade(&rt), ForkName::Base, spec, Protocol::Tcp).await;
+let (mut sender, mut receiver) = common::build_node_pair(
+Arc::downgrade(&rt),
+ForkName::Base,
+spec,
+Protocol::Tcp,
+false,
+None,
+)
+.await;
// Dummy STATUS RPC message
let rpc_request = RequestType::Status(StatusMessage {
@@ -168,6 +175,8 @@ fn test_tcp_blocks_by_range_chunked_rpc() {
ForkName::Bellatrix,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -311,6 +320,8 @@ fn test_blobs_by_range_chunked_rpc() {
ForkName::Deneb,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -430,6 +441,8 @@ fn test_tcp_blocks_by_range_over_limit() {
ForkName::Bellatrix,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -533,6 +546,8 @@ fn test_tcp_blocks_by_range_chunked_rpc_terminates_correctly() {
ForkName::Base,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -665,6 +680,8 @@ fn test_tcp_blocks_by_range_single_empty_rpc() {
ForkName::Base,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -785,6 +802,8 @@ fn test_tcp_blocks_by_root_chunked_rpc() {
ForkName::Bellatrix,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -929,6 +948,8 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() {
ForkName::Base,
spec.clone(),
Protocol::Tcp,
+false,
+None,
)
.await;
@@ -1065,8 +1086,15 @@ fn goodbye_test(log_level: &str, enable_logging: bool, protocol: Protocol) {
// get sender/receiver
rt.block_on(async {
-let (mut sender, mut receiver) =
-common::build_node_pair(Arc::downgrade(&rt), ForkName::Base, spec, protocol).await;
+let (mut sender, mut receiver) = common::build_node_pair(
+Arc::downgrade(&rt),
+ForkName::Base,
+spec,
+protocol,
+false,
+None,
+)
+.await;
// build the sender future
let sender_future = async {
@@ -1127,3 +1155,239 @@ fn quic_test_goodbye_rpc() {
let enabled_logging = false;
goodbye_test(log_level, enabled_logging, Protocol::Quic);
}
// Test that the receiver delays the responses during response rate-limiting.
#[test]
fn test_delayed_rpc_response() {
let rt = Arc::new(Runtime::new().unwrap());
let spec = Arc::new(E::default_spec());
// Allow 1 token to be used every 3 seconds.
const QUOTA_SEC: u64 = 3;
rt.block_on(async {
// get sender/receiver
let (mut sender, mut receiver) = common::build_node_pair(
Arc::downgrade(&rt),
ForkName::Base,
spec,
Protocol::Tcp,
false,
// Configure a quota for STATUS responses of 1 token every 3 seconds.
Some(format!("status:1/{QUOTA_SEC}").parse().unwrap()),
)
.await;
// Dummy STATUS RPC message
let rpc_request = RequestType::Status(StatusMessage {
fork_digest: [0; 4],
finalized_root: Hash256::from_low_u64_be(0),
finalized_epoch: Epoch::new(1),
head_root: Hash256::from_low_u64_be(0),
head_slot: Slot::new(1),
});
// Dummy STATUS RPC message
let rpc_response = Response::Status(StatusMessage {
fork_digest: [0; 4],
finalized_root: Hash256::from_low_u64_be(0),
finalized_epoch: Epoch::new(1),
head_root: Hash256::from_low_u64_be(0),
head_slot: Slot::new(1),
});
// build the sender future
let sender_future = async {
let mut request_id = 1;
let mut request_sent_at = Instant::now();
loop {
match sender.next_event().await {
NetworkEvent::PeerConnectedOutgoing(peer_id) => {
debug!(%request_id, "Sending RPC request");
sender
.send_request(peer_id, AppRequestId::Router, rpc_request.clone())
.unwrap();
request_sent_at = Instant::now();
}
NetworkEvent::ResponseReceived {
peer_id,
app_request_id: _,
response,
} => {
debug!(%request_id, "Sender received");
assert_eq!(response, rpc_response);
match request_id {
1 => {
// The first response is returned instantly.
assert!(request_sent_at.elapsed() < Duration::from_millis(100));
}
2..=5 => {
// The second and subsequent responses are delayed due to the response rate-limiter on the receiver side.
// Adding a slight margin to the elapsed time check to account for potential timing issues caused by system
// scheduling or execution delays during testing.
assert!(
request_sent_at.elapsed()
> (Duration::from_secs(QUOTA_SEC)
- Duration::from_millis(100))
);
if request_id == 5 {
// End the test
return;
}
}
_ => unreachable!(),
}
request_id += 1;
debug!(%request_id, "Sending RPC request");
sender
.send_request(peer_id, AppRequestId::Router, rpc_request.clone())
.unwrap();
request_sent_at = Instant::now();
}
NetworkEvent::RPCFailed {
app_request_id: _,
peer_id: _,
error,
} => {
error!(?error, "RPC Failed");
panic!("Rpc failed.");
}
_ => {}
}
}
};
// build the receiver future
let receiver_future = async {
loop {
if let NetworkEvent::RequestReceived {
peer_id,
inbound_request_id,
request_type,
} = receiver.next_event().await
{
assert_eq!(request_type, rpc_request);
debug!("Receiver received request");
receiver.send_response(peer_id, inbound_request_id, rpc_response.clone());
}
}
};
tokio::select! {
_ = sender_future => {}
_ = receiver_future => {}
_ = sleep(Duration::from_secs(30)) => {
panic!("Future timed out");
}
}
})
}
// Test that a rate-limited error doesn't occur even if the sender attempts to send many requests at
// once, thanks to the self-limiter on the sender side.
#[test]
fn test_active_requests() {
let rt = Arc::new(Runtime::new().unwrap());
let spec = Arc::new(E::default_spec());
rt.block_on(async {
// Get sender/receiver.
let (mut sender, mut receiver) = common::build_node_pair(
Arc::downgrade(&rt),
ForkName::Base,
spec,
Protocol::Tcp,
false,
None,
)
.await;
// Dummy STATUS RPC request.
let rpc_request = RequestType::Status(StatusMessage {
fork_digest: [0; 4],
finalized_root: Hash256::from_low_u64_be(0),
finalized_epoch: Epoch::new(1),
head_root: Hash256::from_low_u64_be(0),
head_slot: Slot::new(1),
});
// Dummy STATUS RPC response.
let rpc_response = Response::Status(StatusMessage {
fork_digest: [0; 4],
finalized_root: Hash256::zero(),
finalized_epoch: Epoch::new(1),
head_root: Hash256::zero(),
head_slot: Slot::new(1),
});
// Number of requests.
const REQUESTS: u8 = 10;
// Build the sender future.
let sender_future = async {
let mut response_received = 0;
loop {
match sender.next_event().await {
NetworkEvent::PeerConnectedOutgoing(peer_id) => {
debug!("Sending RPC request");
// Send requests in quick succession to intentionally trigger request queueing in the self-limiter.
for _ in 0..REQUESTS {
sender
.send_request(peer_id, AppRequestId::Router, rpc_request.clone())
.unwrap();
}
}
NetworkEvent::ResponseReceived { response, .. } => {
debug!(?response, "Sender received response");
if matches!(response, Response::Status(_)) {
response_received += 1;
}
}
NetworkEvent::RPCFailed {
app_request_id: _,
peer_id: _,
error,
} => panic!("RPC failed: {:?}", error),
_ => {}
}
if response_received == REQUESTS {
return;
}
}
};
// Build the receiver future.
let receiver_future = async {
let mut received_requests = vec![];
loop {
tokio::select! {
event = receiver.next_event() => {
if let NetworkEvent::RequestReceived { peer_id, inbound_request_id, request_type } = event {
debug!(?request_type, "Receiver received request");
if matches!(request_type, RequestType::Status(_)) {
received_requests.push((peer_id, inbound_request_id));
}
}
}
// Introduce a delay in sending responses to trigger request queueing on the sender side.
_ = sleep(Duration::from_secs(3)) => {
for (peer_id, inbound_request_id) in received_requests.drain(..) {
receiver.send_response(peer_id, inbound_request_id, rpc_response.clone());
}
}
}
}
};
tokio::select! {
_ = sender_future => {}
_ = receiver_future => {}
_ = sleep(Duration::from_secs(30)) => {
panic!("Future timed out");
}
}
})
}
```