Validator client head monitor timeout fix (#8846)

Fix a bug in v8.1.0 whereby the VC times out continuously with:

> Feb 18 02:03:48.030 WARN Head service failed retrying starting next slot error: "Head monitoring stream error, node: 0, error: SseClient(Transport(reqwest::Error { kind: Decode, source: reqwest::Error { kind: Body, source: TimedOut } }))"

- Remove the existing timeout for the events API by using `Duration::MAX`. This is necessary because the client is configured with a default timeout, and this is the only way to override/remove it.
- DO NOT add a `read_timeout` (yet), as this would need to be configured on a per-client basis. We do not want to create a new Client for every call, as the early commits on this branch were doing, because this would bypass the TLS cert config and is also wasteful.

Co-Authored-By: hopinheimer <knmanas6@gmail.com>
Co-Authored-By: Michael Sproul <michael@sigmaprime.io>
Co-Authored-By: Michael Sproul <michaelsproul@users.noreply.github.com>
2026-06-10 01:26:44 +00:00 · 2026-02-18 16:28:17 +11:00
parent c5b4580e37
commit be799cb2ad
1 changed files with 5 additions and 6 deletions
--- a/common/eth2/src/lib.rs
+++ b/common/eth2/src/lib.rs
@@ -76,8 +76,6 @@ const HTTP_GET_BEACON_BLOCK_SSZ_TIMEOUT_QUOTIENT: u32 = 4;
 const HTTP_GET_DEBUG_BEACON_STATE_QUOTIENT: u32 = 4;
 const HTTP_GET_DEPOSIT_SNAPSHOT_QUOTIENT: u32 = 4;
 const HTTP_GET_VALIDATOR_BLOCK_TIMEOUT_QUOTIENT: u32 = 4;
-// Generally the timeout for events should be longer than a slot.
-const HTTP_GET_EVENTS_TIMEOUT_MULTIPLIER: u32 = 50;
 const HTTP_DEFAULT_TIMEOUT_QUOTIENT: u32 = 4;

 /// A struct to define a variety of different timeouts for different validator tasks to ensure
@@ -98,7 +96,6 @@ pub struct Timeouts {
    pub get_debug_beacon_states: Duration,
    pub get_deposit_snapshot: Duration,
    pub get_validator_block: Duration,
-    pub events: Duration,
    pub default: Duration,
 }

@@ -119,7 +116,6 @@ impl Timeouts {
            get_debug_beacon_states: timeout,
            get_deposit_snapshot: timeout,
            get_validator_block: timeout,
-            events: HTTP_GET_EVENTS_TIMEOUT_MULTIPLIER * timeout,
            default: timeout,
        }
    }
@@ -142,7 +138,6 @@ impl Timeouts {
            get_debug_beacon_states: base_timeout / HTTP_GET_DEBUG_BEACON_STATE_QUOTIENT,
            get_deposit_snapshot: base_timeout / HTTP_GET_DEPOSIT_SNAPSHOT_QUOTIENT,
            get_validator_block: base_timeout / HTTP_GET_VALIDATOR_BLOCK_TIMEOUT_QUOTIENT,
-            events: HTTP_GET_EVENTS_TIMEOUT_MULTIPLIER * base_timeout,
            default: base_timeout / HTTP_DEFAULT_TIMEOUT_QUOTIENT,
        }
    }
@@ -2805,10 +2800,14 @@ impl BeaconNodeHttpClient {
            .join(",");
        path.query_pairs_mut().append_pair("topics", &topic_string);

+        // Do not use a timeout for the events endpoint. Using a regular timeout will trigger a
+        // timeout every `timeout` seconds, regardless of any data streamed from the endpoint.
+        // In future we could add a read_timeout, but that can only be configured globally on the
+        // Client.
        let mut es = self
            .client
            .get(path)
-            .timeout(self.timeouts.events)
+            .timeout(Duration::MAX)
            .eventsource()
            .map_err(Error::SseEventSource)?;
        // If we don't await `Event::Open` here, then the consumer