mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-30 03:03:45 +00:00
Validator registration request failures do not cause us to mark BNs offline (#3488)
## Issue Addressed

Relates to https://github.com/sigp/lighthouse/issues/3416

## Proposed Changes

- Add an `OfflineOnFailure` enum argument to the `first_success` method for querying beacon nodes, so that a validator registration request failure from the BN -> builder does not result in the BN being marked offline. This seems important because these failures could be coming directly from a connected relay and actually have no bearing on BN health. Other messages that are sent to a relay have a local fallback, so they shouldn't result in errors.
- Downgrade the following log to a `WARN`:

```
ERRO Unable to publish validator registrations to the builder network, error: All endpoints failed https://BN_B => RequestFailed(ServerMessage(ErrorMessage { code: 500, message: "UNHANDLED_ERROR: BuilderMissing", stacktraces: [] })), https://XXXX/ => Unavailable(Offline), [omitted]
```

## Additional Info

I think this change at least improves the UX of having a VC connected to some builder and some non-builder beacon nodes. I think we need to balance potentially alerting users that there is a BN <> VC misconfiguration and also allowing this type of fallback to work. If we want to fully support this type of configuration, we may want to consider adding a flag `--builder-beacon-nodes` and tracking whether a VC should be making builder queries on a per-beacon-node basis. But I think the changes in this PR are independent of that type of extension.

PS: Sorry for the big diff here; it's mostly formatting changes after I added a new arg to a bunch of method calls.

Co-authored-by: realbigsean <sean@sigmaprime.io>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
use crate::beacon_node_fallback::{BeaconNodeFallback, RequireSynced};
|
||||
use crate::validator_store::{DoppelgangerStatus, ValidatorStore};
|
||||
use crate::OfflineOnFailure;
|
||||
use bls::PublicKeyBytes;
|
||||
use environment::RuntimeContext;
|
||||
use parking_lot::RwLock;
|
||||
use slog::{debug, error, info};
|
||||
use slog::{debug, error, info, warn};
|
||||
use slot_clock::SlotClock;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
@@ -330,11 +331,15 @@ impl<T: SlotClock + 'static, E: EthSpec> PreparationService<T, E> {
|
||||
let preparation_entries = preparation_data.as_slice();
|
||||
match self
|
||||
.beacon_nodes
|
||||
.first_success(RequireSynced::Yes, |beacon_node| async move {
|
||||
beacon_node
|
||||
.post_validator_prepare_beacon_proposer(preparation_entries)
|
||||
.await
|
||||
})
|
||||
.first_success(
|
||||
RequireSynced::Yes,
|
||||
OfflineOnFailure::Yes,
|
||||
|beacon_node| async move {
|
||||
beacon_node
|
||||
.post_validator_prepare_beacon_proposer(preparation_entries)
|
||||
.await
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => debug!(
|
||||
@@ -445,9 +450,13 @@ impl<T: SlotClock + 'static, E: EthSpec> PreparationService<T, E> {
|
||||
for batch in signed.chunks(VALIDATOR_REGISTRATION_BATCH_SIZE) {
|
||||
match self
|
||||
.beacon_nodes
|
||||
.first_success(RequireSynced::Yes, |beacon_node| async move {
|
||||
beacon_node.post_validator_register_validator(batch).await
|
||||
})
|
||||
.first_success(
|
||||
RequireSynced::Yes,
|
||||
OfflineOnFailure::No,
|
||||
|beacon_node| async move {
|
||||
beacon_node.post_validator_register_validator(batch).await
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => info!(
|
||||
@@ -455,7 +464,7 @@ impl<T: SlotClock + 'static, E: EthSpec> PreparationService<T, E> {
|
||||
"Published validator registrations to the builder network";
|
||||
"count" => registration_data_len,
|
||||
),
|
||||
Err(e) => error!(
|
||||
Err(e) => warn!(
|
||||
log,
|
||||
"Unable to publish validator registrations to the builder network";
|
||||
"error" => %e,
|
||||
|
||||
Reference in New Issue
Block a user