Validator registration request failures do not cause us to mark BNs offline (#3488)

## Issue Addressed

Relates to https://github.com/sigp/lighthouse/issues/3416

## Proposed Changes

- Add an `OfflineOnFailure` enum parameter to the `first_success` method for querying beacon nodes so that a validator registration request failure from the BN -> builder does not result in the BN being marked offline. This seems important because these failures could be coming directly from a connected relay and actually have no bearing on BN health. Other messages that are sent to a relay have a local fallback, so they shouldn't result in errors.

- Downgrade the following log to a `WARN`

```
ERRO Unable to publish validator registrations to the builder network, error: All endpoints failed https://BN_B => RequestFailed(ServerMessage(ErrorMessage { code: 500, message: "UNHANDLED_ERROR: BuilderMissing", stacktraces: [] })), https://XXXX/ => Unavailable(Offline), [omitted]
```

## Additional Info

I think this change at least improves the UX of having a VC connected to some builder and some non-builder beacon nodes. I think we need to balance alerting users to a potential BN <> VC misconfiguration against allowing this type of fallback to work.

If we want to fully support this type of configuration, we may want to consider adding a flag `--builder-beacon-nodes` and tracking whether a VC should be making builder queries on a per-beacon-node basis. But I think the changes in this PR are independent of that type of extension.

PS: Sorry for the big diff here; it's mostly formatting changes after I added a new arg to a bunch of method calls.




Co-authored-by: realbigsean <sean@sigmaprime.io>
This commit is contained in:
realbigsean
2022-08-29 11:35:59 +00:00
parent 66eca1a882
commit 2ce86a0830
9 changed files with 339 additions and 241 deletions

View File

@@ -8,7 +8,7 @@
mod sync;
use crate::beacon_node_fallback::{BeaconNodeFallback, RequireSynced};
use crate::beacon_node_fallback::{BeaconNodeFallback, OfflineOnFailure, RequireSynced};
use crate::{
block_service::BlockServiceNotification,
http_metrics::metrics,
@@ -382,18 +382,22 @@ async fn poll_validator_indices<T: SlotClock + 'static, E: EthSpec>(
// Query the remote BN to resolve a pubkey to a validator index.
let download_result = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::VALIDATOR_ID_HTTP_GET],
);
beacon_node
.get_beacon_states_validator_id(
StateId::Head,
&ValidatorId::PublicKey(pubkey),
)
.await
})
.first_success(
duties_service.require_synced,
OfflineOnFailure::Yes,
|beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::VALIDATOR_ID_HTTP_GET],
);
beacon_node
.get_beacon_states_validator_id(
StateId::Head,
&ValidatorId::PublicKey(pubkey),
)
.await
},
)
.await;
match download_result {
@@ -559,15 +563,19 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
let subscriptions_ref = &subscriptions;
if let Err(e) = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::SUBSCRIPTIONS_HTTP_POST],
);
beacon_node
.post_validator_beacon_committee_subscriptions(subscriptions_ref)
.await
})
.first_success(
duties_service.require_synced,
OfflineOnFailure::Yes,
|beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::SUBSCRIPTIONS_HTTP_POST],
);
beacon_node
.post_validator_beacon_committee_subscriptions(subscriptions_ref)
.await
},
)
.await
{
error!(
@@ -619,15 +627,19 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
let response = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::ATTESTER_DUTIES_HTTP_POST],
);
beacon_node
.post_validator_duties_attester(epoch, local_indices)
.await
})
.first_success(
duties_service.require_synced,
OfflineOnFailure::Yes,
|beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::ATTESTER_DUTIES_HTTP_POST],
);
beacon_node
.post_validator_duties_attester(epoch, local_indices)
.await
},
)
.await
.map_err(|e| Error::FailedToDownloadAttesters(e.to_string()))?;
@@ -779,15 +791,19 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
if !local_pubkeys.is_empty() {
let download_result = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::PROPOSER_DUTIES_HTTP_GET],
);
beacon_node
.get_validator_duties_proposer(current_epoch)
.await
})
.first_success(
duties_service.require_synced,
OfflineOnFailure::Yes,
|beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::PROPOSER_DUTIES_HTTP_GET],
);
beacon_node
.get_validator_duties_proposer(current_epoch)
.await
},
)
.await;
match download_result {