Do not penalize peers on execution layer offline errors (#3258)

## Issue Addressed

Partly resolves https://github.com/sigp/lighthouse/issues/3032

## Proposed Changes

Extracts some of the functionality of #3094 into a separate PR, as the original PR requires a bit more work.
Stops unnecessarily penalizing peers when we fail to validate a received execution payload because our own execution layer is offline; such failures are a local problem and say nothing about the peer's behaviour.
This commit is contained in:
Pawan Dhananjay
2022-06-19 23:13:40 +00:00
parent 21b3425a12
commit f428719761
8 changed files with 121 additions and 11 deletions

View File

@@ -8,7 +8,7 @@
//! If a batch fails, the backfill sync cannot progress. In this scenario, we mark the backfill
//! sync as failed, log an error and attempt to retry once a new peer joins the node.
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent};
use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent as BeaconWorkEvent};
use crate::sync::manager::{BatchProcessResult, Id};
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::range_sync::{BatchConfig, BatchId, BatchInfo, BatchProcessingResult, BatchState};
@@ -554,6 +554,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
imported_blocks: false,
// The beacon processor queue is full, no need to penalize the peer.
peer_action: None,
mode: FailureMode::ConsensusLayer,
},
)
} else {
@@ -638,6 +639,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
BatchProcessResult::Failed {
imported_blocks,
peer_action,
mode: _,
} => {
let batch = match self.batches.get_mut(&batch_id) {
Some(v) => v,

View File

@@ -1,7 +1,7 @@
use std::collections::hash_map::Entry;
use std::time::Duration;
use beacon_chain::{BeaconChainTypes, BlockError};
use beacon_chain::{BeaconChainTypes, BlockError, ExecutionPayloadError};
use fnv::FnvHashMap;
use lighthouse_network::{PeerAction, PeerId};
use lru_cache::LRUTimeCache;
@@ -10,7 +10,7 @@ use smallvec::SmallVec;
use store::{Hash256, SignedBeaconBlock};
use tokio::sync::mpsc;
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent};
use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent};
use crate::metrics;
use self::{
@@ -420,6 +420,20 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
BlockError::ParentUnknown(block) => {
self.search_parent(block, peer_id, cx);
}
e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::RequestFailed(_))
| e @ BlockError::ExecutionPayloadError(
ExecutionPayloadError::NoExecutionConnection,
) => {
// These errors indicate that the execution layer is offline
// and failed to validate the execution payload. Do not downscore peer.
debug!(
self.log,
"Single block lookup failed. Execution layer is offline";
"root" => %root,
"error" => ?e
);
}
other => {
warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id);
cx.report_peer(
@@ -506,6 +520,19 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
}
}
}
Err(e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::RequestFailed(_)))
| Err(
e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::NoExecutionConnection),
) => {
// These errors indicate that the execution layer is offline
// and failed to validate the execution payload. Do not downscore peer.
debug!(
self.log,
"Parent lookup failed. Execution layer is offline";
"chain_hash" => %chain_hash,
"error" => ?e
);
}
Err(outcome) => {
// all else we consider the chain a failure and downvote the peer that sent
// us the last block
@@ -561,11 +588,21 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
BatchProcessResult::Failed {
imported_blocks: _,
peer_action,
mode,
} => {
self.failed_chains.insert(parent_lookup.chain_hash());
if let Some(peer_action) = peer_action {
for &peer_id in parent_lookup.used_peers() {
cx.report_peer(peer_id, peer_action, "parent_chain_failure")
if let FailureMode::ExecutionLayer { pause_sync: _ } = mode {
debug!(
self.log,
"Chain segment processing failed. Execution layer is offline";
"chain_hash" => %chain_hash,
"error" => ?mode
);
} else {
self.failed_chains.insert(parent_lookup.chain_hash());
if let Some(peer_action) = peer_action {
for &peer_id in parent_lookup.used_peers() {
cx.report_peer(peer_id, peer_action, "parent_chain_failure")
}
}
}
}

View File

@@ -38,7 +38,7 @@ use super::block_lookups::BlockLookups;
use super::network_context::SyncNetworkContext;
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent};
use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent as BeaconWorkEvent};
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
@@ -137,6 +137,7 @@ pub enum BatchProcessResult {
Failed {
imported_blocks: bool,
peer_action: Option<PeerAction>,
mode: FailureMode,
},
}

View File

@@ -1,6 +1,6 @@
use super::batch::{BatchInfo, BatchProcessingResult, BatchState};
use crate::beacon_processor::ChainSegmentProcessId;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::beacon_processor::{ChainSegmentProcessId, FailureMode};
use crate::sync::{manager::Id, network_context::SyncNetworkContext, BatchProcessResult};
use beacon_chain::BeaconChainTypes;
use fnv::FnvHashMap;
@@ -320,6 +320,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
&BatchProcessResult::Failed {
imported_blocks: false,
peer_action: None,
mode: FailureMode::ConsensusLayer,
},
)
} else {
@@ -499,6 +500,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
BatchProcessResult::Failed {
imported_blocks,
peer_action,
mode: _,
} => {
let batch = self.batches.get_mut(&batch_id).ok_or_else(|| {
RemoveChain::WrongChainState(format!(