mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 02:12:33 +00:00
Fix failed_peers post fulu
This commit is contained in:
@@ -617,9 +617,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
error,
|
||||
} => {
|
||||
// TODO(sync): De-dup between back and forwards sync
|
||||
let mut failed_peers = vec![];
|
||||
|
||||
if let Some(penalty) = peer_action.block_peer {
|
||||
// Penalize the peer appropriately.
|
||||
network.report_peer(batch_peers.block(), penalty, "faulty_batch");
|
||||
failed_peers.push(batch_peers.block());
|
||||
}
|
||||
|
||||
// Penalize each peer only once. Currently a peer_action does not mix different
|
||||
@@ -635,9 +638,11 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
.unique()
|
||||
{
|
||||
network.report_peer(peer, penalty, "faulty_batch_column");
|
||||
failed_peers.push(peer);
|
||||
}
|
||||
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure) {
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure(failed_peers))
|
||||
{
|
||||
Err(e) => {
|
||||
// Batch was in the wrong state
|
||||
self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))
|
||||
@@ -926,12 +931,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
) -> Result<(), BackFillError> {
|
||||
if let Some(batch) = self.batches.get_mut(&batch_id) {
|
||||
let request = batch.to_blocks_by_range_request();
|
||||
let failed_peers = batch.failed_block_peers();
|
||||
let failed_peers = batch.failed_peers();
|
||||
match network.block_components_by_range_request(
|
||||
request,
|
||||
RangeRequestId::BackfillSync { batch_id },
|
||||
self.peers.clone(),
|
||||
&failed_peers,
|
||||
failed_peers,
|
||||
) {
|
||||
Ok(request_id) => {
|
||||
// inform the batch about the new request
|
||||
|
||||
@@ -112,7 +112,7 @@ pub enum BatchOperationOutcome {
|
||||
|
||||
pub enum BatchProcessingResult {
|
||||
Success,
|
||||
FaultyFailure,
|
||||
FaultyFailure(Vec<PeerId>),
|
||||
NonFaultyFailure,
|
||||
}
|
||||
|
||||
@@ -128,7 +128,9 @@ pub struct BatchInfo<E: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
|
||||
/// Number of processing attempts that have failed but we do not count.
|
||||
non_faulty_processing_attempts: u8,
|
||||
/// The number of download retries this batch has undergone due to a failed request.
|
||||
failed_download_attempts: Vec<Option<PeerId>>,
|
||||
failed_download_attempts: usize,
|
||||
/// Peers that returned bad data and that we want to de-prioritize.
|
||||
failed_peers: HashSet<PeerId>,
|
||||
/// State of the batch.
|
||||
state: BatchState<E>,
|
||||
/// Pin the generic
|
||||
@@ -197,7 +199,8 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
start_slot,
|
||||
end_slot,
|
||||
failed_processing_attempts: Vec::new(),
|
||||
failed_download_attempts: Vec::new(),
|
||||
failed_download_attempts: 0,
|
||||
failed_peers: <_>::default(),
|
||||
non_faulty_processing_attempts: 0,
|
||||
state: BatchState::AwaitingDownload,
|
||||
marker: std::marker::PhantomData,
|
||||
@@ -206,23 +209,8 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
|
||||
/// Gives a list of peers from which this batch has had a failed download or processing
|
||||
/// attempt.
|
||||
///
|
||||
/// TODO(das): Returns only block peers to keep the mainnet path equivalent. The failed peers
|
||||
/// mechanism is broken for PeerDAS and will be fixed with https://github.com/sigp/lighthouse/issues/6258
|
||||
pub fn failed_block_peers(&self) -> HashSet<PeerId> {
|
||||
let mut peers = HashSet::with_capacity(
|
||||
self.failed_processing_attempts.len() + self.failed_download_attempts.len(),
|
||||
);
|
||||
|
||||
for attempt in &self.failed_processing_attempts {
|
||||
peers.insert(attempt.peers.block());
|
||||
}
|
||||
|
||||
for peer in self.failed_download_attempts.iter().flatten() {
|
||||
peers.insert(*peer);
|
||||
}
|
||||
|
||||
peers
|
||||
pub fn failed_peers(&self) -> &HashSet<PeerId> {
|
||||
&self.failed_peers
|
||||
}
|
||||
|
||||
/// Verifies if an incoming block belongs to this batch.
|
||||
@@ -272,8 +260,7 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
match self.state {
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
BatchState::Failed => BatchOperationOutcome::Failed {
|
||||
blacklist: self.failed_processing_attempts.len()
|
||||
> self.failed_download_attempts.len(),
|
||||
blacklist: self.failed_processing_attempts.len() > self.failed_download_attempts,
|
||||
},
|
||||
_ => BatchOperationOutcome::Continue,
|
||||
}
|
||||
@@ -325,15 +312,19 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
match self.state.poison() {
|
||||
BatchState::Downloading(_request_id) => {
|
||||
// register the attempt and check if the batch can be tried again
|
||||
self.failed_download_attempts.push(peer);
|
||||
self.state = if self.failed_download_attempts.len()
|
||||
>= B::max_batch_download_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
// drop the blocks
|
||||
BatchState::AwaitingDownload
|
||||
};
|
||||
if let Some(peer) = peer {
|
||||
self.failed_peers.insert(peer);
|
||||
}
|
||||
|
||||
self.failed_download_attempts += 1;
|
||||
|
||||
self.state =
|
||||
if self.failed_download_attempts >= B::max_batch_download_attempts() as usize {
|
||||
BatchState::Failed
|
||||
} else {
|
||||
// drop the blocks
|
||||
BatchState::AwaitingDownload
|
||||
};
|
||||
Ok(self.outcome())
|
||||
}
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
@@ -390,9 +381,12 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
BatchState::Processing(attempt) => {
|
||||
self.state = match procesing_result {
|
||||
BatchProcessingResult::Success => BatchState::AwaitingValidation(attempt),
|
||||
BatchProcessingResult::FaultyFailure => {
|
||||
BatchProcessingResult::FaultyFailure(failed_peers) => {
|
||||
// register the failed attempt
|
||||
self.failed_processing_attempts.push(attempt);
|
||||
for peer in failed_peers {
|
||||
self.failed_peers.insert(peer);
|
||||
}
|
||||
|
||||
// check if the batch can be downloaded again
|
||||
if self.failed_processing_attempts.len()
|
||||
|
||||
@@ -539,10 +539,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// TODO(sync): propagate error in logs
|
||||
error: _,
|
||||
} => {
|
||||
let mut failed_peers = vec![];
|
||||
|
||||
// TODO(sync): De-dup between back and forwards sync
|
||||
if let Some(penalty) = peer_action.block_peer {
|
||||
// Penalize the peer appropriately.
|
||||
network.report_peer(batch_peers.block(), penalty, "faulty_batch");
|
||||
failed_peers.push(batch_peers.block());
|
||||
}
|
||||
|
||||
// Penalize each peer only once. Currently a peer_action does not mix different
|
||||
@@ -558,10 +561,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
.unique()
|
||||
{
|
||||
network.report_peer(peer, penalty, "faulty_batch_column");
|
||||
failed_peers.push(peer);
|
||||
}
|
||||
|
||||
// Check if this batch is allowed to continue
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure)? {
|
||||
match batch
|
||||
.processing_completed(BatchProcessingResult::FaultyFailure(failed_peers))?
|
||||
{
|
||||
BatchOperationOutcome::Continue => {
|
||||
// Chain can continue. Check if it can be moved forward.
|
||||
if *imported_blocks > 0 {
|
||||
@@ -929,7 +935,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
let batch_state = self.visualize_batch_state();
|
||||
if let Some(batch) = self.batches.get_mut(&batch_id) {
|
||||
let request = batch.to_blocks_by_range_request();
|
||||
let failed_peers = batch.failed_block_peers();
|
||||
let failed_peers = batch.failed_peers();
|
||||
|
||||
match network.block_components_by_range_request(
|
||||
request,
|
||||
@@ -938,7 +944,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
batch_id,
|
||||
},
|
||||
self.peers.clone(),
|
||||
&failed_peers,
|
||||
failed_peers,
|
||||
) {
|
||||
Ok(request_id) => {
|
||||
// inform the batch about the new request
|
||||
|
||||
Reference in New Issue
Block a user