From e426e45455bd457a5735e722b08f11dd42630fc0 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Wed, 11 Jun 2025 12:38:55 +0200 Subject: [PATCH] Don't use failed_peers for download errors, rely on randomness to skip potentially faulty peers --- beacon_node/network/src/sync/backfill_sync/mod.rs | 6 ++---- beacon_node/network/src/sync/range_sync/batch.rs | 10 +--------- beacon_node/network/src/sync/range_sync/chain.rs | 8 ++------ 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 5037cf4860..70d6573264 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -330,9 +330,7 @@ impl BackFillSync { return Ok(()); } debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed"); - // TODO(das): Is it necessary for the batch to track failed peers? Can we make this - // mechanism compatible with PeerDAS and before PeerDAS? - match batch.download_failed(None) { + match batch.download_failed() { Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)), Ok(BatchOperationOutcome::Failed { blacklist: _ }) => self.fail_sync(match err { RpcResponseError::RpcError(_) @@ -956,7 +954,7 @@ impl BackFillSync { return self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)); } - match batch.download_failed(None) { + match batch.download_failed() { Err(e) => { self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))? } diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 5267ba56ba..8834c74c08 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -305,17 +305,9 @@ impl BatchInfo { /// The `peer` parameter, when set to None, does not increment the failed attempts of /// this batch and register the peer, rather attempts a re-download. #[must_use = "Batch may have failed"] - pub fn download_failed( - &mut self, - peer: Option, - ) -> Result { + pub fn download_failed(&mut self) -> Result { match self.state.poison() { BatchState::Downloading(_request_id) => { - // register the attempt and check if the batch can be tried again - if let Some(peer) = peer { - self.failed_peers.insert(peer); - } - self.failed_download_attempts += 1; self.state = diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 17bce62a7c..921d134c68 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -902,11 +902,7 @@ impl SyncingChain { %request_id, "Batch download error" ); - if let BatchOperationOutcome::Failed { blacklist } = - // TODO(das): Is it necessary for the batch to track failed peers? Can we make this - // mechanism compatible with PeerDAS and before PeerDAS? - batch.download_failed(None)? - { + if let BatchOperationOutcome::Failed { blacklist } = batch.download_failed()? { return Err(RemoveChain::ChainFailed { blacklist, failing_batch: batch_id, @@ -966,7 +962,7 @@ impl SyncingChain { warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request"); // register the failed download and check if the batch can be retried batch.start_downloading(1)?; // fake request_id = 1 is not relevant - match batch.download_failed(None)? { + match batch.download_failed()? { BatchOperationOutcome::Failed { blacklist } => { return Err(RemoveChain::ChainFailed { blacklist,