Don't use failed_peers for download errors; rely on randomness to skip potentially faulty peers

This commit is contained in:
dapplion
2025-06-11 12:38:55 +02:00
parent 4e13b3be0f
commit e426e45455
3 changed files with 5 additions and 19 deletions

View File

@@ -330,9 +330,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
return Ok(());
}
debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed");
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
// mechanism compatible with PeerDAS and before PeerDAS?
match batch.download_failed(None) {
match batch.download_failed() {
Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)),
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => self.fail_sync(match err {
RpcResponseError::RpcError(_)
@@ -956,7 +954,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
return self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0));
}
match batch.download_failed(None) {
match batch.download_failed() {
Err(e) => {
self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))?
}

View File

@@ -305,17 +305,9 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
/// The `peer` parameter, when set to None, does not increment the failed attempts of
/// this batch and register the peer, rather attempts a re-download.
#[must_use = "Batch may have failed"]
pub fn download_failed(
&mut self,
peer: Option<PeerId>,
) -> Result<BatchOperationOutcome, WrongState> {
pub fn download_failed(&mut self) -> Result<BatchOperationOutcome, WrongState> {
match self.state.poison() {
BatchState::Downloading(_request_id) => {
// register the attempt and check if the batch can be tried again
if let Some(peer) = peer {
self.failed_peers.insert(peer);
}
self.failed_download_attempts += 1;
self.state =

View File

@@ -902,11 +902,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
%request_id,
"Batch download error"
);
if let BatchOperationOutcome::Failed { blacklist } =
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
// mechanism compatible with PeerDAS and before PeerDAS?
batch.download_failed(None)?
{
if let BatchOperationOutcome::Failed { blacklist } = batch.download_failed()? {
return Err(RemoveChain::ChainFailed {
blacklist,
failing_batch: batch_id,
@@ -966,7 +962,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request");
// register the failed download and check if the batch can be retried
batch.start_downloading(1)?; // fake request_id = 1 is not relevant
match batch.download_failed(None)? {
match batch.download_failed()? {
BatchOperationOutcome::Failed { blacklist } => {
return Err(RemoveChain::ChainFailed {
blacklist,