From a6bdc474db0147c4194123bbd71dcfb562b36ae4 Mon Sep 17 00:00:00 2001 From: Lion - dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 13 Mar 2025 10:26:43 -0300 Subject: [PATCH] Log range sync download errors (#6991) Currently range sync download log errors just say `error: rpc_error` which isn't helpful. The actual error is suppressed unless logged somewhere else. Log the actual error that caused the batch download to fail as part of the log that states that the batch download failed. --- .../network/src/sync/backfill_sync/mod.rs | 6 ++--- beacon_node/network/src/sync/manager.rs | 22 ++++++++++++------- .../network/src/sync/range_sync/chain.rs | 6 +++-- .../network/src/sync/range_sync/range.rs | 5 +++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index cd3f0dcbeb..509caf7316 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -10,8 +10,7 @@ use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::manager::BatchProcessResult; -use crate::sync::network_context::RangeRequestId; -use crate::sync::network_context::SyncNetworkContext; +use crate::sync::network_context::{RangeRequestId, RpcResponseError, SyncNetworkContext}; use crate::sync::range_sync::{ BatchConfig, BatchId, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, }; @@ -375,6 +374,7 @@ impl BackFillSync { batch_id: BatchId, peer_id: &PeerId, request_id: Id, + err: RpcResponseError, ) -> Result<(), BackFillError> { if let Some(batch) = self.batches.get_mut(&batch_id) { // A batch could be retried without the peer failing the request (disconnecting/ @@ -385,7 +385,7 @@ impl BackFillSync { if !batch.is_expecting_block(&request_id) { return Ok(()); } - debug!(batch_epoch = %batch_id, error = "rpc_error", "Batch failed"); + debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed"); if let Some(active_requests) = self.active_requests.get_mut(peer_id) { active_requests.remove(&batch_id); } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 671fa1e3b4..1e6d21d68a 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -1288,7 +1288,7 @@ impl SyncManager { } } } - Err(_) => match range_request_id.requester { + Err(e) => match range_request_id.requester { RangeRequestId::RangeSync { chain_id, batch_id } => { self.range_sync.inject_error( &mut self.network, @@ -1296,16 +1296,22 @@ impl SyncManager { batch_id, chain_id, range_request_id.id, + e, ); self.update_sync_state(); } - RangeRequestId::BackfillSync { batch_id } => match self - .backfill_sync - .inject_error(&mut self.network, batch_id, &peer_id, range_request_id.id) - { - Ok(_) => {} - Err(_) => self.update_sync_state(), - }, + RangeRequestId::BackfillSync { batch_id } => { + match self.backfill_sync.inject_error( + &mut self.network, + batch_id, + &peer_id, + range_request_id.id, + e, + ) { + Ok(_) => {} + Err(_) => self.update_sync_state(), + } + } }, } } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 70c7b6f98f..24045e901b 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -2,7 +2,7 @@ use super::batch::{BatchInfo, BatchProcessingResult, BatchState}; use super::RangeSyncType; use crate::metrics; use crate::network_beacon_processor::ChainSegmentProcessId; -use crate::sync::network_context::RangeRequestId; +use crate::sync::network_context::{RangeRequestId, RpcResponseError}; use crate::sync::{network_context::SyncNetworkContext, BatchOperationOutcome, BatchProcessResult}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; @@ -879,6 +879,7 @@ impl SyncingChain { batch_id: BatchId, peer_id: &PeerId, request_id: Id, + err: RpcResponseError, ) -> ProcessingResult { let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { @@ -901,9 +902,10 @@ impl SyncingChain { debug!( batch_epoch = %batch_id, batch_state = ?batch.state(), + error = ?err, %peer_id, %request_id, - "Batch failed. RPC Error" + "Batch download error" ); if let Some(active_requests) = self.peers.get_mut(peer_id) { active_requests.remove(&batch_id); diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index e4a20f6349..ab9a88e4ac 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -44,7 +44,7 @@ use super::chain_collection::{ChainCollection, SyncChainStatus}; use super::sync_type::RangeSyncType; use crate::metrics; use crate::status::ToStatusMessage; -use crate::sync::network_context::SyncNetworkContext; +use crate::sync::network_context::{RpcResponseError, SyncNetworkContext}; use crate::sync::BatchProcessResult; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; @@ -348,10 +348,11 @@ where batch_id: BatchId, chain_id: ChainId, request_id: Id, + err: RpcResponseError, ) { // check that this request is pending match self.chains.call_by_id(chain_id, |chain| { - chain.inject_error(network, batch_id, &peer_id, request_id) + chain.inject_error(network, batch_id, &peer_id, request_id, err) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain {