mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-31 05:07:12 +00:00
Fix stuck data column lookups by improving peer selection and retry logic (#8005)
Fixes the issue described in #7980, where Lighthouse repeatedly sends `DataColumnsByRoot` requests to the same peers that return empty responses, causing sync to get stuck. The root cause was that empty responses were not counted as failures, which led to excessive retries against unresponsive peers.

- Track per-peer attempts to limit the number of retries per peer (`MAX_CUSTODY_PEER_ATTEMPTS = 3`).
- Replace random peer selection with hashing within each lookup, to avoid splitting a lookup into too many small requests and to improve request batching efficiency.
- Add a `single_block_lookup` root span to track all lookups created, and add more debug logs:

<img width="1264" height="501" alt="image" src="https://github.com/user-attachments/assets/983629ba-b6d0-41cf-8e93-88a5b96c2f31" />

Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
Co-Authored-By: Jimmy Chen <jimmy@sigmaprime.io>
This commit is contained in:
@@ -29,7 +29,7 @@ use lighthouse_network::service::api_types::{
|
||||
DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
|
||||
};
|
||||
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource};
|
||||
use lighthouse_tracing::SPAN_OUTGOING_RANGE_REQUEST;
|
||||
use lighthouse_tracing::{SPAN_OUTGOING_BLOCK_BY_ROOT_REQUEST, SPAN_OUTGOING_RANGE_REQUEST};
|
||||
use parking_lot::RwLock;
|
||||
pub use requests::LookupVerifyError;
|
||||
use requests::{
|
||||
@@ -886,6 +886,11 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
"Sync RPC request sent"
|
||||
);
|
||||
|
||||
let request_span = debug_span!(
|
||||
parent: Span::current(),
|
||||
SPAN_OUTGOING_BLOCK_BY_ROOT_REQUEST,
|
||||
%block_root,
|
||||
);
|
||||
self.blocks_by_root_requests.insert(
|
||||
id,
|
||||
peer_id,
|
||||
@@ -893,8 +898,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
// block and the peer must have it.
|
||||
true,
|
||||
BlocksByRootRequestItems::new(request),
|
||||
// Not implemented
|
||||
Span::none(),
|
||||
request_span,
|
||||
);
|
||||
|
||||
Ok(LookupRequestResult::RequestSent(id.req_id))
|
||||
|
||||
Reference in New Issue
Block a user