Maintain peers across all data column subnets (#7915)

Closes:
- #7865
- #7855

Changes extracted from earlier PR #7876

This PR fixes two main things with a few other improvements mentioned below:
- Prevent Lighthouse from repeatedly sending `DataColumnByRoot` requests to an unsynced peer, which caused lookup sync to get stuck
- Allow Lighthouse to send discovery requests if there aren't enough **synced** peers in the required sampling subnets - this fixes the stuck sync scenario where there aren't enough usable peers in the sampling subnets but no discovery is attempted.


- Make peer discovery queries if the custody subnet peer count drops below the minimum threshold
- Update peer pruning logic to prioritise uniform distribution across all data column subnets and avoid pruning sampling peers if the count is below the target threshold (2)
- Check sync status when making discovery requests, to make sure we don't ignore requests if there aren't enough synced peers in the required sampling subnets
- Optimise some of the `PeerDB` functions checking custody peers
- Only send lookup requests to peers that are synced or advanced
This commit is contained in:
Jimmy Chen
2025-09-04 15:36:20 +10:00
committed by GitHub
parent 76adedff27
commit c2a92f1a8c
6 changed files with 974 additions and 336 deletions

View File

@@ -1120,13 +1120,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
.sampling_subnets()
.iter()
.all(|subnet_id| {
let peer_count = network
let min_peer_count = 1;
network
.network_globals()
.peers
.read()
.good_range_sync_custody_subnet_peers(*subnet_id)
.count();
peer_count > 0
.has_good_peers_in_custody_subnet(subnet_id, min_peer_count)
})
} else {
true

View File

@@ -1132,21 +1132,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
) -> bool {
if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
// Require peers on all sampling column subnets before sending batches
let sampling_subnets = network.network_globals().sampling_subnets();
network
.network_globals()
.sampling_subnets()
.iter()
.all(|subnet_id| {
let peer_db = network.network_globals().peers.read();
let peer_count = self
.peers
.iter()
.filter(|peer| {
peer_db.is_good_range_sync_custody_subnet_peer(*subnet_id, peer)
})
.count();
peer_count > 0
})
.peers
.read()
.has_good_custody_range_sync_peer(&sampling_subnets, epoch)
} else {
true
}