Maintain peers across all data column subnets (#7915)

Closes:
- #7865
- #7855

Changes extracted from earlier PR #7876

This PR fixes two main things with a few other improvements mentioned below:
- Prevent Lighthouse from repeatedly sending `DataColumnByRoot` requests to an unsynced peer, which caused lookup sync to get stuck
- Allow Lighthouse to send discovery requests if there aren't enough **synced** peers in the required sampling subnets - this fixes the stuck sync scenario where there aren't enough usable peers in the sampling subnets but no discovery is attempted.


  - Make peer discovery queries if custody subnet peer count drops below the minimum threshold
- Update peer pruning logic to prioritise uniform distribution across all data column subnets and avoid pruning sampling peers if the count is below the target threshold (2)
- Check sync status when making discovery requests, to make sure we don't ignore requests if there aren't enough synced peers in the required sampling subnets
- Optimise some of the `PeerDB` functions checking custody peers
- Only send lookup requests to peers that are synced or advanced
This commit is contained in:
Jimmy Chen
2025-09-04 15:36:20 +10:00
committed by GitHub
parent 76adedff27
commit c2a92f1a8c
6 changed files with 974 additions and 336 deletions

View File

@@ -174,19 +174,6 @@ impl<E: EthSpec> PeerInfo<E> {
self.subnets.iter()
}
/// Returns the number of long lived subnets a peer is subscribed to.
// NOTE: This currently excludes sync committee subnets
pub fn long_lived_subnet_count(&self) -> usize {
if let Some(meta_data) = self.meta_data.as_ref() {
return meta_data.attnets().num_set_bits();
} else if let Some(enr) = self.enr.as_ref()
&& let Ok(attnets) = enr.attestation_bitfield::<E>()
{
return attnets.num_set_bits();
}
0
}
/// Returns a list of the long-lived subnets of this peer, if it has any.
pub fn long_lived_subnets(&self) -> Vec<Subnet> {
let mut long_lived_subnets = Vec::new();
@@ -222,6 +209,13 @@ impl<E: EthSpec> PeerInfo<E> {
}
}
}
long_lived_subnets.extend(
self.custody_subnets
.iter()
.map(|&id| Subnet::DataColumn(id)),
);
long_lived_subnets
}
@@ -240,6 +234,11 @@ impl<E: EthSpec> PeerInfo<E> {
self.custody_subnets.iter()
}
/// Reports how many entries are held in this peer's `custody_subnets` collection,
/// i.e. the number of custody subnets assigned to the peer.
pub fn custody_subnet_count(&self) -> usize {
    let assigned = &self.custody_subnets;
    assigned.len()
}
/// Returns true if the peer is connected to a long-lived subnet.
pub fn has_long_lived_subnet(&self) -> bool {
// Check the meta_data
@@ -262,6 +261,17 @@ impl<E: EthSpec> PeerInfo<E> {
{
return true;
}
// Check if the peer has custody subnets populated and the peer is subscribed to any of
// its custody subnets
let subscribed_to_any_custody_subnets = self
.custody_subnets
.iter()
.any(|subnet_id| self.subnets.contains(&Subnet::DataColumn(*subnet_id)));
if subscribed_to_any_custody_subnets {
return true;
}
false
}
@@ -318,6 +328,14 @@ impl<E: EthSpec> PeerInfo<E> {
)
}
/// Returns `true` when the peer's recorded sync status is either `Synced` or `Advanced`,
/// and `false` for every other status.
pub fn is_synced_or_advanced(&self) -> bool {
    let status = &self.sync_status;
    let synced = matches!(status, SyncStatus::Synced { .. });
    let advanced = matches!(status, SyncStatus::Advanced { .. });
    synced || advanced
}
/// Checks if the peer is currently being dialed.
pub fn is_dialing(&self) -> bool {
matches!(self.connection_status, PeerConnectionStatus::Dialing { .. })
@@ -645,3 +663,50 @@ impl From<PeerConnectionStatus> for PeerState {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Subnet;
    use types::{DataColumnSubnetId, MainnetEthSpec};

    type E = MainnetEthSpec;

    /// A default peer has neither custody subnets nor subscriptions, so it must not be
    /// reported as being on a long-lived subnet.
    #[test]
    fn test_has_long_lived_subnet_empty_custody_subnets() {
        let peer_info = PeerInfo::<E>::default();
        assert!(!peer_info.has_long_lived_subnet());
    }

    /// Custody subnets alone are not enough: without a matching subscription the peer is
    /// still not considered to be on a long-lived subnet.
    #[test]
    fn test_has_long_lived_subnet_empty_subnets_with_custody_subnets() {
        let mut peer_info = PeerInfo::<E>::default();
        for id in [1, 2] {
            peer_info
                .custody_subnets
                .insert(DataColumnSubnetId::new(id));
        }
        assert!(!peer_info.has_long_lived_subnet());
    }

    /// Being subscribed to only some of the custody subnets is sufficient for the peer to
    /// count as being on a long-lived subnet.
    #[test]
    fn test_has_long_lived_subnet_subscribed_to_custody_subnets() {
        let mut peer_info = PeerInfo::<E>::default();
        for id in [1, 2, 3] {
            peer_info
                .custody_subnets
                .insert(DataColumnSubnetId::new(id));
        }
        // Subscribe to subnets 1 and 2 only; subnet 3 is deliberately left out.
        for id in [1, 2] {
            peer_info
                .subnets
                .insert(Subnet::DataColumn(DataColumnSubnetId::new(id)));
        }
        assert!(peer_info.has_long_lived_subnet());
    }
}