Implement PeerDAS subnet decoupling (aka custody groups) (#6736)

* Implement PeerDAS subnet decoupling (aka custody groups).

* Merge branch 'unstable' into decouple-subnets

* Refactor feature testing for spec tests (#6737)

Squashed commit of the following:

commit 898d05ee17
Merge: ffbd25e2b 7e0cddef3
Author: Jimmy Chen <jchen.tc@gmail.com>
Date:   Tue Dec 24 14:41:19 2024 +1100

    Merge branch 'unstable' into refactor-ef-tests-features

commit ffbd25e2be
Author: Jimmy Chen <jchen.tc@gmail.com>
Date:   Tue Dec 24 14:40:38 2024 +1100

    Fix `SszStatic` tests for PeerDAS: exclude eip7594 test vectors when testing Electra types.

commit aa593cf35c
Author: Jimmy Chen <jchen.tc@gmail.com>
Date:   Fri Dec 20 12:08:54 2024 +1100

    Refactor spec testing for features and simplify usage.

* Fix build.

* Add input validation and improve arithmetic handling when calculating custody groups.

* Address review comments re code style consistency.

* Merge branch 'unstable' into decouple-subnets

# Conflicts:
#	beacon_node/beacon_chain/src/kzg_utils.rs
#	beacon_node/beacon_chain/src/observed_data_sidecars.rs
#	beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs
#	common/eth2_network_config/built_in_network_configs/chiado/config.yaml
#	common/eth2_network_config/built_in_network_configs/gnosis/config.yaml
#	common/eth2_network_config/built_in_network_configs/holesky/config.yaml
#	common/eth2_network_config/built_in_network_configs/mainnet/config.yaml
#	common/eth2_network_config/built_in_network_configs/sepolia/config.yaml
#	consensus/types/src/chain_spec.rs

* Update consensus/types/src/chain_spec.rs

Co-authored-by: Lion - dapplion <35266934+dapplion@users.noreply.github.com>

* Merge remote-tracking branch 'origin/unstable' into decouple-subnets

* Update error handling.

* Address review comment.

* Merge remote-tracking branch 'origin/unstable' into decouple-subnets

# Conflicts:
#	consensus/types/src/chain_spec.rs

* Update PeerDAS spec tests to `1.5.0-beta.0` and fix failing unit tests.

* Merge remote-tracking branch 'origin/unstable' into decouple-subnets

# Conflicts:
#	beacon_node/lighthouse_network/src/peer_manager/mod.rs
This commit is contained in:
Jimmy Chen
2025-01-15 18:40:26 +11:00
committed by GitHub
parent dd7591f712
commit e98209d118
39 changed files with 552 additions and 430 deletions

View File

@@ -34,6 +34,9 @@ pub use peerdb::sync_status::{SyncInfo, SyncStatus};
use std::collections::{hash_map::Entry, HashMap, HashSet};
use std::net::IpAddr;
use strum::IntoEnumIterator;
use types::data_column_custody_group::{
compute_subnets_from_custody_group, get_custody_groups, CustodyIndex,
};
pub mod config;
mod network_behaviour;
@@ -101,6 +104,8 @@ pub struct PeerManager<E: EthSpec> {
/// discovery queries for subnet peers if we disconnect from existing sync
/// committee subnet peers.
sync_committee_subnets: HashMap<SyncSubnetId, Instant>,
/// A mapping of all custody groups to column subnets to avoid re-computation.
subnets_by_custody_group: HashMap<u64, Vec<DataColumnSubnetId>>,
/// The heartbeat interval to perform routine maintenance.
heartbeat: tokio::time::Interval,
/// Keeps track of whether the discovery service is enabled or not.
@@ -160,6 +165,21 @@ impl<E: EthSpec> PeerManager<E> {
// Set up the peer manager heartbeat interval
let heartbeat = tokio::time::interval(tokio::time::Duration::from_secs(HEARTBEAT_INTERVAL));
// Compute subnets for all custody groups
// The mapping is built once here and stored on the `PeerManager` so that later lookups
// by custody index can reuse it instead of recomputing the subnets each time.
// When PeerDAS is not scheduled the map is left empty.
let subnets_by_custody_group = if network_globals.spec.is_peer_das_scheduled() {
(0..network_globals.spec.number_of_custody_groups)
.map(|custody_index| {
let subnets =
compute_subnets_from_custody_group(custody_index, &network_globals.spec)
// NOTE(review): this panics if the helper returns an error. Presumably it cannot
// fail for indices below `number_of_custody_groups` — confirm against the helper.
.expect("Should compute subnets for all custody groups")
.collect();
(custody_index, subnets)
})
.collect::<HashMap<_, Vec<DataColumnSubnetId>>>()
} else {
HashMap::new()
};
Ok(PeerManager {
network_globals,
events: SmallVec::new(),
@@ -170,6 +190,7 @@ impl<E: EthSpec> PeerManager<E> {
target_peers: target_peer_count,
temporary_banned_peers: LRUTimeCache::new(PEER_RECONNECTION_TIMEOUT),
sync_committee_subnets: Default::default(),
subnets_by_custody_group,
heartbeat,
discovery_enabled,
metrics_enabled,
@@ -711,22 +732,39 @@ impl<E: EthSpec> PeerManager<E> {
"peer_id" => %peer_id, "new_seq_no" => meta_data.seq_number());
}
let custody_subnet_count_opt = meta_data.custody_subnet_count().copied().ok();
let custody_group_count_opt = meta_data.custody_group_count().copied().ok();
peer_info.set_meta_data(meta_data);
if self.network_globals.spec.is_peer_das_scheduled() {
// Gracefully ignore metadata/v2 peers. Potentially downscore after PeerDAS to
// prioritize PeerDAS peers.
if let Some(custody_subnet_count) = custody_subnet_count_opt {
match self.compute_peer_custody_subnets(peer_id, custody_subnet_count) {
Ok(custody_subnets) => {
if let Some(custody_group_count) = custody_group_count_opt {
match self.compute_peer_custody_groups(peer_id, custody_group_count) {
Ok(custody_groups) => {
let custody_subnets = custody_groups
.into_iter()
.flat_map(|custody_index| {
self.subnets_by_custody_group
.get(&custody_index)
.cloned()
.unwrap_or_else(|| {
warn!(
self.log,
"Custody group not found in subnet mapping";
"custody_index" => custody_index,
"peer_id" => %peer_id
);
vec![]
})
})
.collect();
peer_info.set_custody_subnets(custody_subnets);
}
Err(err) => {
debug!(self.log, "Unable to compute peer custody subnets from metadata";
debug!(self.log, "Unable to compute peer custody groups from metadata";
"info" => "Sending goodbye to peer",
"peer_id" => %peer_id,
"custody_subnet_count" => custody_subnet_count,
"custody_group_count" => custody_group_count,
"error" => ?err,
);
invalid_meta_data = true;
@@ -1312,7 +1350,7 @@ impl<E: EthSpec> PeerManager<E> {
let mut inbound_ipv4_peers_connected: usize = 0;
let mut inbound_ipv6_peers_connected: usize = 0;
let mut peers_connected_multi: HashMap<(&str, &str), i32> = HashMap::new();
let mut peers_per_custody_subnet_count: HashMap<u64, i64> = HashMap::new();
let mut peers_per_custody_group_count: HashMap<u64, i64> = HashMap::new();
for (_, peer_info) in self.network_globals.peers.read().connected_peers() {
peers_connected += 1;
@@ -1345,8 +1383,8 @@ impl<E: EthSpec> PeerManager<E> {
.or_default() += 1;
if let Some(MetaData::V3(meta_data)) = peer_info.meta_data() {
*peers_per_custody_subnet_count
.entry(meta_data.custody_subnet_count)
*peers_per_custody_group_count
.entry(meta_data.custody_group_count)
.or_default() += 1;
}
// Check if incoming peer is ipv4
@@ -1377,11 +1415,11 @@ impl<E: EthSpec> PeerManager<E> {
// PEERS_CONNECTED
metrics::set_gauge(&metrics::PEERS_CONNECTED, peers_connected);
// CUSTODY_SUBNET_COUNT
for (custody_subnet_count, peer_count) in peers_per_custody_subnet_count.into_iter() {
// CUSTODY_GROUP_COUNT
for (custody_group_count, peer_count) in peers_per_custody_group_count.into_iter() {
metrics::set_gauge_vec(
&metrics::PEERS_PER_CUSTODY_SUBNET_COUNT,
&[&custody_subnet_count.to_string()],
&metrics::PEERS_PER_CUSTODY_GROUP_COUNT,
&[&custody_group_count.to_string()],
peer_count,
)
}
@@ -1410,43 +1448,27 @@ impl<E: EthSpec> PeerManager<E> {
}
}
fn compute_peer_custody_subnets(
fn compute_peer_custody_groups(
&self,
peer_id: &PeerId,
custody_subnet_count: u64,
) -> Result<HashSet<DataColumnSubnetId>, String> {
custody_group_count: u64,
) -> Result<HashSet<CustodyIndex>, String> {
// If we don't have a node id, we cannot compute the custody duties anyway
let node_id = peer_id_to_node_id(peer_id)?;
let spec = &self.network_globals.spec;
if !(spec.custody_requirement..=spec.data_column_sidecar_subnet_count)
.contains(&custody_subnet_count)
if !(spec.custody_requirement..=spec.number_of_custody_groups)
.contains(&custody_group_count)
{
return Err("Invalid custody subnet count in metadata: out of range".to_string());
return Err("Invalid custody group count in metadata: out of range".to_string());
}
let custody_subnets = DataColumnSubnetId::compute_custody_subnets::<E>(
node_id.raw(),
custody_subnet_count,
spec,
)
.map(|subnets| subnets.collect())
.unwrap_or_else(|e| {
// This is an unreachable scenario unless there's a bug, as we've validated the csc
// just above.
error!(
self.log,
"Computing peer custody subnets failed unexpectedly";
"info" => "Falling back to default custody requirement subnets",
"peer_id" => %peer_id,
"custody_subnet_count" => custody_subnet_count,
"error" => ?e
);
DataColumnSubnetId::compute_custody_requirement_subnets::<E>(node_id.raw(), spec)
.collect()
});
Ok(custody_subnets)
get_custody_groups(node_id.raw(), custody_group_count, spec).map_err(|e| {
format!(
"Error computing peer custody groups for node {} with cgc={}: {:?}",
node_id, custody_group_count, e
)
})
}
}