Implement basic validator custody framework (no backfill) (#7578)

Resolves #6767


This PR implements a basic version of validator custody.
- It introduces a new `CustodyContext` object which contains information about the number of validators attached to a node and the custody count they contribute to the cgc.
- The `CustodyContext` is added in the da_checker and has methods for returning the current cgc and the number of columns to sample at head. Note that the logic for returning the cgc existed previously in the network globals.
- To estimate the number of validators attached, we use the `beacon_committee_subscriptions` endpoint. This might overestimate the number of validators actually publishing attestations from the node in the case of multi-BN setups. We could also potentially use the `publish_attestations` endpoint to get a more conservative estimate at a later point.
- Anytime there's a change in the `custody_group_count` due to addition/removal of validators, the custody context should send an event on a broadcast channel. The only subscriber for the channel exists in the network service, which simply subscribes to more subnets. There can be additional subscribers in sync that will start a backfill once the cgc changes.

TODO

- [ ] **NOT REQUIRED:** Currently, the logic only handles an increase in validator count and does not handle a decrease. We should ideally unsubscribe from subnets when the cgc has decreased.
- [ ] **NOT REQUIRED:** Add a service in the `CustodyContext` that emits an event once `MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS` passes after updating the current cgc. This event should be picked up by a subscriber which updates the enr and metadata.
- [x] Add more tests
This commit is contained in:
Pawan Dhananjay
2025-06-11 11:10:06 -07:00
committed by GitHub
parent 076a1c3fae
commit 5f208bb858
38 changed files with 928 additions and 350 deletions

View File

@@ -780,7 +780,7 @@ pub fn update_sync_metrics<E: EthSpec>(network_globals: &Arc<NetworkGlobals<E>>)
let all_column_subnets =
(0..network_globals.spec.data_column_sidecar_subnet_count).map(DataColumnSubnetId::new);
let custody_column_subnets = network_globals.sampling_subnets.iter();
let custody_column_subnets = network_globals.sampling_subnets();
// Iterate all subnet values to set to zero the empty entries in peers_per_column_subnet
for subnet in all_column_subnets {
@@ -794,7 +794,7 @@ pub fn update_sync_metrics<E: EthSpec>(network_globals: &Arc<NetworkGlobals<E>>)
// Registering this metric is a duplicate for supernodes but helpful for fullnodes. This way
// operators can monitor the health of only the subnets of their interest without complex
// Grafana queries.
for subnet in custody_column_subnets {
for subnet in custody_column_subnets.iter() {
set_gauge_entry(
&PEERS_PER_CUSTODY_COLUMN_SUBNET,
&[&format!("{subnet}")],

View File

@@ -1272,10 +1272,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let verification_result = self
.chain
.clone()
.verify_block_for_gossip(
block.clone(),
self.network_globals.custody_columns_count() as usize,
)
.verify_block_for_gossip(block.clone())
.await;
if verification_result.is_ok() {

View File

@@ -843,7 +843,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
block_root: Hash256,
publish_blobs: bool,
) {
let custody_columns = self.network_globals.sampling_columns.clone();
let custody_columns = self.network_globals.sampling_columns();
let self_cloned = self.clone();
let publish_fn = move |blobs_or_data_column| {
if publish_blobs {
@@ -930,7 +930,12 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
publish_columns: bool,
) -> Option<AvailabilityProcessingStatus> {
// Only supernodes attempt reconstruction
if !self.network_globals.is_supernode() {
if !self
.chain
.data_availability_checker
.custody_context()
.current_is_supernode
{
return None;
}

View File

@@ -285,7 +285,7 @@ impl TestRig {
)
.unwrap()
.into_iter()
.filter(|c| network_globals.sampling_columns.contains(&c.index))
.filter(|c| network_globals.sampling_columns().contains(&c.index))
.collect::<Vec<_>>();
(None, Some(custody_columns))
@@ -371,22 +371,12 @@ impl TestRig {
}
}
pub fn custody_columns_count(&self) -> usize {
self.network_beacon_processor
.network_globals
.custody_columns_count() as usize
}
pub fn enqueue_rpc_block(&self) {
let block_root = self.next_block.canonical_root();
self.network_beacon_processor
.send_rpc_beacon_block(
block_root,
RpcBlock::new_without_blobs(
Some(block_root),
self.next_block.clone(),
self.custody_columns_count(),
),
RpcBlock::new_without_blobs(Some(block_root), self.next_block.clone()),
std::time::Duration::default(),
BlockProcessType::SingleBlock { id: 0 },
)
@@ -398,11 +388,7 @@ impl TestRig {
self.network_beacon_processor
.send_rpc_beacon_block(
block_root,
RpcBlock::new_without_blobs(
Some(block_root),
self.next_block.clone(),
self.custody_columns_count(),
),
RpcBlock::new_without_blobs(Some(block_root), self.next_block.clone()),
std::time::Duration::default(),
BlockProcessType::SingleBlock { id: 1 },
)

View File

@@ -10,6 +10,7 @@ use beacon_processor::{work_reprocessing_queue::ReprocessQueueMessage, BeaconPro
use futures::channel::mpsc::Sender;
use futures::future::OptionFuture;
use futures::prelude::*;
use lighthouse_network::rpc::InboundRequestId;
use lighthouse_network::rpc::RequestType;
use lighthouse_network::service::Network;
@@ -105,6 +106,12 @@ pub enum NetworkMessage<E: EthSpec> {
ConnectTrustedPeer(Enr),
/// Disconnect from a trusted peer and remove it from the `trusted_peers` mapping.
DisconnectTrustedPeer(Enr),
/// Custody group count changed due to a change in validators' weight.
/// Subscribe to new subnets and update ENR metadata.
CustodyCountChanged {
new_custody_group_count: u64,
sampling_count: u64,
},
}
/// Messages triggered by validators that may trigger a subscription to a subnet.
@@ -270,7 +277,15 @@ impl<T: BeaconChainTypes> NetworkService<T> {
};
// launch libp2p service
let (mut libp2p, network_globals) = Network::new(executor.clone(), service_context).await?;
let (mut libp2p, network_globals) = Network::new(
executor.clone(),
service_context,
beacon_chain
.data_availability_checker
.custody_context()
.custody_group_count_at_head(&beacon_chain.spec),
)
.await?;
// Repopulate the DHT with stored ENR's if discovery is not disabled.
if !config.disable_discovery {
@@ -745,6 +760,15 @@ impl<T: BeaconChainTypes> NetworkService<T> {
);
}
}
NetworkMessage::CustodyCountChanged {
new_custody_group_count,
sampling_count,
} => {
// subscribe to `sampling_count` subnets
self.libp2p
.subscribe_new_data_column_subnets(sampling_count);
self.libp2p.update_enr_cgc(new_custody_group_count);
}
}
}

View File

@@ -257,17 +257,11 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
));
}
RpcBlock::new_with_custody_columns(
Some(block_root),
block,
custody_columns,
expects_custody_columns.len(),
spec,
)
.map_err(|e| format!("{e:?}"))?
RpcBlock::new_with_custody_columns(Some(block_root), block, custody_columns, spec)
.map_err(|e| format!("{e:?}"))?
} else {
// Block has no data, expects zero columns
RpcBlock::new_without_blobs(Some(block_root), block, 0)
RpcBlock::new_without_blobs(Some(block_root), block)
});
}

View File

@@ -476,7 +476,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// Attempt to find all required custody peers before sending any request or creating an ID
let columns_by_range_peers_to_request =
if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) {
let column_indexes = self.network_globals().sampling_columns.clone();
let column_indexes = self.network_globals().sampling_columns();
Some(self.select_columns_by_range_peers_to_request(
&column_indexes,
peers,
@@ -534,7 +534,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
(
data_column_requests,
self.network_globals()
.sampling_columns
.sampling_columns()
.clone()
.iter()
.copied()
@@ -928,8 +928,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
// Include only the blob indexes not yet imported (received through gossip)
let custody_indexes_to_fetch = self
.network_globals()
.sampling_columns
.clone()
.sampling_columns()
.into_iter()
.filter(|index| !custody_indexes_imported.contains(index))
.collect::<Vec<_>>();
@@ -1487,11 +1486,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
.beacon_processor_if_enabled()
.ok_or(SendErrorProcessor::ProcessorNotAvailable)?;
let block = RpcBlock::new_without_blobs(
Some(block_root),
block,
self.network_globals().custody_columns_count() as usize,
);
let block = RpcBlock::new_without_blobs(Some(block_root), block);
debug!(block = ?block_root, id, "Sending block for processing");
// Lookup sync event safety: If `beacon_processor.send_rpc_beacon_block` returns Ok() sync

View File

@@ -1032,7 +1032,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// Require peers on all sampling column subnets before sending batches
let peers_on_all_custody_subnets = network
.network_globals()
.sampling_subnets
.sampling_subnets()
.iter()
.all(|subnet_id| {
let peer_count = network

View File

@@ -1205,12 +1205,8 @@ impl TestRig {
payload_verification_status: PayloadVerificationStatus::Verified,
is_valid_merge_transition_block: false,
};
let executed_block = AvailabilityPendingExecutedBlock::new(
block,
import_data,
payload_verification_outcome,
self.network_globals.custody_columns_count() as usize,
);
let executed_block =
AvailabilityPendingExecutedBlock::new(block, import_data, payload_verification_outcome);
match self
.harness
.chain

View File

@@ -449,18 +449,10 @@ fn build_rpc_block(
RpcBlock::new(None, block, Some(blobs.clone())).unwrap()
}
Some(DataSidecars::DataColumns(columns)) => {
RpcBlock::new_with_custody_columns(
None,
block,
columns.clone(),
// TODO(das): Assumes CGC = max value. Change if we want to do more complex tests
columns.len(),
spec,
)
.unwrap()
RpcBlock::new_with_custody_columns(None, block, columns.clone(), spec).unwrap()
}
// Block has no data, expects zero columns
None => RpcBlock::new_without_blobs(None, block, 0),
None => RpcBlock::new_without_blobs(None, block),
}
}