Implement Subnet Sampling for PeerDAS (#6410)

* Add `SAMPLES_PER_SLOT` config.

* Rename `sampling` module to `peer_sampling`

* Implement subnet sampling.

* Update lookup test.

* Merge branch 'unstable' into subnet-sampling

* Merge branch 'unstable' into subnet-sampling

# Conflicts:
#	beacon_node/beacon_chain/src/data_availability_checker.rs
#	beacon_node/http_api/src/publish_blocks.rs
#	beacon_node/lighthouse_network/src/types/globals.rs
#	beacon_node/network/src/sync/manager.rs

* Merge branch 'unstable' into subnet-sampling
This commit is contained in:
Jimmy Chen
2024-10-04 10:27:30 +10:00
committed by GitHub
parent a4a673b780
commit f3a5e256da
20 changed files with 122 additions and 80 deletions

View File

@@ -813,7 +813,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
}
}
} else {
for column_subnet in &self.network_globals.custody_subnets {
for column_subnet in &self.network_globals.sampling_subnets {
for fork_digest in self.required_gossip_fork_digests() {
let gossip_kind = Subnet::DataColumn(*column_subnet).into();
let topic =

View File

@@ -1,6 +1,6 @@
use crate::network_beacon_processor::NetworkBeaconProcessor;
use crate::sync::manager::{BlockProcessType, SyncManager};
use crate::sync::sampling::SamplingConfig;
use crate::sync::peer_sampling::SamplingConfig;
use crate::sync::{SamplingId, SyncMessage};
use crate::NetworkMessage;
use std::sync::Arc;
@@ -2037,9 +2037,10 @@ fn custody_lookup_happy_path() {
// Should not request blobs
let id = r.expect_block_lookup_request(block.canonical_root());
r.complete_valid_block_request(id, block.into(), true);
let custody_column_count = spec.custody_requirement * spec.data_columns_per_subnet() as u64;
// for each slot we download `samples_per_slot * data_columns_per_subnet` columns
let sample_column_count = spec.samples_per_slot * spec.data_columns_per_subnet() as u64;
let custody_ids =
r.expect_only_data_columns_by_root_requests(block_root, custody_column_count as usize);
r.expect_only_data_columns_by_root_requests(block_root, sample_column_count as usize);
r.complete_valid_custody_request(custody_ids, data_columns, false);
r.expect_no_active_lookups();
}

View File

@@ -38,9 +38,9 @@ use super::block_lookups::BlockLookups;
use super::network_context::{
BlockOrBlob, CustodyByRootResult, RangeRequestId, RpcEvent, SyncNetworkContext,
};
use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use super::sampling::{Sampling, SamplingConfig, SamplingResult};
use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor};
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;

View File

@@ -6,9 +6,9 @@ mod block_lookups;
mod block_sidecar_coupling;
pub mod manager;
mod network_context;
mod peer_sampling;
mod peer_sync_info;
mod range_sync;
mod sampling;
pub use lighthouse_network::service::api_types::SamplingId;
pub use manager::{BatchProcessResult, SyncMessage};

View File

@@ -418,13 +418,13 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
false
};
let (expects_custody_columns, num_of_custody_column_req) =
let (expects_columns, num_of_column_req) =
if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) {
let custody_indexes = self.network_globals().custody_columns.clone();
let column_indexes = self.network_globals().sampling_columns.clone();
let mut num_of_custody_column_req = 0;
for (peer_id, columns_by_range_request) in
self.make_columns_by_range_requests(request, &custody_indexes)?
self.make_columns_by_range_requests(request, &column_indexes)?
{
requested_peers.push(peer_id);
@@ -448,15 +448,15 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
num_of_custody_column_req += 1;
}
(Some(custody_indexes), Some(num_of_custody_column_req))
(Some(column_indexes), Some(num_of_custody_column_req))
} else {
(None, None)
};
let info = RangeBlockComponentsRequest::new(
expected_blobs,
expects_custody_columns,
num_of_custody_column_req,
expects_columns,
num_of_column_req,
requested_peers,
);
self.range_block_components_requests
@@ -668,7 +668,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
let imported_blob_indexes = self
.chain
.data_availability_checker
.imported_blob_indexes(&block_root)
.cached_blob_indexes(&block_root)
.unwrap_or_default();
// Include only the blob indexes not yet imported (received through gossip)
let indices = (0..expected_blobs as u64)
@@ -786,13 +786,13 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
let custody_indexes_imported = self
.chain
.data_availability_checker
.imported_custody_column_indexes(&block_root)
.cached_data_column_indexes(&block_root)
.unwrap_or_default();
// Include only the data column indexes not yet imported (received through gossip)
let custody_indexes_to_fetch = self
.network_globals()
.custody_columns
.sampling_columns
.clone()
.into_iter()
.filter(|index| !custody_indexes_imported.contains(index))

View File

@@ -444,9 +444,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
self.request_batches(network)?;
}
}
} else if !self.good_peers_on_custody_subnets(self.processing_target, network) {
} else if !self.good_peers_on_sampling_subnets(self.processing_target, network) {
// This is to handle the case where no batch was sent for the current processing
// target when there is no custody peers available. This is a valid state and should not
// target when there are no sampling peers available. This is a valid state and should not
// return an error.
return Ok(KeepChain);
} else {
@@ -1075,10 +1075,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// check if we have the batch for our optimistic start. If not, request it first.
// We wait for this batch before requesting any other batches.
if let Some(epoch) = self.optimistic_start {
if !self.good_peers_on_custody_subnets(epoch, network) {
if !self.good_peers_on_sampling_subnets(epoch, network) {
debug!(
self.log,
"Waiting for peers to be available on custody column subnets"
"Waiting for peers to be available on sampling column subnets"
);
return Ok(KeepChain);
}
@@ -1107,14 +1107,18 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
Ok(KeepChain)
}
/// Checks all custody column subnets for peers. Returns `true` if there is at least one peer in
/// every custody column subnet.
fn good_peers_on_custody_subnets(&self, epoch: Epoch, network: &SyncNetworkContext<T>) -> bool {
/// Checks all sampling column subnets for peers. Returns `true` if there is at least one peer in
/// every sampling column subnet.
fn good_peers_on_sampling_subnets(
&self,
epoch: Epoch,
network: &SyncNetworkContext<T>,
) -> bool {
if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
// Require peers on all custody column subnets before sending batches
// Require peers on all sampling column subnets before sending batches
let peers_on_all_custody_subnets = network
.network_globals()
.custody_subnets
.sampling_subnets
.iter()
.all(|subnet_id| {
let peer_count = network
@@ -1167,11 +1171,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
return None;
}
// don't send batch requests until we have peers on custody subnets
// don't send batch requests until we have peers on sampling subnets
// TODO(das): this is a workaround to avoid sending out excessive block requests because
// block and data column requests are currently coupled. This can be removed once we find a
// way to decouple the requests and do retries individually, see issue #6258.
if !self.good_peers_on_custody_subnets(self.to_be_downloaded, network) {
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
debug!(
self.log,
"Waiting for peers to be available on custody column subnets"