Implement Subnet Sampling for PeerDAS (#6410)

* Add `SAMPLES_PER_SLOT` config.
* Rename `sampling` module to `peer_sampling`.
* Implement subnet sampling.
* Update lookup test.
* Merge branch 'unstable' into subnet-sampling
* Merge branch 'unstable' into subnet-sampling
  # Conflicts:
  #   beacon_node/beacon_chain/src/data_availability_checker.rs
  #   beacon_node/http_api/src/publish_blocks.rs
  #   beacon_node/lighthouse_network/src/types/globals.rs
  #   beacon_node/network/src/sync/manager.rs
* Merge branch 'unstable' into subnet-sampling
2026-06-15 17:58:23 +00:00 · 2024-10-04 10:27:30 +10:00
parent a4a673b780
commit f3a5e256da
20 changed files with 122 additions and 80 deletions
--- a/beacon_node/network/src/service.rs
+++ b/beacon_node/network/src/service.rs
@@ -813,7 +813,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
                }
            }
        } else {
-            for column_subnet in &self.network_globals.custody_subnets {
+            for column_subnet in &self.network_globals.sampling_subnets {
                for fork_digest in self.required_gossip_fork_digests() {
                    let gossip_kind = Subnet::DataColumn(*column_subnet).into();
                    let topic =
--- a/beacon_node/network/src/sync/block_lookups/tests.rs
+++ b/beacon_node/network/src/sync/block_lookups/tests.rs
@@ -1,6 +1,6 @@
 use crate::network_beacon_processor::NetworkBeaconProcessor;
 use crate::sync::manager::{BlockProcessType, SyncManager};
-use crate::sync::sampling::SamplingConfig;
+use crate::sync::peer_sampling::SamplingConfig;
 use crate::sync::{SamplingId, SyncMessage};
 use crate::NetworkMessage;
 use std::sync::Arc;
@@ -2037,9 +2037,10 @@ fn custody_lookup_happy_path() {
    // Should not request blobs
    let id = r.expect_block_lookup_request(block.canonical_root());
    r.complete_valid_block_request(id, block.into(), true);
-    let custody_column_count = spec.custody_requirement * spec.data_columns_per_subnet() as u64;
+    // for each slot we download `samples_per_slot` columns
+    let sample_column_count = spec.samples_per_slot * spec.data_columns_per_subnet() as u64;
    let custody_ids =
-        r.expect_only_data_columns_by_root_requests(block_root, custody_column_count as usize);
+        r.expect_only_data_columns_by_root_requests(block_root, sample_column_count as usize);
    r.complete_valid_custody_request(custody_ids, data_columns, false);
    r.expect_no_active_lookups();
 }
--- a/beacon_node/network/src/sync/manager.rs
+++ b/beacon_node/network/src/sync/manager.rs
@@ -38,9 +38,9 @@ use super::block_lookups::BlockLookups;
 use super::network_context::{
    BlockOrBlob, CustodyByRootResult, RangeRequestId, RpcEvent, SyncNetworkContext,
 };
+use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
 use super::peer_sync_info::{remote_sync_type, PeerSyncType};
 use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
-use super::sampling::{Sampling, SamplingConfig, SamplingResult};
 use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor};
 use crate::service::NetworkMessage;
 use crate::status::ToStatusMessage;
--- a/beacon_node/network/src/sync/mod.rs
+++ b/beacon_node/network/src/sync/mod.rs
@@ -6,9 +6,9 @@ mod block_lookups;
 mod block_sidecar_coupling;
 pub mod manager;
 mod network_context;
+mod peer_sampling;
 mod peer_sync_info;
 mod range_sync;
-mod sampling;

 pub use lighthouse_network::service::api_types::SamplingId;
 pub use manager::{BatchProcessResult, SyncMessage};
--- a/beacon_node/network/src/sync/network_context.rs
+++ b/beacon_node/network/src/sync/network_context.rs
@@ -418,13 +418,13 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
            false
        };

-        let (expects_custody_columns, num_of_custody_column_req) =
+        let (expects_columns, num_of_column_req) =
            if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) {
-                let custody_indexes = self.network_globals().custody_columns.clone();
+                let column_indexes = self.network_globals().sampling_columns.clone();
                let mut num_of_custody_column_req = 0;

                for (peer_id, columns_by_range_request) in
-                    self.make_columns_by_range_requests(request, &custody_indexes)?
+                    self.make_columns_by_range_requests(request, &column_indexes)?
                {
                    requested_peers.push(peer_id);

@@ -448,15 +448,15 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
                    num_of_custody_column_req += 1;
                }

-                (Some(custody_indexes), Some(num_of_custody_column_req))
+                (Some(column_indexes), Some(num_of_custody_column_req))
            } else {
                (None, None)
            };

        let info = RangeBlockComponentsRequest::new(
            expected_blobs,
-            expects_custody_columns,
-            num_of_custody_column_req,
+            expects_columns,
+            num_of_column_req,
            requested_peers,
        );
        self.range_block_components_requests
@@ -668,7 +668,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
        let imported_blob_indexes = self
            .chain
            .data_availability_checker
-            .imported_blob_indexes(&block_root)
+            .cached_blob_indexes(&block_root)
            .unwrap_or_default();
        // Include only the blob indexes not yet imported (received through gossip)
        let indices = (0..expected_blobs as u64)
@@ -786,13 +786,13 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
        let custody_indexes_imported = self
            .chain
            .data_availability_checker
-            .imported_custody_column_indexes(&block_root)
+            .cached_data_column_indexes(&block_root)
            .unwrap_or_default();

        // Include only the blob indexes not yet imported (received through gossip)
        let custody_indexes_to_fetch = self
            .network_globals()
-            .custody_columns
+            .sampling_columns
            .clone()
            .into_iter()
            .filter(|index| !custody_indexes_imported.contains(index))
--- a/beacon_node/network/src/sync/peer_sampling.rs
+++ b/beacon_node/network/src/sync/peer_sampling.rs
--- a/beacon_node/network/src/sync/range_sync/chain.rs
+++ b/beacon_node/network/src/sync/range_sync/chain.rs
@@ -444,9 +444,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                    self.request_batches(network)?;
                }
            }
-        } else if !self.good_peers_on_custody_subnets(self.processing_target, network) {
+        } else if !self.good_peers_on_sampling_subnets(self.processing_target, network) {
            // This is to handle the case where no batch was sent for the current processing
-            // target when there is no custody peers available. This is a valid state and should not
+            // target when there are no sampling peers available. This is a valid state and should not
            // return an error.
            return Ok(KeepChain);
        } else {
@@ -1075,10 +1075,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        // check if we have the batch for our optimistic start. If not, request it first.
        // We wait for this batch before requesting any other batches.
        if let Some(epoch) = self.optimistic_start {
-            if !self.good_peers_on_custody_subnets(epoch, network) {
+            if !self.good_peers_on_sampling_subnets(epoch, network) {
                debug!(
                    self.log,
-                    "Waiting for peers to be available on custody column subnets"
+                    "Waiting for peers to be available on sampling column subnets"
                );
                return Ok(KeepChain);
            }
@@ -1107,14 +1107,18 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
        Ok(KeepChain)
    }

-    /// Checks all custody column subnets for peers. Returns `true` if there is at least one peer in
-    /// every custody column subnet.
-    fn good_peers_on_custody_subnets(&self, epoch: Epoch, network: &SyncNetworkContext<T>) -> bool {
+    /// Checks all sampling column subnets for peers. Returns `true` if there is at least one peer in
+    /// every sampling column subnet.
+    fn good_peers_on_sampling_subnets(
+        &self,
+        epoch: Epoch,
+        network: &SyncNetworkContext<T>,
+    ) -> bool {
        if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
-            // Require peers on all custody column subnets before sending batches
+            // Require peers on all sampling column subnets before sending batches
            let peers_on_all_custody_subnets = network
                .network_globals()
-                .custody_subnets
+                .sampling_subnets
                .iter()
                .all(|subnet_id| {
                    let peer_count = network
@@ -1167,11 +1171,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
            return None;
        }

-        // don't send batch requests until we have peers on custody subnets
+        // don't send batch requests until we have peers on sampling subnets
        // TODO(das): this is a workaround to avoid sending out excessive block requests because
        // block and data column requests are currently coupled. This can be removed once we find a
        // way to decouple the requests and do retries individually, see issue #6258.
-        if !self.good_peers_on_custody_subnets(self.to_be_downloaded, network) {
+        if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
            debug!(
                self.log,
                "Waiting for peers to be available on custody column subnets"