From 2d223575d614bb163be8d6734f2e9fd150aa32ed Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Wed, 20 Aug 2025 15:08:53 +1000 Subject: [PATCH] Avoid unnecessary database lookups in data column RPC requests (#7897) This PR is an optimisation to avoid unnecessary database lookups when peer requests data columns that the node doesn't custody (advertised via `cgc`). e.g. an extreme but realistic example - a full node only store 4 custody columns by default, but it may receive a range request of 32 slots with all 128 columns, and this would result in 4096 database lookups but the node is only able to get 128 (4 * 32) of them. - Filter data column RPC requests (`DataColumnsByRoot`, `DataColumnsByRange`) to only lookup columns the node custodies - Prevents unnecessary database queries that would always fail for non-custody columns --- beacon_node/beacon_chain/src/beacon_chain.rs | 11 ++ .../beacon_chain/src/validator_custody.rs | 109 ++++++++++++++++++ .../network_beacon_processor/rpc_methods.rs | 18 ++- 3 files changed, 136 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 47d47314f3..524fd1e099 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -7153,6 +7153,17 @@ impl BeaconChain { .custody_context() .sampling_columns_for_epoch(epoch, &self.spec) } + + /// Returns a list of column indices that the node is expected to custody for a given epoch. + /// i.e. the node must have validated and persisted the column samples and should be able to + /// serve them to peers. + /// + /// If epoch is `None`, this function computes the custody columns at head. + pub fn custody_columns_for_epoch(&self, epoch_opt: Option) -> &[ColumnIndex] { + self.data_availability_checker + .custody_context() + .custody_columns_for_epoch(epoch_opt, &self.spec) + } } impl Drop for BeaconChain { diff --git a/beacon_node/beacon_chain/src/validator_custody.rs b/beacon_node/beacon_chain/src/validator_custody.rs index b8853362b8..1c89624f9d 100644 --- a/beacon_node/beacon_chain/src/validator_custody.rs +++ b/beacon_node/beacon_chain/src/validator_custody.rs @@ -323,6 +323,37 @@ impl CustodyContext { .expect("all_custody_columns_ordered should be initialized"); &all_columns_ordered[..num_of_columns_to_sample] } + + /// Returns the ordered list of column indices that the node is assigned to custody + /// (and advertised to peers) at the given epoch. If epoch is `None`, this function + /// computes the custody columns at head. + /// + /// This method differs from [`self::sampling_columns_for_epoch`] which returns all sampling columns. + /// The columns returned by this method are either identical to or a subset of the sampling columns, + /// representing only those columns that this node is responsible for maintaining custody of. + /// + /// # Parameters + /// * `epoch_opt` - Optional epoch to determine custody columns for. + /// + /// # Returns + /// A slice of ordered custody column indices for this epoch based on the node's custody configuration + pub fn custody_columns_for_epoch( + &self, + epoch_opt: Option, + spec: &ChainSpec, + ) -> &[ColumnIndex] { + let custody_group_count = if let Some(epoch) = epoch_opt { + self.custody_group_count_at_epoch(epoch, spec) as usize + } else { + self.custody_group_count_at_head(spec) as usize + }; + + let all_columns_ordered = self + .all_custody_columns_ordered + .get() + .expect("all_custody_columns_ordered should be initialized"); + &all_columns_ordered[..custody_group_count] + } } /// The custody count changed because of a change in the @@ -670,4 +701,82 @@ mod tests { assert_eq!(updated_custody_count_opt, expected_cgc_change); } } + + #[test] + fn custody_columns_for_epoch_no_validators_fullnode() { + let custody_context = CustodyContext::::new(false); + let spec = E::default_spec(); + let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::>(); + + custody_context + .init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec) + .expect("should initialise ordered data columns"); + + assert_eq!( + custody_context.custody_columns_for_epoch(None, &spec).len(), + spec.custody_requirement as usize + ); + } + + #[test] + fn custody_columns_for_epoch_no_validators_supernode() { + let custody_context = CustodyContext::::new(true); + let spec = E::default_spec(); + let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::>(); + + custody_context + .init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec) + .expect("should initialise ordered data columns"); + + assert_eq!( + custody_context.custody_columns_for_epoch(None, &spec).len(), + spec.number_of_custody_groups as usize + ); + } + + #[test] + fn custody_columns_for_epoch_with_validators_should_match_cgc() { + let custody_context = CustodyContext::::new(false); + let spec = E::default_spec(); + let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::>(); + let val_custody_units = 10; + + custody_context + .init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec) + .expect("should initialise ordered data columns"); + + let _ = custody_context.register_validators( + vec![( + 0, + val_custody_units * spec.balance_per_additional_custody_group, + )], + Slot::new(10), + &spec, + ); + + assert_eq!( + custody_context.custody_columns_for_epoch(None, &spec).len(), + val_custody_units as usize + ); + } + + #[test] + fn custody_columns_for_epoch_specific_epoch_uses_epoch_cgc() { + let custody_context = CustodyContext::::new(false); + let spec = E::default_spec(); + let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::>(); + let test_epoch = Epoch::new(5); + + custody_context + .init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec) + .expect("should initialise ordered data columns"); + + let expected_cgc = custody_context.custody_group_count_at_epoch(test_epoch, &spec); + assert_eq!( + custody_context + .custody_columns_for_epoch(Some(test_epoch), &spec) + .len(), + expected_cgc as usize + ); + } } diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 7d686d375d..bdfcc1d8a1 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -364,11 +364,19 @@ impl NetworkBeaconProcessor { request: DataColumnsByRootRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { let mut send_data_column_count = 0; + // Only attempt lookups for columns the node has advertised and is responsible for maintaining custody of. + let available_columns = self.chain.custody_columns_for_epoch(None); for data_column_ids_by_root in request.data_column_ids.as_slice() { + let indices_to_retrieve = data_column_ids_by_root + .columns + .iter() + .copied() + .filter(|c| available_columns.contains(c)) + .collect::>(); match self.chain.get_data_columns_checking_all_caches( data_column_ids_by_root.block_root, - data_column_ids_by_root.columns.iter().as_slice(), + &indices_to_retrieve, ) { Ok(data_columns) => { send_data_column_count += data_columns.len(); @@ -1070,8 +1078,14 @@ impl NetworkBeaconProcessor { self.get_block_roots_for_slot_range(req.start_slot, req.count, "DataColumnsByRange")?; let mut data_columns_sent = 0; + // Only attempt lookups for columns the node has advertised and is responsible for maintaining custody of. + let request_start_epoch = request_start_slot.epoch(T::EthSpec::slots_per_epoch()); + let available_columns = self + .chain + .custody_columns_for_epoch(Some(request_start_epoch)); + for root in block_roots { - for index in &req.columns { + for index in available_columns { match self.chain.get_data_column(&root, index) { Ok(Some(data_column_sidecar)) => { // Due to skip slots, data columns could be out of the range, we ensure they