Implement basic validator custody framework (no backfill) (#7578)

Resolves #6767


This PR implements a basic version of validator custody.
- It introduces a new `CustodyContext` object, which tracks the number of validators attached to the node and the custody count they contribute to the custody group count (cgc); a minimal sketch of its shape follows this list.
- The `CustodyContext` is added to the `da_checker` and has methods for returning the current cgc and the number of columns to sample at head. Note that the logic for returning the cgc previously lived in the network globals.
- To estimate the number of attached validators, we use the `beacon_committee_subscriptions` endpoint. In multi-BN setups this might overestimate the number of validators actually publishing attestations from this node; we could later switch to the `publish_attestations` endpoint for a more conservative estimate.
- Any time the `custody_group_count` changes due to the addition or removal of validators, the custody context sends an event on a broadcast channel. The only subscriber for the channel currently lives in the network service, which simply subscribes to more subnets (a sketch of such a subscriber follows the TODO list below). Additional subscribers can later be added in sync to start a backfill once the cgc changes.
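
A minimal sketch of what such a `CustodyContext` could look like, assuming a tokio broadcast channel and placeholder constants; the names and the exact cgc formula here are illustrative, not the code merged in this PR:

```rust
// Sketch only: field names, constants, and the cgc formula are assumptions.
use std::cmp::max;
use std::sync::atomic::{AtomicU64, Ordering};
use tokio::sync::broadcast;

/// Event broadcast whenever the effective custody group count changes.
#[derive(Clone, Copy, Debug)]
pub struct CustodyCountChanged {
    pub new_custody_group_count: u64,
}

pub struct CustodyContext {
    /// Supernodes custody all groups regardless of attached validators.
    is_supernode: bool,
    /// Custody contributed by validators attached to this node.
    validator_custody: AtomicU64,
    /// Notifies subscribers (e.g. the network service) of cgc increases.
    tx: broadcast::Sender<CustodyCountChanged>,
}

// Placeholder values; the real values come from the chain spec.
const BASE_CUSTODY_REQUIREMENT: u64 = 4;
const NUMBER_OF_CUSTODY_GROUPS: u64 = 128;

impl CustodyContext {
    pub fn new(is_supernode: bool) -> Self {
        let (tx, _rx) = broadcast::channel(16);
        Self {
            is_supernode,
            validator_custody: AtomicU64::new(0),
            tx,
        }
    }

    /// Subscribers (network service, later sync) obtain a receiver here.
    pub fn subscribe(&self) -> broadcast::Receiver<CustodyCountChanged> {
        self.tx.subscribe()
    }

    /// Current cgc: everything for supernodes, otherwise the validator-derived
    /// custody floored at the base requirement and capped at the group count.
    pub fn custody_group_count(&self) -> u64 {
        if self.is_supernode {
            return NUMBER_OF_CUSTODY_GROUPS;
        }
        max(
            BASE_CUSTODY_REQUIREMENT,
            self.validator_custody.load(Ordering::Relaxed),
        )
        .min(NUMBER_OF_CUSTODY_GROUPS)
    }

    /// Called when the validator custody estimated from
    /// `beacon_committee_subscriptions` changes. Only increases take effect,
    /// matching the TODO below; an event is emitted only when the cgc grows.
    pub fn register_validator_custody(&self, validator_custody: u64) {
        let old_cgc = self.custody_group_count();
        self.validator_custody
            .fetch_max(validator_custody, Ordering::Relaxed);
        let new_cgc = self.custody_group_count();
        if new_cgc > old_cgc {
            // Ignore the error returned when no subscriber exists yet.
            let _ = self.tx.send(CustodyCountChanged {
                new_custody_group_count: new_cgc,
            });
        }
    }
}
```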

TODO

- [ ] **NOT REQUIRED:** Currently, the logic only handles an increase in validator count and does not handle a decrease. Ideally we should unsubscribe from subnets when the cgc decreases.
- [ ] **NOT REQUIRED:** Add a service to the `CustodyContext` that emits an event once `MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS` epochs have passed since the current cgc was updated. This event should be picked up by a subscriber that updates the ENR and metadata.
- [x] Add more tests
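
Under the same assumptions as the sketch above, the subscriber side could look like the following; `join_new_column_subnets` is a hypothetical stand-in for the network service's actual subnet logic:

```rust
/// Hypothetical stand-in: map the new cgc to data column subnets and join
/// any this node is not yet subscribed to.
fn join_new_column_subnets(new_custody_group_count: u64) {
    println!("cgc increased to {new_custody_group_count}; joining new subnets");
}

/// Runs in the network service; a sync subscriber could later be added
/// alongside this one to trigger a backfill on cgc changes.
async fn handle_custody_count_changes(
    mut rx: tokio::sync::broadcast::Receiver<CustodyCountChanged>,
) {
    // `recv` errors when the channel closes or this receiver lags behind;
    // the sketch treats both as terminal.
    while let Ok(event) = rx.recv().await {
        join_new_column_subnets(event.new_custody_group_count);
    }
}
```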
Commit 5f208bb858 (parent 076a1c3fae) by Pawan Dhananjay, committed via GitHub on 2025-06-11 11:10:06 -07:00.
38 changed files with 928 additions and 350 deletions.

@@ -8,6 +8,7 @@ use crate::block_verification_types::{
 use crate::data_availability_checker::{Availability, AvailabilityCheckError};
 use crate::data_column_verification::KzgVerifiedCustodyDataColumn;
 use crate::BeaconChainTypes;
+use crate::CustodyContext;
 use lru::LruCache;
 use parking_lot::RwLock;
 use std::cmp::Ordering;
@@ -158,6 +159,7 @@ impl<E: EthSpec> PendingComponents<E> {
     pub fn make_available<R>(
         &mut self,
         spec: &Arc<ChainSpec>,
+        num_expected_columns: u64,
         recover: R,
     ) -> Result<Option<AvailableExecutedBlock<E>>, AvailabilityCheckError>
     where
@@ -171,12 +173,11 @@ impl<E: EthSpec> PendingComponents<E> {
         };
         let num_expected_blobs = block.num_blobs_expected();
+        let num_expected_columns = num_expected_columns as usize;
         let blob_data = if num_expected_blobs == 0 {
             Some(AvailableBlockData::NoData)
         } else if spec.is_peer_das_enabled_for_epoch(block.epoch()) {
             let num_received_columns = self.verified_data_columns.len();
-            let num_expected_columns = block.custody_columns_count();
             match num_received_columns.cmp(&num_expected_columns) {
                 Ordering::Greater => {
                     // Should never happen
@@ -254,7 +255,6 @@ impl<E: EthSpec> PendingComponents<E> {
             block,
             import_data,
             payload_verification_outcome,
-            custody_columns_count: _,
         } = recover(block.clone())?;
         let available_block = AvailableBlock {
@@ -308,19 +308,21 @@ impl<E: EthSpec> PendingComponents<E> {
         })
     }
 
-    pub fn status_str(&self, block_epoch: Epoch, spec: &ChainSpec) -> String {
+    pub fn status_str(
+        &self,
+        block_epoch: Epoch,
+        num_expected_columns: Option<u64>,
+        spec: &ChainSpec,
+    ) -> String {
         let block_count = if self.executed_block.is_some() { 1 } else { 0 };
         if spec.is_peer_das_enabled_for_epoch(block_epoch) {
-            let custody_columns_count = if let Some(block) = self.get_cached_block() {
-                &block.custody_columns_count().to_string()
-            } else {
-                "?"
-            };
             format!(
                 "block {} data_columns {}/{}",
                 block_count,
                 self.verified_data_columns.len(),
-                custody_columns_count,
+                num_expected_columns
+                    .map(|c| c.to_string())
+                    .unwrap_or("?".into())
             )
         } else {
             let num_expected_blobs = if let Some(block) = self.get_cached_block() {
@@ -346,6 +348,7 @@ pub struct DataAvailabilityCheckerInner<T: BeaconChainTypes> {
     /// This cache holds a limited number of states in memory and reconstructs them
     /// from disk when necessary. This is necessary until we merge tree-states
     state_cache: StateLRUCache<T>,
+    custody_context: Arc<CustodyContext>,
     spec: Arc<ChainSpec>,
 }
@@ -362,11 +365,13 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
     pub fn new(
         capacity: NonZeroUsize,
         beacon_store: BeaconStore<T>,
+        custody_context: Arc<CustodyContext>,
         spec: Arc<ChainSpec>,
     ) -> Result<Self, AvailabilityCheckError> {
         Ok(Self {
             critical: RwLock::new(LruCache::new(capacity)),
             state_cache: StateLRUCache::new(beacon_store, spec.clone()),
+            custody_context,
             spec,
         })
     }
@@ -470,13 +475,15 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         debug!(
             component = "blobs",
             ?block_root,
-            status = pending_components.status_str(epoch, &self.spec),
+            status = pending_components.status_str(epoch, None, &self.spec),
             "Component added to data availability checker"
         );
 
-        if let Some(available_block) = pending_components.make_available(&self.spec, |block| {
-            self.state_cache.recover_pending_executed_block(block)
-        })? {
+        if let Some(available_block) = pending_components.make_available(
+            &self.spec,
+            self.custody_context.sampling_size(Some(epoch), &self.spec),
+            |block| self.state_cache.recover_pending_executed_block(block),
+        )? {
             // We keep the pending components in the availability cache during block import (#5845).
             write_lock.put(block_root, pending_components);
             drop(write_lock);
@@ -519,16 +526,19 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         // Merge in the data columns.
         pending_components.merge_data_columns(kzg_verified_data_columns)?;
 
+        let num_expected_columns = self.custody_context.sampling_size(Some(epoch), &self.spec);
         debug!(
             component = "data_columns",
             ?block_root,
-            status = pending_components.status_str(epoch, &self.spec),
+            status = pending_components.status_str(epoch, Some(num_expected_columns), &self.spec),
             "Component added to data availability checker"
         );
 
-        if let Some(available_block) = pending_components.make_available(&self.spec, |block| {
-            self.state_cache.recover_pending_executed_block(block)
-        })? {
+        if let Some(available_block) =
+            pending_components.make_available(&self.spec, num_expected_columns, |block| {
+                self.state_cache.recover_pending_executed_block(block)
+            })?
+        {
             // We keep the pending components in the availability cache during block import (#5845).
             write_lock.put(block_root, pending_components);
             drop(write_lock);
@@ -612,17 +622,20 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         // Merge in the block.
         pending_components.merge_block(diet_executed_block);
 
+        let num_expected_columns = self.custody_context.sampling_size(Some(epoch), &self.spec);
         debug!(
             component = "block",
             ?block_root,
-            status = pending_components.status_str(epoch, &self.spec),
+            status = pending_components.status_str(epoch, Some(num_expected_columns), &self.spec),
             "Component added to data availability checker"
        );
 
         // Check if we have all components and entire set is consistent.
-        if let Some(available_block) = pending_components.make_available(&self.spec, |block| {
-            self.state_cache.recover_pending_executed_block(block)
-        })? {
+        if let Some(available_block) = pending_components.make_available(
+            &self.spec,
+            self.custody_context.sampling_size(Some(epoch), &self.spec),
+            |block| self.state_cache.recover_pending_executed_block(block),
+        )? {
             // We keep the pending components in the availability cache during block import (#5845).
             write_lock.put(block_root, pending_components);
             drop(write_lock);
@@ -700,7 +713,6 @@ mod test {
     use types::{ExecPayload, MinimalEthSpec};
 
     const LOW_VALIDATOR_COUNT: usize = 32;
-    const DEFAULT_TEST_CUSTODY_COLUMN_COUNT: usize = 8;
 
     fn get_store_with_spec<E: EthSpec>(
         db_path: &TempDir,
@@ -861,7 +873,6 @@
             block,
             import_data,
             payload_verification_outcome,
-            custody_columns_count: DEFAULT_TEST_CUSTODY_COLUMN_COUNT,
         };
 
         (availability_pending_block, gossip_verified_blobs)
@@ -888,9 +899,15 @@
         let spec = harness.spec.clone();
         let test_store = harness.chain.store.clone();
         let capacity_non_zero = new_non_zero_usize(capacity);
+        let custody_context = Arc::new(CustodyContext::new(false));
         let cache = Arc::new(
-            DataAvailabilityCheckerInner::<T>::new(capacity_non_zero, test_store, spec.clone())
-                .expect("should create cache"),
+            DataAvailabilityCheckerInner::<T>::new(
+                capacity_non_zero,
+                test_store,
+                custody_context,
+                spec.clone(),
+            )
+            .expect("should create cache"),
         );
 
         (harness, cache, chain_db_path)
     }
@@ -1239,8 +1256,6 @@ mod pending_components_tests {
                 payload_verification_status: PayloadVerificationStatus::Verified,
                 is_valid_merge_transition_block: false,
             },
-            // Default custody columns count, doesn't matter here
-            custody_columns_count: 8,
         };
 
         (block.into(), blobs, invalid_blobs)
     }

@@ -24,7 +24,6 @@ pub struct DietAvailabilityPendingExecutedBlock<E: EthSpec> {
     parent_eth1_finalization_data: Eth1FinalizationData,
     consensus_context: OnDiskConsensusContext<E>,
     payload_verification_outcome: PayloadVerificationOutcome,
-    custody_columns_count: usize,
 }
 
 /// just implementing the same methods as `AvailabilityPendingExecutedBlock`
@@ -54,10 +53,6 @@ impl<E: EthSpec> DietAvailabilityPendingExecutedBlock<E> {
             .unwrap_or_default()
     }
 
-    pub fn custody_columns_count(&self) -> usize {
-        self.custody_columns_count
-    }
-
     /// Returns the epoch corresponding to `self.slot()`.
     pub fn epoch(&self) -> Epoch {
         self.block.slot().epoch(E::slots_per_epoch())
@@ -107,7 +102,6 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
                 executed_block.import_data.consensus_context,
             ),
             payload_verification_outcome: executed_block.payload_verification_outcome,
-            custody_columns_count: executed_block.custody_columns_count,
         }
     }
@@ -137,7 +131,6 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
                     .into_consensus_context(),
             },
             payload_verification_outcome: diet_executed_block.payload_verification_outcome,
-            custody_columns_count: diet_executed_block.custody_columns_count,
         })
     }
@@ -224,7 +217,6 @@ impl<E: EthSpec> From<AvailabilityPendingExecutedBlock<E>>
                 value.import_data.consensus_context,
             ),
             payload_verification_outcome: value.payload_verification_outcome,
-            custody_columns_count: value.custody_columns_count,
         }
     }
 }