Enable reconstruction for nodes custodying more than 50% of columns and instrument tracing (#8052)

Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>

Co-Authored-By: Jimmy Chen <jimmy@sigmaprime.io>
This commit is contained in:
Jimmy Chen
2025-09-16 18:17:43 +10:00
committed by GitHub
parent 242bdfcf12
commit 3de646c8b3
7 changed files with 76 additions and 72 deletions

View File

@@ -3299,10 +3299,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
let data_availability_checker = self.data_availability_checker.clone();
let current_span = Span::current();
let result = self
.task_executor
.spawn_blocking_handle(
move || data_availability_checker.reconstruct_data_columns(&block_root),
move || {
let _guard = current_span.enter();
data_availability_checker.reconstruct_data_columns(&block_root)
},
"reconstruct_data_columns",
)
.ok_or(BeaconChainError::RuntimeShutdown)?

View File

@@ -547,6 +547,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
}
}
#[instrument(skip_all, level = "debug")]
pub fn reconstruct_data_columns(
&self,
block_root: &Hash256,

View File

@@ -592,9 +592,9 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
/// Check whether data column reconstruction should be attempted.
///
/// Potentially trigger reconstruction if:
/// - Our custody requirement is all columns (supernode), and we haven't got all columns
/// - We have >= 50% of columns, but not all columns
/// Potentially trigger reconstruction if all of the following conditions hold:
/// - Our custody requirement is more than 50% of total columns
/// - We haven't received all required columns
/// - Reconstruction hasn't been started for the block
///
/// If reconstruction is required, returns `PendingComponents` which contains the
@@ -609,15 +609,25 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
return ReconstructColumnsDecision::No("block already imported");
};
// If we're sampling all columns, it means we must be custodying all columns.
let Some(epoch) = pending_components
.verified_data_columns
.first()
.map(|c| c.as_data_column().epoch())
else {
return ReconstructColumnsDecision::No("not enough columns");
};
let total_column_count = T::EthSpec::number_of_columns();
let sampling_column_count = self
.custody_context
.num_of_data_columns_to_sample(epoch, &self.spec);
let received_column_count = pending_components.verified_data_columns.len();
if pending_components.reconstruction_started {
return ReconstructColumnsDecision::No("already started");
}
if received_column_count >= total_column_count {
return ReconstructColumnsDecision::No("all columns received");
if received_column_count >= sampling_column_count {
return ReconstructColumnsDecision::No("all sampling columns received");
}
if received_column_count < total_column_count / 2 {
return ReconstructColumnsDecision::No("not enough columns");

View File

@@ -130,7 +130,7 @@ pub struct CustodyContext<E: EthSpec> {
/// and enr values.
validator_custody_count: AtomicU64,
/// Is the node run as a supernode based on current cli parameters.
pub current_is_supernode: bool,
current_is_supernode: bool,
/// The persisted value for `is_supernode` based on the previous run of this node.
///
/// Note: We require this value because if a user restarts the node with a higher cli custody
@@ -307,6 +307,14 @@ impl<E: EthSpec> CustodyContext<E> {
.expect("should compute node sampling size from valid chain spec")
}
/// Reports whether this node should attempt data column reconstruction at `epoch`.
///
/// Returns `true` only when the number of columns sampled at `epoch` (as given by
/// `num_of_data_columns_to_sample`) is strictly greater than half of
/// `E::number_of_columns()`.
pub fn should_attempt_reconstruction(&self, epoch: Epoch, spec: &ChainSpec) -> bool {
    // Half of the total column count (integer division).
    let half_of_all_columns = E::number_of_columns() / 2;
    let sampled_column_count = self.num_of_data_columns_to_sample(epoch, spec);
    // The comparison is strict on purpose: a node sampling exactly 50% of the columns
    // doesn't need the remaining columns, so reconstruction is not necessary for it.
    sampled_column_count > half_of_all_columns
}
/// Returns the ordered list of column indices that should be sampled for data availability checking at the given epoch.
///
/// # Parameters