Mirror of https://github.com/sigp/lighthouse.git, synced 2026-05-08 17:26:04 +00:00
Refactor data column reconstruction and avoid blocking processing (#6403)
* Move reconstruction logic out of `overflow_lru_cache` to simplify the code, avoid having to pass `DataColumnsToPublish` around, and avoid blocking other processing.
* Publish reconstructed cells before recomputing head. Remove duplicate functions.
* Merge branch 'unstable' into non-blocking-reconstruction
* Merge branch 'unstable' into non-blocking-reconstruction
  # Conflicts:
  #	beacon_node/beacon_chain/src/beacon_chain.rs
  #	beacon_node/beacon_chain/src/data_availability_checker.rs
  #	beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs
  #	beacon_node/network/src/network_beacon_processor/sync_methods.rs
* Spawn a blocking task for reconstruction.
* Merge branch 'unstable' into non-blocking-reconstruction
  # Conflicts:
  #	beacon_node/network/src/network_beacon_processor/mod.rs
* Fix fmt
* Merge branch 'unstable' into non-blocking-reconstruction
  # Conflicts:
  #	beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs
* Fix race condition by making check and mutation atomic, as suggested by Lion. Also added error handling for reconstruction failure.
* Add reconstruction reason metric and more debug logging to the DA checker.
* Add comment and logging.
* Rename `NotRequired` to `NotStarted`.
* Remove extra character added.
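The race condition fix mentioned above works by folding the should-we-reconstruct check and the `reconstruction_started` flag mutation into a single critical section that returns a decision enum, so two concurrent callers can never both start reconstruction (see `check_and_set_reconstruction_started` in the diff below). A minimal sketch of that shape, with `PendingComponents` simplified to two fields and the real LRU cache replaced by an `Option`:

    use parking_lot::RwLock;

    struct PendingComponents {
        column_count: usize,
        reconstruction_started: bool,
    }

    enum Decision {
        // Snapshot of the components to feed into reconstruction.
        Yes(PendingComponents),
        // Reason string, recorded in a metric rather than logged.
        No(&'static str),
    }

    fn check_and_set_started(cache: &RwLock<Option<PendingComponents>>, total: usize) -> Decision {
        // One write lock covers both the check and the mutation; a second caller
        // arriving concurrently will observe `reconstruction_started == true`.
        let mut guard = cache.write();
        let Some(pending) = guard.as_mut() else {
            return Decision::No("block already imported");
        };
        if pending.reconstruction_started {
            return Decision::No("already started");
        }
        if pending.column_count < total / 2 {
            return Decision::No("not enough columns");
        }
        pending.reconstruction_started = true;
        Decision::Yes(PendingComponents {
            column_count: pending.column_count,
            reconstruction_started: true,
        })
    }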
beacon_node/beacon_chain/src/beacon_chain.rs

@@ -22,7 +22,7 @@ pub use crate::canonical_head::CanonicalHead;
 use crate::chain_config::ChainConfig;
 use crate::data_availability_checker::{
     Availability, AvailabilityCheckError, AvailableBlock, DataAvailabilityChecker,
-    DataColumnsToPublish,
+    DataColumnReconstructionResult,
 };
 use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
 use crate::early_attester_cache::EarlyAttesterCache;
@@ -3015,13 +3015,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         self: &Arc<Self>,
         data_columns: Vec<GossipVerifiedDataColumn<T>>,
         publish_fn: impl FnOnce() -> Result<(), BlockError>,
-    ) -> Result<
-        (
-            AvailabilityProcessingStatus,
-            DataColumnsToPublish<T::EthSpec>,
-        ),
-        BlockError,
-    > {
+    ) -> Result<AvailabilityProcessingStatus, BlockError> {
         let Ok((slot, block_root)) = data_columns
             .iter()
             .map(|c| (c.slot(), c.block_root()))
@@ -3051,7 +3045,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                 publish_fn,
             )
             .await;
-        self.remove_notified_custody_columns(&block_root, r)
+        self.remove_notified(&block_root, r)
     }
 
     /// Cache the blobs in the processing cache, process it, then evict it from the cache if it was
@@ -3110,13 +3104,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     pub async fn process_rpc_custody_columns(
         self: &Arc<Self>,
         custody_columns: DataColumnSidecarList<T::EthSpec>,
-    ) -> Result<
-        (
-            AvailabilityProcessingStatus,
-            DataColumnsToPublish<T::EthSpec>,
-        ),
-        BlockError,
-    > {
+    ) -> Result<AvailabilityProcessingStatus, BlockError> {
         let Ok((slot, block_root)) = custody_columns
             .iter()
             .map(|c| (c.slot(), c.block_root()))
@@ -3154,7 +3142,67 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         let r = self
             .check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns)
             .await;
-        self.remove_notified_custody_columns(&block_root, r)
+        self.remove_notified(&block_root, r)
+    }
+
+    pub async fn reconstruct_data_columns(
+        self: &Arc<Self>,
+        block_root: Hash256,
+    ) -> Result<
+        Option<(
+            AvailabilityProcessingStatus,
+            DataColumnSidecarList<T::EthSpec>,
+        )>,
+        BlockError,
+    > {
+        // As of now we only reconstruct data columns on supernodes, so if the block is already
+        // available on a supernode, there's no need to reconstruct as the node must already have
+        // all columns.
+        if self
+            .canonical_head
+            .fork_choice_read_lock()
+            .contains_block(&block_root)
+        {
+            return Ok(None);
+        }
+
+        let data_availability_checker = self.data_availability_checker.clone();
+
+        let result = self
+            .task_executor
+            .spawn_blocking_handle(
+                move || data_availability_checker.reconstruct_data_columns(&block_root),
+                "reconstruct_data_columns",
+            )
+            .ok_or(BeaconChainError::RuntimeShutdown)?
+            .await
+            .map_err(BeaconChainError::TokioJoin)??;
+
+        match result {
+            DataColumnReconstructionResult::Success((availability, data_columns_to_publish)) => {
+                let Some(slot) = data_columns_to_publish.first().map(|d| d.slot()) else {
+                    // This should be unreachable because empty result would return `RecoveredColumnsNotImported` instead of success.
+                    return Ok(None);
+                };
+
+                let r = self
+                    .process_availability(slot, availability, || Ok(()))
+                    .await;
+                self.remove_notified(&block_root, r)
+                    .map(|availability_processing_status| {
+                        Some((availability_processing_status, data_columns_to_publish))
+                    })
+            }
+            DataColumnReconstructionResult::NotStarted(reason)
+            | DataColumnReconstructionResult::RecoveredColumnsNotImported(reason) => {
+                // We use metric here because logging this would be *very* noisy.
+                metrics::inc_counter_vec(
+                    &metrics::KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL,
+                    &[reason],
+                );
+                Ok(None)
+            }
+        }
     }
 
     /// Remove any block components from the *processing cache* if we no longer require them. If the
@@ -3172,23 +3220,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         r
     }
 
-    /// Remove any block components from the *processing cache* if we no longer require them. If the
-    /// block was imported full or erred, we no longer require them.
-    fn remove_notified_custody_columns<P>(
-        &self,
-        block_root: &Hash256,
-        r: Result<(AvailabilityProcessingStatus, P), BlockError>,
-    ) -> Result<(AvailabilityProcessingStatus, P), BlockError> {
-        let has_missing_components = matches!(
-            r,
-            Ok((AvailabilityProcessingStatus::MissingComponents(_, _), _))
-        );
-        if !has_missing_components {
-            self.reqresp_pre_import_cache.write().remove(block_root);
-        }
-        r
-    }
-
     /// Wraps `process_block` in logic to cache the block's commitments in the processing cache
     /// and evict if the block was imported or errored.
     pub async fn process_block_with_early_caching<B: IntoExecutionPendingBlock<T>>(
@@ -3444,26 +3475,21 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         block_root: Hash256,
         data_columns: Vec<GossipVerifiedDataColumn<T>>,
         publish_fn: impl FnOnce() -> Result<(), BlockError>,
-    ) -> Result<
-        (
-            AvailabilityProcessingStatus,
-            DataColumnsToPublish<T::EthSpec>,
-        ),
-        BlockError,
-    > {
+    ) -> Result<AvailabilityProcessingStatus, BlockError> {
         if let Some(slasher) = self.slasher.as_ref() {
             for data_colum in &data_columns {
                 slasher.accept_block_header(data_colum.signed_block_header());
             }
         }
 
-        let (availability, data_columns_to_publish) = self
-            .data_availability_checker
-            .put_gossip_data_columns(slot, block_root, data_columns)?;
+        let availability = self.data_availability_checker.put_gossip_data_columns(
+            slot,
+            block_root,
+            data_columns,
+        )?;
 
         self.process_availability(slot, availability, publish_fn)
             .await
-            .map(|result| (result, data_columns_to_publish))
     }
 
     /// Checks if the provided blobs can make any cached blocks available, and imports immediately
@@ -3513,13 +3539,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
         slot: Slot,
         block_root: Hash256,
         custody_columns: DataColumnSidecarList<T::EthSpec>,
-    ) -> Result<
-        (
-            AvailabilityProcessingStatus,
-            DataColumnsToPublish<T::EthSpec>,
-        ),
-        BlockError,
-    > {
+    ) -> Result<AvailabilityProcessingStatus, BlockError> {
         // Need to scope this to ensure the lock is dropped before calling `process_availability`
         // Even an explicit drop is not enough to convince the borrow checker.
         {
@@ -3544,16 +3564,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
 
         // This slot value is purely informative for the consumers of
         // `AvailabilityProcessingStatus::MissingComponents` to log an error with a slot.
-        let (availability, data_columns_to_publish) =
-            self.data_availability_checker.put_rpc_custody_columns(
-                block_root,
-                slot.epoch(T::EthSpec::slots_per_epoch()),
-                custody_columns,
-            )?;
+        let availability = self.data_availability_checker.put_rpc_custody_columns(
+            block_root,
+            slot.epoch(T::EthSpec::slots_per_epoch()),
+            custody_columns,
+        )?;
 
         self.process_availability(slot, availability, || Ok(()))
             .await
-            .map(|result| (result, data_columns_to_publish))
     }
 
     /// Imports a fully available block. Otherwise, returns `AvailabilityProcessingStatus::MissingComponents`
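The `spawn_blocking_handle` call above is what keeps KZG reconstruction from stalling the async executor: the CPU-heavy erasure-coding work runs on a dedicated blocking thread pool while gossip and RPC processing continue. A standalone sketch of the same pattern using `tokio` directly (Lighthouse's `task_executor` wraps this; `reconstruct` is a placeholder for the expensive work):

    use tokio::task;

    // Placeholder for the CPU-heavy KZG reconstruction.
    fn reconstruct(block_root: [u8; 32]) -> Result<Vec<u8>, String> {
        let _ = block_root;
        Ok(vec![])
    }

    async fn reconstruct_without_blocking(block_root: [u8; 32]) -> Result<Vec<u8>, String> {
        // `spawn_blocking` moves the closure onto the blocking thread pool, so the
        // async worker threads stay free; `.await` only suspends this task.
        task::spawn_blocking(move || reconstruct(block_root))
            .await
            .map_err(|join_err| format!("reconstruction task failed: {join_err}"))?
    }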
beacon_node/beacon_chain/src/builder.rs

@@ -984,6 +984,7 @@ where
                     store,
                     self.import_all_data_columns,
                     self.spec,
+                    log.new(o!("service" => "data_availability_checker")),
                 )
                 .map_err(|e| format!("Error initializing DataAvailabilityChecker: {:?}", e))?,
             ),
beacon_node/beacon_chain/src/data_availability_checker.rs

@@ -2,10 +2,12 @@ use crate::blob_verification::{verify_kzg_for_blob_list, GossipVerifiedBlob, Kzg
 use crate::block_verification_types::{
     AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock,
 };
-use crate::data_availability_checker::overflow_lru_cache::DataAvailabilityCheckerInner;
-use crate::{BeaconChain, BeaconChainTypes, BeaconStore};
+use crate::data_availability_checker::overflow_lru_cache::{
+    DataAvailabilityCheckerInner, ReconstructColumnsDecision,
+};
+use crate::{metrics, BeaconChain, BeaconChainTypes, BeaconStore};
 use kzg::Kzg;
-use slog::{debug, error};
+use slog::{debug, error, Logger};
 use slot_clock::SlotClock;
 use std::fmt;
 use std::fmt::Debug;
@@ -27,11 +29,12 @@ use crate::data_column_verification::{
     verify_kzg_for_data_column, verify_kzg_for_data_column_list, CustodyDataColumn,
     GossipVerifiedDataColumn, KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn,
 };
+use crate::metrics::{
+    KZG_DATA_COLUMN_RECONSTRUCTION_ATTEMPTS, KZG_DATA_COLUMN_RECONSTRUCTION_FAILURES,
+};
 pub use error::{Error as AvailabilityCheckError, ErrorCategory as AvailabilityCheckErrorCategory};
 use types::non_zero_usize::new_non_zero_usize;
 
-pub use self::overflow_lru_cache::DataColumnsToPublish;
-
 /// The LRU Cache stores `PendingComponents` which can store up to
 /// `MAX_BLOBS_PER_BLOCK = 6` blobs each. A `BlobSidecar` is 0.131256 MB. So
 /// the maximum size of a `PendingComponents` is ~ 0.787536 MB. Setting this
@@ -71,6 +74,16 @@ pub struct DataAvailabilityChecker<T: BeaconChainTypes> {
     slot_clock: T::SlotClock,
     kzg: Arc<Kzg>,
     spec: Arc<ChainSpec>,
+    log: Logger,
 }
+
+pub type AvailabilityAndReconstructedColumns<E> = (Availability<E>, DataColumnSidecarList<E>);
+
+#[derive(Debug)]
+pub enum DataColumnReconstructionResult<E: EthSpec> {
+    Success(AvailabilityAndReconstructedColumns<E>),
+    NotStarted(&'static str),
+    RecoveredColumnsNotImported(&'static str),
+}
 
 /// This type is returned after adding a block / blob to the `DataAvailabilityChecker`.
@@ -101,6 +114,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
         store: BeaconStore<T>,
         import_all_data_columns: bool,
         spec: Arc<ChainSpec>,
+        log: Logger,
     ) -> Result<Self, AvailabilityCheckError> {
         let custody_subnet_count = if import_all_data_columns {
             spec.data_column_sidecar_subnet_count as usize
@@ -124,6 +138,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             slot_clock,
             kzg,
             spec,
+            log,
         })
     }
 
@@ -205,7 +220,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             .map_err(AvailabilityCheckError::InvalidBlobs)?;
 
         self.availability_cache
-            .put_kzg_verified_blobs(block_root, epoch, verified_blobs)
+            .put_kzg_verified_blobs(block_root, epoch, verified_blobs, &self.log)
     }
 
     /// Put a list of custody columns received via RPC into the availability cache. This performs KZG
@@ -216,8 +231,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
         block_root: Hash256,
         epoch: Epoch,
         custody_columns: DataColumnSidecarList<T::EthSpec>,
-    ) -> Result<(Availability<T::EthSpec>, DataColumnsToPublish<T::EthSpec>), AvailabilityCheckError>
-    {
+    ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
         // TODO(das): report which column is invalid for proper peer scoring
         // TODO(das): batch KZG verification here, but fallback into checking each column
         // individually to report which column(s) are invalid.
@@ -233,10 +247,10 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             .collect::<Result<Vec<_>, AvailabilityCheckError>>()?;
 
         self.availability_cache.put_kzg_verified_data_columns(
-            &self.kzg,
             block_root,
             epoch,
             verified_custody_columns,
+            &self.log,
         )
     }
 
@@ -253,6 +267,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             gossip_blob.block_root(),
             gossip_blob.epoch(),
             vec![gossip_blob.into_inner()],
+            &self.log,
         )
     }
 
@@ -267,8 +282,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
         slot: Slot,
         block_root: Hash256,
         gossip_data_columns: Vec<GossipVerifiedDataColumn<T>>,
-    ) -> Result<(Availability<T::EthSpec>, DataColumnsToPublish<T::EthSpec>), AvailabilityCheckError>
-    {
+    ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
         let epoch = slot.epoch(T::EthSpec::slots_per_epoch());
 
         let custody_columns = gossip_data_columns
@@ -277,10 +291,10 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             .collect::<Vec<_>>();
 
         self.availability_cache.put_kzg_verified_data_columns(
-            &self.kzg,
             block_root,
             epoch,
             custody_columns,
+            &self.log,
         )
     }
 
@@ -291,7 +305,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
         executed_block: AvailabilityPendingExecutedBlock<T::EthSpec>,
     ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
         self.availability_cache
-            .put_pending_executed_block(executed_block)
+            .put_pending_executed_block(executed_block, &self.log)
     }
 
     pub fn remove_pending_components(&self, block_root: Hash256) {
@@ -511,6 +525,92 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
             block_cache_size: self.availability_cache.block_cache_size(),
         }
     }
+
+    pub fn reconstruct_data_columns(
+        &self,
+        block_root: &Hash256,
+    ) -> Result<DataColumnReconstructionResult<T::EthSpec>, AvailabilityCheckError> {
+        let pending_components = match self
+            .availability_cache
+            .check_and_set_reconstruction_started(block_root)
+        {
+            ReconstructColumnsDecision::Yes(pending_components) => pending_components,
+            ReconstructColumnsDecision::No(reason) => {
+                return Ok(DataColumnReconstructionResult::NotStarted(reason));
+            }
+        };
+
+        metrics::inc_counter(&KZG_DATA_COLUMN_RECONSTRUCTION_ATTEMPTS);
+        let timer = metrics::start_timer(&metrics::DATA_AVAILABILITY_RECONSTRUCTION_TIME);
+
+        let all_data_columns = KzgVerifiedCustodyDataColumn::reconstruct_columns(
+            &self.kzg,
+            &pending_components.verified_data_columns,
+            &self.spec,
+        )
+        .map_err(|e| {
+            error!(
+                self.log,
+                "Error reconstructing data columns";
+                "block_root" => ?block_root,
+                "error" => ?e
+            );
+            self.availability_cache
+                .handle_reconstruction_failure(block_root);
+            metrics::inc_counter(&KZG_DATA_COLUMN_RECONSTRUCTION_FAILURES);
+            AvailabilityCheckError::ReconstructColumnsError(e)
+        })?;
+
+        // Check indices from cache again to make sure we don't publish components we've already received.
+        let Some(existing_column_indices) = self.cached_data_column_indexes(block_root) else {
+            return Ok(DataColumnReconstructionResult::RecoveredColumnsNotImported(
+                "block already imported",
+            ));
+        };
+
+        let data_columns_to_publish = all_data_columns
+            .into_iter()
+            .filter(|d| !existing_column_indices.contains(&d.index()))
+            .collect::<Vec<_>>();
+
+        let Some(slot) = data_columns_to_publish
+            .first()
+            .map(|d| d.as_data_column().slot())
+        else {
+            return Ok(DataColumnReconstructionResult::RecoveredColumnsNotImported(
+                "No new columns to import and publish",
+            ));
+        };
+
+        metrics::stop_timer(timer);
+        metrics::inc_counter_by(
+            &metrics::DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS,
+            data_columns_to_publish.len() as u64,
+        );
+
+        debug!(self.log, "Reconstructed columns";
+            "count" => data_columns_to_publish.len(),
+            "block_root" => ?block_root,
+            "slot" => slot,
+        );
+
+        self.availability_cache
+            .put_kzg_verified_data_columns(
+                *block_root,
+                slot.epoch(T::EthSpec::slots_per_epoch()),
+                data_columns_to_publish.clone(),
+                &self.log,
+            )
+            .map(|availability| {
+                DataColumnReconstructionResult::Success((
+                    availability,
+                    data_columns_to_publish
+                        .into_iter()
+                        .map(|d| d.clone_arc())
+                        .collect::<Vec<_>>(),
+                ))
+            })
+    }
 }
 
 /// Helper struct to group data availability checker metrics.
beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs

@@ -6,23 +6,19 @@ use crate::block_verification_types::{
 };
 use crate::data_availability_checker::{Availability, AvailabilityCheckError};
 use crate::data_column_verification::KzgVerifiedCustodyDataColumn;
-use crate::metrics;
 use crate::BeaconChainTypes;
-use kzg::Kzg;
 use lru::LruCache;
 use parking_lot::RwLock;
+use slog::{debug, Logger};
 use ssz_types::{FixedVector, VariableList};
-use std::collections::HashSet;
 use std::num::NonZeroUsize;
 use std::sync::Arc;
 use types::blob_sidecar::BlobIdentifier;
 use types::{
-    BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar,
-    DataColumnSidecarList, Epoch, EthSpec, Hash256, SignedBeaconBlock,
+    BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, Epoch, EthSpec,
+    Hash256, SignedBeaconBlock,
 };
 
-pub type DataColumnsToPublish<E> = Option<DataColumnSidecarList<E>>;
-
 /// This represents the components of a partially available block
 ///
 /// The blobs are all gossip and kzg verified.
@@ -95,7 +91,7 @@ impl<E: EthSpec> PendingComponents<E> {
     /// block.
     ///
     /// This corresponds to the number of commitments that are present in a block.
-    pub fn num_expected_blobs(&self) -> Option<usize> {
+    pub fn block_kzg_commitments_count(&self) -> Option<usize> {
         self.get_cached_block()
             .as_ref()
             .map(|b| b.get_commitments().len())
@@ -203,21 +199,61 @@ impl<E: EthSpec> PendingComponents<E> {
     ///
     /// Returns `true` if both the block exists and the number of received blobs / custody columns
     /// matches the number of expected blobs / custody columns.
-    pub fn is_available(&self, block_import_requirement: &BlockImportRequirement) -> bool {
+    pub fn is_available(
+        &self,
+        block_import_requirement: &BlockImportRequirement,
+        log: &Logger,
+    ) -> bool {
+        let block_kzg_commitments_count_opt = self.block_kzg_commitments_count();
+
         match block_import_requirement {
-            BlockImportRequirement::AllBlobs => self
-                .num_expected_blobs()
-                .map_or(false, |num_expected_blobs| {
-                    num_expected_blobs == self.num_received_blobs()
-                }),
+            BlockImportRequirement::AllBlobs => {
+                let received_blobs = self.num_received_blobs();
+                let expected_blobs_msg = block_kzg_commitments_count_opt
+                    .as_ref()
+                    .map(|num| num.to_string())
+                    .unwrap_or("unknown".to_string());
+
+                debug!(log,
+                    "Component(s) added to data availability checker";
+                    "block_root" => ?self.block_root,
+                    "received_block" => block_kzg_commitments_count_opt.is_some(),
+                    "received_blobs" => received_blobs,
+                    "expected_blobs" => expected_blobs_msg,
+                );
+
+                block_kzg_commitments_count_opt.map_or(false, |num_expected_blobs| {
+                    num_expected_blobs == received_blobs
+                })
+            }
             BlockImportRequirement::ColumnSampling(num_expected_columns) => {
-                let num_received_data_columns = self.num_received_data_columns();
                 // No data columns when there are 0 blobs
-                self.num_expected_blobs()
-                    .map_or(false, |num_expected_blobs| {
-                        num_expected_blobs == 0
-                            || *num_expected_columns == num_received_data_columns
-                    })
+                let expected_columns_opt = block_kzg_commitments_count_opt.map(|blob_count| {
+                    if blob_count > 0 {
+                        *num_expected_columns
+                    } else {
+                        0
+                    }
+                });
+
+                let expected_columns_msg = expected_columns_opt
+                    .as_ref()
+                    .map(|num| num.to_string())
+                    .unwrap_or("unknown".to_string());
+
+                let num_received_columns = self.num_received_data_columns();
+
+                debug!(log,
+                    "Component(s) added to data availability checker";
+                    "block_root" => ?self.block_root,
+                    "received_block" => block_kzg_commitments_count_opt.is_some(),
+                    "received_columns" => num_received_columns,
+                    "expected_columns" => expected_columns_msg,
+                );
+
+                expected_columns_opt.map_or(false, |num_expected_columns| {
+                    num_expected_columns == num_received_columns
+                })
             }
         }
     }
@@ -311,10 +347,6 @@ impl<E: EthSpec> PendingComponents<E> {
         )))
     }
 
-    pub fn reconstruction_started(&mut self) {
-        self.reconstruction_started = true;
-    }
-
     /// Returns the epoch of the block if it is cached, otherwise returns the epoch of the first blob.
     pub fn epoch(&self) -> Option<Epoch> {
         self.executed_block
@@ -358,6 +390,15 @@ pub struct DataAvailabilityCheckerInner<T: BeaconChainTypes> {
     spec: Arc<ChainSpec>,
 }
 
+// This enum is only used internally within the crate in the reconstruction function to improve
+// readability, so it's OK to not box the variant value, and it shouldn't impact memory much with
+// the current usage, as it's deconstructed immediately.
+#[allow(clippy::large_enum_variant)]
+pub(crate) enum ReconstructColumnsDecision<E: EthSpec> {
+    Yes(PendingComponents<E>),
+    No(&'static str),
+}
+
 impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
     pub fn new(
         capacity: NonZeroUsize,
@@ -448,33 +489,12 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         }
     }
 
-    /// Potentially trigger reconstruction if:
-    /// - Our custody requirement is all columns
-    /// - We >= 50% of columns, but not all columns
-    fn should_reconstruct(
-        &self,
-        block_import_requirement: &BlockImportRequirement,
-        pending_components: &PendingComponents<T::EthSpec>,
-    ) -> bool {
-        let BlockImportRequirement::ColumnSampling(num_expected_columns) = block_import_requirement
-        else {
-            return false;
-        };
-
-        let num_of_columns = self.spec.number_of_columns;
-        let has_missing_columns = pending_components.verified_data_columns.len() < num_of_columns;
-
-        has_missing_columns
-            && !pending_components.reconstruction_started
-            && *num_expected_columns == num_of_columns
-            && pending_components.verified_data_columns.len() >= num_of_columns / 2
-    }
-
     pub fn put_kzg_verified_blobs<I: IntoIterator<Item = KzgVerifiedBlob<T::EthSpec>>>(
         &self,
         block_root: Hash256,
         epoch: Epoch,
         kzg_verified_blobs: I,
+        log: &Logger,
     ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
         let mut fixed_blobs = FixedVector::default();
 
@@ -496,7 +516,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         pending_components.merge_blobs(fixed_blobs);
 
         let block_import_requirement = self.block_import_requirement(epoch)?;
-        if pending_components.is_available(&block_import_requirement) {
+        if pending_components.is_available(&block_import_requirement, log) {
             write_lock.put(block_root, pending_components.clone());
             // No need to hold the write lock anymore
             drop(write_lock);
@@ -514,12 +534,11 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
         I: IntoIterator<Item = KzgVerifiedCustodyDataColumn<T::EthSpec>>,
     >(
         &self,
-        kzg: &Kzg,
         block_root: Hash256,
         epoch: Epoch,
         kzg_verified_data_columns: I,
-    ) -> Result<(Availability<T::EthSpec>, DataColumnsToPublish<T::EthSpec>), AvailabilityCheckError>
-    {
+        log: &Logger,
+    ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
        let mut write_lock = self.critical.write();
 
         // Grab existing entry or create a new entry.
@@ -533,65 +552,67 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
 
         let block_import_requirement = self.block_import_requirement(epoch)?;
 
-        // Potentially trigger reconstruction if:
-        // - Our custody requirement is all columns
-        // - We >= 50% of columns
-        let data_columns_to_publish =
-            if self.should_reconstruct(&block_import_requirement, &pending_components) {
-                pending_components.reconstruction_started();
-
-                let timer = metrics::start_timer(&metrics::DATA_AVAILABILITY_RECONSTRUCTION_TIME);
-
-                let existing_column_indices = pending_components
-                    .verified_data_columns
-                    .iter()
-                    .map(|d| d.index())
-                    .collect::<HashSet<_>>();
-
-                // Will only return an error if:
-                // - < 50% of columns
-                // - There are duplicates
-                let all_data_columns = KzgVerifiedCustodyDataColumn::reconstruct_columns(
-                    kzg,
-                    pending_components.verified_data_columns.as_slice(),
-                    &self.spec,
-                )
-                .map_err(AvailabilityCheckError::ReconstructColumnsError)?;
-
-                let data_columns_to_publish = all_data_columns
-                    .iter()
-                    .filter(|d| !existing_column_indices.contains(&d.index()))
-                    .map(|d| d.clone_arc())
-                    .collect::<Vec<_>>();
-
-                pending_components.verified_data_columns = all_data_columns;
-
-                metrics::stop_timer(timer);
-                metrics::inc_counter_by(
-                    &metrics::DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS,
-                    data_columns_to_publish.len() as u64,
-                );
-
-                Some(data_columns_to_publish)
-            } else {
-                None
-            };
-
-        if pending_components.is_available(&block_import_requirement) {
+        if pending_components.is_available(&block_import_requirement, log) {
             write_lock.put(block_root, pending_components.clone());
             // No need to hold the write lock anymore
             drop(write_lock);
-            pending_components
-                .make_available(block_import_requirement, &self.spec, |diet_block| {
-                    self.state_cache.recover_pending_executed_block(diet_block)
-                })
-                .map(|availability| (availability, data_columns_to_publish))
+            pending_components.make_available(block_import_requirement, &self.spec, |diet_block| {
+                self.state_cache.recover_pending_executed_block(diet_block)
+            })
         } else {
             write_lock.put(block_root, pending_components);
-            Ok((
-                Availability::MissingComponents(block_root),
-                data_columns_to_publish,
-            ))
+            Ok(Availability::MissingComponents(block_root))
         }
     }
 
+    /// Check whether data column reconstruction should be attempted.
+    ///
+    /// Potentially trigger reconstruction if:
+    /// - Our custody requirement is all columns (supernode), and we haven't got all columns
+    /// - We have >= 50% of columns, but not all columns
+    /// - Reconstruction hasn't been started for the block
+    ///
+    /// If reconstruction is required, returns `PendingComponents` which contains the
+    /// components to be used as inputs to reconstruction, otherwise returns a `reason`.
+    pub fn check_and_set_reconstruction_started(
+        &self,
+        block_root: &Hash256,
+    ) -> ReconstructColumnsDecision<T::EthSpec> {
+        let mut write_lock = self.critical.write();
+        let Some(pending_components) = write_lock.get_mut(block_root) else {
+            // Block may have been imported as it does not exist in availability cache.
+            return ReconstructColumnsDecision::No("block already imported");
+        };
+
+        // If we're sampling all columns, it means we must be custodying all columns.
+        let custody_column_count = self.sampling_column_count();
+        let total_column_count = self.spec.number_of_columns;
+        let received_column_count = pending_components.verified_data_columns.len();
+
+        if pending_components.reconstruction_started {
+            return ReconstructColumnsDecision::No("already started");
+        }
+        if custody_column_count != total_column_count {
+            return ReconstructColumnsDecision::No("not required for full node");
+        }
+        if received_column_count == self.spec.number_of_columns {
+            return ReconstructColumnsDecision::No("all columns received");
+        }
+        if received_column_count < total_column_count / 2 {
+            return ReconstructColumnsDecision::No("not enough columns");
+        }
+
+        pending_components.reconstruction_started = true;
+        ReconstructColumnsDecision::Yes(pending_components.clone())
+    }
+
+    /// This could mean some invalid data columns made it through to the `DataAvailabilityChecker`.
+    /// In this case, we remove all data columns in `PendingComponents`, reset reconstruction
+    /// status so that we can attempt to retrieve columns from peers again.
+    pub fn handle_reconstruction_failure(&self, block_root: &Hash256) {
+        if let Some(pending_components_mut) = self.critical.write().get_mut(block_root) {
+            pending_components_mut.verified_data_columns = vec![];
+            pending_components_mut.reconstruction_started = false;
+        }
+    }
+
@@ -600,6 +621,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
     pub fn put_pending_executed_block(
         &self,
         executed_block: AvailabilityPendingExecutedBlock<T::EthSpec>,
+        log: &Logger,
     ) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
         let mut write_lock = self.critical.write();
         let block_root = executed_block.import_data.block_root;
@@ -621,7 +643,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
 
         // Check if we have all components and entire set is consistent.
         let block_import_requirement = self.block_import_requirement(epoch)?;
-        if pending_components.is_available(&block_import_requirement) {
+        if pending_components.is_available(&block_import_requirement, log) {
             write_lock.put(block_root, pending_components.clone());
             // No need to hold the write lock anymore
             drop(write_lock);
@@ -919,7 +941,7 @@ mod test {
         );
         assert!(cache.critical.read().is_empty(), "cache should be empty");
         let availability = cache
-            .put_pending_executed_block(pending_block)
+            .put_pending_executed_block(pending_block, harness.logger())
             .expect("should put block");
         if blobs_expected == 0 {
             assert!(
@@ -958,7 +980,7 @@ mod test {
         for (blob_index, gossip_blob) in blobs.into_iter().enumerate() {
             kzg_verified_blobs.push(gossip_blob.into_inner());
             let availability = cache
-                .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone())
+                .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger())
                 .expect("should put blob");
             if blob_index == blobs_expected - 1 {
                 assert!(matches!(availability, Availability::Available(_)));
@@ -985,7 +1007,7 @@ mod test {
         for gossip_blob in blobs {
             kzg_verified_blobs.push(gossip_blob.into_inner());
             let availability = cache
-                .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone())
+                .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger())
                 .expect("should put blob");
             assert_eq!(
                 availability,
@@ -995,7 +1017,7 @@
             assert_eq!(cache.critical.read().len(), 1);
         }
         let availability = cache
-            .put_pending_executed_block(pending_block)
+            .put_pending_executed_block(pending_block, harness.logger())
            .expect("should put block");
         assert!(
             matches!(availability, Availability::Available(_)),
@@ -1063,7 +1085,7 @@
 
         // put the block in the cache
         let availability = cache
-            .put_pending_executed_block(pending_block)
+            .put_pending_executed_block(pending_block, harness.logger())
            .expect("should put block");
 
         // grab the diet block from the cache for later testing
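To make the gating in `check_and_set_reconstruction_started` concrete, here is a worked example assuming `number_of_columns = 128` (the PeerDAS column count at the time of this PR; treat the constant as illustrative, since the real code reads it from `spec.number_of_columns`). The >= 50% bound exists because the erasure coding lets any half of the columns recover the rest:

    const TOTAL_COLUMNS: usize = 128; // illustrative; `spec.number_of_columns` in the real code

    // Mirrors the order of the checks in the diff above.
    fn decision(received: usize, is_supernode: bool, started: bool) -> &'static str {
        if started {
            "already started"
        } else if !is_supernode {
            "not required for full node"
        } else if received == TOTAL_COLUMNS {
            "all columns received"
        } else if received < TOTAL_COLUMNS / 2 {
            "not enough columns"
        } else {
            "reconstruct"
        }
    }

    fn main() {
        assert_eq!(decision(63, true, false), "not enough columns"); // 63 < 64
        assert_eq!(decision(64, true, false), "reconstruct"); // half is enough
        assert_eq!(decision(128, true, false), "all columns received");
        assert_eq!(decision(64, false, false), "not required for full node");
    }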
beacon_node/beacon_chain/src/data_column_verification.rs

@@ -313,10 +313,7 @@ impl<E: EthSpec> KzgVerifiedCustodyDataColumn<E> {
         kzg: &Kzg,
         partial_set_of_columns: &[Self],
         spec: &ChainSpec,
-    ) -> Result<Vec<Self>, KzgError> {
-        // Will only return an error if:
-        // - < 50% of columns
-        // - There are duplicates
+    ) -> Result<Vec<KzgVerifiedCustodyDataColumn<E>>, KzgError> {
         let all_data_columns = reconstruct_data_columns(
             kzg,
             &partial_set_of_columns
@@ -328,10 +325,8 @@ impl<E: EthSpec> KzgVerifiedCustodyDataColumn<E> {
 
         Ok(all_data_columns
             .into_iter()
-            .map(|d| {
-                KzgVerifiedCustodyDataColumn::from_asserted_custody(KzgVerifiedDataColumn {
-                    data: d,
-                })
+            .map(|data| {
+                KzgVerifiedCustodyDataColumn::from_asserted_custody(KzgVerifiedDataColumn { data })
             })
             .collect::<Vec<_>>())
     }
beacon_node/beacon_chain/src/metrics.rs

@@ -1887,6 +1887,31 @@ pub static DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS: LazyLock<Result<IntCounter>>
     )
 });
 
+pub static KZG_DATA_COLUMN_RECONSTRUCTION_ATTEMPTS: LazyLock<Result<IntCounter>> =
+    LazyLock::new(|| {
+        try_create_int_counter(
+            "kzg_data_column_reconstruction_attempts",
+            "Count of times data column reconstruction has been attempted",
+        )
+    });
+
+pub static KZG_DATA_COLUMN_RECONSTRUCTION_FAILURES: LazyLock<Result<IntCounter>> =
+    LazyLock::new(|| {
+        try_create_int_counter(
+            "kzg_data_column_reconstruction_failures",
+            "Count of times data column reconstruction has failed",
+        )
+    });
+
+pub static KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL: LazyLock<Result<IntCounterVec>> =
+    LazyLock::new(|| {
+        try_create_int_counter_vec(
+            "kzg_data_column_reconstruction_incomplete_total",
+            "Count of times data column reconstruction attempts did not result in an import",
+            &["reason"],
+        )
+    });
+
 /*
  * light_client server metrics
  */
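The new `*_INCOMPLETE_TOTAL` metric is the reason the `NotStarted` / `RecoveredColumnsNotImported` variants carry `&'static str` reasons: each reason becomes a label value on a counter vec, keeping label cardinality small and fixed. A minimal sketch using the `prometheus` crate directly (Lighthouse reaches the same API through its `try_create_int_counter_vec` helper shown above):

    use prometheus::{IntCounterVec, Opts};

    fn main() {
        let incomplete = IntCounterVec::new(
            Opts::new(
                "kzg_data_column_reconstruction_incomplete_total",
                "Count of times data column reconstruction attempts did not result in an import",
            ),
            &["reason"],
        )
        .expect("valid metric definition");

        // Each distinct `reason` label gets its own counter series.
        incomplete.with_label_values(&["not enough columns"]).inc();
        incomplete.with_label_values(&["already started"]).inc();
        incomplete.with_label_values(&["already started"]).inc();
        assert_eq!(incomplete.with_label_values(&["already started"]).get(), 2);
    }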
beacon_node/network/src/network_beacon_processor/gossip_methods.rs

@@ -4,6 +4,7 @@ use crate::{
     service::NetworkMessage,
     sync::SyncMessage,
 };
+use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob};
 use beacon_chain::block_verification_types::AsBlock;
 use beacon_chain::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
 use beacon_chain::store::Error;
@@ -18,13 +19,7 @@ use beacon_chain::{
     AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
     GossipVerifiedBlock, NotifyExecutionLayer,
 };
-use beacon_chain::{
-    blob_verification::{GossipBlobError, GossipVerifiedBlob},
-    data_availability_checker::DataColumnsToPublish,
-};
-use lighthouse_network::{
-    Client, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage, ReportSource,
-};
+use lighthouse_network::{Client, MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
 use operation_pool::ReceivedPreCapella;
 use slog::{crit, debug, error, info, trace, warn, Logger};
 use slot_clock::SlotClock;
@@ -171,26 +166,6 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
         })
     }
 
-    pub(crate) fn handle_data_columns_to_publish(
-        &self,
-        data_columns_to_publish: DataColumnsToPublish<T::EthSpec>,
-    ) {
-        if let Some(data_columns_to_publish) = data_columns_to_publish {
-            self.send_network_message(NetworkMessage::Publish {
-                messages: data_columns_to_publish
-                    .iter()
-                    .map(|d| {
-                        let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
-                            d.index as usize,
-                            &self.chain.spec,
-                        );
-                        PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone())))
-                    })
-                    .collect(),
-            });
-        }
-    }
-
     /// Send a message on `message_tx` that the `message_id` sent by `peer_id` should be propagated on
     /// the gossip network.
     ///
@@ -1022,9 +997,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
             .process_gossip_data_columns(vec![verified_data_column], || Ok(()))
             .await
         {
-            Ok((availability, data_columns_to_publish)) => {
-                self.handle_data_columns_to_publish(data_columns_to_publish);
-
+            Ok(availability) => {
                 match availability {
                     AvailabilityProcessingStatus::Imported(block_root) => {
                         // Note: Reusing block imported metric here
@@ -1052,7 +1025,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
                             "block_root" => %block_root,
                         );
 
+                        // Potentially trigger reconstruction
+                        self.attempt_data_column_reconstruction(block_root).await;
                     }
                 }
             }
beacon_node/network/src/network_beacon_processor/mod.rs

@@ -2,7 +2,9 @@ use crate::sync::manager::BlockProcessType;
 use crate::sync::SamplingId;
 use crate::{service::NetworkMessage, sync::manager::SyncMessage};
 use beacon_chain::block_verification_types::RpcBlock;
-use beacon_chain::{builder::Witness, eth1_chain::CachingEth1Backend, BeaconChain};
+use beacon_chain::{
+    builder::Witness, eth1_chain::CachingEth1Backend, AvailabilityProcessingStatus, BeaconChain,
+};
 use beacon_chain::{BeaconChainTypes, NotifyExecutionLayer};
 use beacon_processor::{
     work_reprocessing_queue::ReprocessQueueMessage, BeaconProcessorChannels, BeaconProcessorSend,
@@ -16,9 +18,9 @@ use lighthouse_network::rpc::methods::{
 use lighthouse_network::rpc::{RequestId, SubstreamId};
 use lighthouse_network::{
     rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage},
-    Client, MessageId, NetworkGlobals, PeerId,
+    Client, MessageId, NetworkGlobals, PeerId, PubsubMessage,
 };
-use slog::{debug, Logger};
+use slog::{debug, error, trace, Logger};
 use slot_clock::ManualSlotClock;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -848,6 +850,75 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
                     "error" => %e)
             });
     }
+
+    /// Attempt to reconstruct all data columns if the following conditions satisfies:
+    /// - Our custody requirement is all columns
+    /// - We have >= 50% of columns, but not all columns
+    ///
+    /// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed,
+    /// otherwise returns `None`.
+    async fn attempt_data_column_reconstruction(
+        &self,
+        block_root: Hash256,
+    ) -> Option<AvailabilityProcessingStatus> {
+        let result = self.chain.reconstruct_data_columns(block_root).await;
+        match result {
+            Ok(Some((availability_processing_status, data_columns_to_publish))) => {
+                self.send_network_message(NetworkMessage::Publish {
+                    messages: data_columns_to_publish
+                        .iter()
+                        .map(|d| {
+                            let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
+                                d.index as usize,
+                                &self.chain.spec,
+                            );
+                            PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone())))
+                        })
+                        .collect(),
+                });
+
+                match &availability_processing_status {
+                    AvailabilityProcessingStatus::Imported(hash) => {
+                        debug!(
+                            self.log,
+                            "Block components available via reconstruction";
+                            "result" => "imported block and custody columns",
+                            "block_hash" => %hash,
+                        );
+                        self.chain.recompute_head_at_current_slot().await;
+                    }
+                    AvailabilityProcessingStatus::MissingComponents(_, _) => {
+                        debug!(
+                            self.log,
+                            "Block components still missing block after reconstruction";
+                            "result" => "imported all custody columns",
+                            "block_hash" => %block_root,
+                        );
+                    }
+                }
+
+                Some(availability_processing_status)
+            }
+            Ok(None) => {
+                // reason is tracked via the `KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL` metric
+                trace!(
+                    self.log,
+                    "Reconstruction not required for block";
+                    "block_hash" => %block_root,
+                );
+                None
+            }
+            Err(e) => {
+                error!(
+                    self.log,
+                    "Error during data column reconstruction";
+                    "block_root" => %block_root,
+                    "error" => ?e
+                );
+                None
+            }
+        }
+    }
 }
 
 type TestBeaconChainType<E> =
beacon_node/network/src/network_beacon_processor/sync_methods.rs

@@ -327,34 +327,37 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
         _seen_timestamp: Duration,
         process_type: BlockProcessType,
     ) {
-        let result = self
+        let mut result = self
             .chain
             .process_rpc_custody_columns(custody_columns)
             .await;
 
         match &result {
-            Ok((availability, data_columns_to_publish)) => {
-                self.handle_data_columns_to_publish(data_columns_to_publish.clone());
-
-                match availability {
-                    AvailabilityProcessingStatus::Imported(hash) => {
-                        debug!(
-                            self.log,
-                            "Block components retrieved";
-                            "result" => "imported block and custody columns",
-                            "block_hash" => %hash,
-                        );
-                        self.chain.recompute_head_at_current_slot().await;
-                    }
-                    AvailabilityProcessingStatus::MissingComponents(_, _) => {
-                        debug!(
-                            self.log,
-                            "Missing components over rpc";
-                            "block_hash" => %block_root,
-                        );
+            Ok(availability) => match availability {
+                AvailabilityProcessingStatus::Imported(hash) => {
+                    debug!(
+                        self.log,
+                        "Block components retrieved";
+                        "result" => "imported block and custody columns",
+                        "block_hash" => %hash,
+                    );
+                    self.chain.recompute_head_at_current_slot().await;
+                }
+                AvailabilityProcessingStatus::MissingComponents(_, _) => {
+                    debug!(
+                        self.log,
+                        "Missing components over rpc";
+                        "block_hash" => %block_root,
+                    );
+                    // Attempt reconstruction here before notifying sync, to avoid sending out more requests
+                    // that we may no longer need.
+                    if let Some(availability) =
+                        self.attempt_data_column_reconstruction(block_root).await
+                    {
+                        result = Ok(availability)
                     }
                 }
             },
             Err(BlockError::DuplicateFullyImported(_)) => {
                 debug!(
                     self.log,
@@ -374,7 +377,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
 
         self.send_sync_message(SyncMessage::BlockComponentProcessed {
             process_type,
-            result: result.map(|(r, _)| r).into(),
+            result: result.into(),
         });
     }