mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-30 03:03:45 +00:00
Refactor data column reconstruction and avoid blocking processing (#6403)
* Move reconstruction logic out of `overflow_lru_cache` to simplify the code and avoids having to pass `DataColumnsToPublish` around and blocking other processing. * Publish reconstructed cells before recomputing head. Remove duplicate functions. * Merge branch 'unstable' into non-blocking-reconstruction * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/beacon_chain/src/beacon_chain.rs # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/network/src/network_beacon_processor/sync_methods.rs * Spawn a blocking task for reconstruction. * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/network/src/network_beacon_processor/mod.rs * Fix fmt * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs * Fix race condition by making check and mutation atomic as suggested by Lion. Also added error handling to reconstruction failure. * Add reconstruction reason metric and more debug logging to da checker. * Add comment and logging. * Rename `NotRequired` to `NotStarted`. * Remove extra character added.
This commit is contained in:
@@ -22,7 +22,7 @@ pub use crate::canonical_head::CanonicalHead;
|
||||
use crate::chain_config::ChainConfig;
|
||||
use crate::data_availability_checker::{
|
||||
Availability, AvailabilityCheckError, AvailableBlock, DataAvailabilityChecker,
|
||||
DataColumnsToPublish,
|
||||
DataColumnReconstructionResult,
|
||||
};
|
||||
use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
|
||||
use crate::early_attester_cache::EarlyAttesterCache;
|
||||
@@ -3015,13 +3015,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
self: &Arc<Self>,
|
||||
data_columns: Vec<GossipVerifiedDataColumn<T>>,
|
||||
publish_fn: impl FnOnce() -> Result<(), BlockError>,
|
||||
) -> Result<
|
||||
(
|
||||
AvailabilityProcessingStatus,
|
||||
DataColumnsToPublish<T::EthSpec>,
|
||||
),
|
||||
BlockError,
|
||||
> {
|
||||
) -> Result<AvailabilityProcessingStatus, BlockError> {
|
||||
let Ok((slot, block_root)) = data_columns
|
||||
.iter()
|
||||
.map(|c| (c.slot(), c.block_root()))
|
||||
@@ -3051,7 +3045,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
publish_fn,
|
||||
)
|
||||
.await;
|
||||
self.remove_notified_custody_columns(&block_root, r)
|
||||
self.remove_notified(&block_root, r)
|
||||
}
|
||||
|
||||
/// Cache the blobs in the processing cache, process it, then evict it from the cache if it was
|
||||
@@ -3110,13 +3104,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
pub async fn process_rpc_custody_columns(
|
||||
self: &Arc<Self>,
|
||||
custody_columns: DataColumnSidecarList<T::EthSpec>,
|
||||
) -> Result<
|
||||
(
|
||||
AvailabilityProcessingStatus,
|
||||
DataColumnsToPublish<T::EthSpec>,
|
||||
),
|
||||
BlockError,
|
||||
> {
|
||||
) -> Result<AvailabilityProcessingStatus, BlockError> {
|
||||
let Ok((slot, block_root)) = custody_columns
|
||||
.iter()
|
||||
.map(|c| (c.slot(), c.block_root()))
|
||||
@@ -3154,7 +3142,67 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
let r = self
|
||||
.check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns)
|
||||
.await;
|
||||
self.remove_notified_custody_columns(&block_root, r)
|
||||
self.remove_notified(&block_root, r)
|
||||
}
|
||||
|
||||
pub async fn reconstruct_data_columns(
|
||||
self: &Arc<Self>,
|
||||
block_root: Hash256,
|
||||
) -> Result<
|
||||
Option<(
|
||||
AvailabilityProcessingStatus,
|
||||
DataColumnSidecarList<T::EthSpec>,
|
||||
)>,
|
||||
BlockError,
|
||||
> {
|
||||
// As of now we only reconstruct data columns on supernodes, so if the block is already
|
||||
// available on a supernode, there's no need to reconstruct as the node must already have
|
||||
// all columns.
|
||||
if self
|
||||
.canonical_head
|
||||
.fork_choice_read_lock()
|
||||
.contains_block(&block_root)
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let data_availability_checker = self.data_availability_checker.clone();
|
||||
|
||||
let result = self
|
||||
.task_executor
|
||||
.spawn_blocking_handle(
|
||||
move || data_availability_checker.reconstruct_data_columns(&block_root),
|
||||
"reconstruct_data_columns",
|
||||
)
|
||||
.ok_or(BeaconChainError::RuntimeShutdown)?
|
||||
.await
|
||||
.map_err(BeaconChainError::TokioJoin)??;
|
||||
|
||||
match result {
|
||||
DataColumnReconstructionResult::Success((availability, data_columns_to_publish)) => {
|
||||
let Some(slot) = data_columns_to_publish.first().map(|d| d.slot()) else {
|
||||
// This should be unreachable because empty result would return `RecoveredColumnsNotImported` instead of success.
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let r = self
|
||||
.process_availability(slot, availability, || Ok(()))
|
||||
.await;
|
||||
self.remove_notified(&block_root, r)
|
||||
.map(|availability_processing_status| {
|
||||
Some((availability_processing_status, data_columns_to_publish))
|
||||
})
|
||||
}
|
||||
DataColumnReconstructionResult::NotStarted(reason)
|
||||
| DataColumnReconstructionResult::RecoveredColumnsNotImported(reason) => {
|
||||
// We use metric here because logging this would be *very* noisy.
|
||||
metrics::inc_counter_vec(
|
||||
&metrics::KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL,
|
||||
&[reason],
|
||||
);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove any block components from the *processing cache* if we no longer require them. If the
|
||||
@@ -3172,23 +3220,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
r
|
||||
}
|
||||
|
||||
/// Remove any block components from the *processing cache* if we no longer require them. If the
|
||||
/// block was imported full or erred, we no longer require them.
|
||||
fn remove_notified_custody_columns<P>(
|
||||
&self,
|
||||
block_root: &Hash256,
|
||||
r: Result<(AvailabilityProcessingStatus, P), BlockError>,
|
||||
) -> Result<(AvailabilityProcessingStatus, P), BlockError> {
|
||||
let has_missing_components = matches!(
|
||||
r,
|
||||
Ok((AvailabilityProcessingStatus::MissingComponents(_, _), _))
|
||||
);
|
||||
if !has_missing_components {
|
||||
self.reqresp_pre_import_cache.write().remove(block_root);
|
||||
}
|
||||
r
|
||||
}
|
||||
|
||||
/// Wraps `process_block` in logic to cache the block's commitments in the processing cache
|
||||
/// and evict if the block was imported or errored.
|
||||
pub async fn process_block_with_early_caching<B: IntoExecutionPendingBlock<T>>(
|
||||
@@ -3444,26 +3475,21 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
block_root: Hash256,
|
||||
data_columns: Vec<GossipVerifiedDataColumn<T>>,
|
||||
publish_fn: impl FnOnce() -> Result<(), BlockError>,
|
||||
) -> Result<
|
||||
(
|
||||
AvailabilityProcessingStatus,
|
||||
DataColumnsToPublish<T::EthSpec>,
|
||||
),
|
||||
BlockError,
|
||||
> {
|
||||
) -> Result<AvailabilityProcessingStatus, BlockError> {
|
||||
if let Some(slasher) = self.slasher.as_ref() {
|
||||
for data_colum in &data_columns {
|
||||
slasher.accept_block_header(data_colum.signed_block_header());
|
||||
}
|
||||
}
|
||||
|
||||
let (availability, data_columns_to_publish) = self
|
||||
.data_availability_checker
|
||||
.put_gossip_data_columns(slot, block_root, data_columns)?;
|
||||
let availability = self.data_availability_checker.put_gossip_data_columns(
|
||||
slot,
|
||||
block_root,
|
||||
data_columns,
|
||||
)?;
|
||||
|
||||
self.process_availability(slot, availability, publish_fn)
|
||||
.await
|
||||
.map(|result| (result, data_columns_to_publish))
|
||||
}
|
||||
|
||||
/// Checks if the provided blobs can make any cached blocks available, and imports immediately
|
||||
@@ -3513,13 +3539,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
slot: Slot,
|
||||
block_root: Hash256,
|
||||
custody_columns: DataColumnSidecarList<T::EthSpec>,
|
||||
) -> Result<
|
||||
(
|
||||
AvailabilityProcessingStatus,
|
||||
DataColumnsToPublish<T::EthSpec>,
|
||||
),
|
||||
BlockError,
|
||||
> {
|
||||
) -> Result<AvailabilityProcessingStatus, BlockError> {
|
||||
// Need to scope this to ensure the lock is dropped before calling `process_availability`
|
||||
// Even an explicit drop is not enough to convince the borrow checker.
|
||||
{
|
||||
@@ -3544,16 +3564,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
|
||||
// This slot value is purely informative for the consumers of
|
||||
// `AvailabilityProcessingStatus::MissingComponents` to log an error with a slot.
|
||||
let (availability, data_columns_to_publish) =
|
||||
self.data_availability_checker.put_rpc_custody_columns(
|
||||
block_root,
|
||||
slot.epoch(T::EthSpec::slots_per_epoch()),
|
||||
custody_columns,
|
||||
)?;
|
||||
let availability = self.data_availability_checker.put_rpc_custody_columns(
|
||||
block_root,
|
||||
slot.epoch(T::EthSpec::slots_per_epoch()),
|
||||
custody_columns,
|
||||
)?;
|
||||
|
||||
self.process_availability(slot, availability, || Ok(()))
|
||||
.await
|
||||
.map(|result| (result, data_columns_to_publish))
|
||||
}
|
||||
|
||||
/// Imports a fully available block. Otherwise, returns `AvailabilityProcessingStatus::MissingComponents`
|
||||
|
||||
Reference in New Issue
Block a user