mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Refactor data column reconstruction and avoid blocking processing (#6403)
* Move reconstruction logic out of `overflow_lru_cache` to simplify the code and avoid having to pass `DataColumnsToPublish` around and blocking other processing. * Publish reconstructed cells before recomputing head. Remove duplicate functions. * Merge branch 'unstable' into non-blocking-reconstruction * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/beacon_chain/src/beacon_chain.rs # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/network/src/network_beacon_processor/sync_methods.rs * Spawn a blocking task for reconstruction. * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/network/src/network_beacon_processor/mod.rs * Fix fmt * Merge branch 'unstable' into non-blocking-reconstruction # Conflicts: # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs * Fix race condition by making check and mutation atomic as suggested by Lion. Also added error handling to reconstruction failure. * Add reconstruction reason metric and more debug logging to da checker. * Add comment and logging. * Rename `NotRequired` to `NotStarted`. * Remove extra character added.
This commit is contained in:
@@ -4,6 +4,7 @@ use crate::{
|
||||
service::NetworkMessage,
|
||||
sync::SyncMessage,
|
||||
};
|
||||
use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob};
|
||||
use beacon_chain::block_verification_types::AsBlock;
|
||||
use beacon_chain::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
|
||||
use beacon_chain::store::Error;
|
||||
@@ -18,13 +19,7 @@ use beacon_chain::{
|
||||
AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
|
||||
GossipVerifiedBlock, NotifyExecutionLayer,
|
||||
};
|
||||
use beacon_chain::{
|
||||
blob_verification::{GossipBlobError, GossipVerifiedBlob},
|
||||
data_availability_checker::DataColumnsToPublish,
|
||||
};
|
||||
use lighthouse_network::{
|
||||
Client, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage, ReportSource,
|
||||
};
|
||||
use lighthouse_network::{Client, MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
|
||||
use operation_pool::ReceivedPreCapella;
|
||||
use slog::{crit, debug, error, info, trace, warn, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
@@ -171,26 +166,6 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn handle_data_columns_to_publish(
|
||||
&self,
|
||||
data_columns_to_publish: DataColumnsToPublish<T::EthSpec>,
|
||||
) {
|
||||
if let Some(data_columns_to_publish) = data_columns_to_publish {
|
||||
self.send_network_message(NetworkMessage::Publish {
|
||||
messages: data_columns_to_publish
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
|
||||
d.index as usize,
|
||||
&self.chain.spec,
|
||||
);
|
||||
PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone())))
|
||||
})
|
||||
.collect(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a message on `message_tx` that the `message_id` sent by `peer_id` should be propagated on
|
||||
/// the gossip network.
|
||||
///
|
||||
@@ -1022,9 +997,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
.process_gossip_data_columns(vec![verified_data_column], || Ok(()))
|
||||
.await
|
||||
{
|
||||
Ok((availability, data_columns_to_publish)) => {
|
||||
self.handle_data_columns_to_publish(data_columns_to_publish);
|
||||
|
||||
Ok(availability) => {
|
||||
match availability {
|
||||
AvailabilityProcessingStatus::Imported(block_root) => {
|
||||
// Note: Reusing block imported metric here
|
||||
@@ -1052,7 +1025,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
|
||||
// Potentially trigger reconstruction
|
||||
self.attempt_data_column_reconstruction(block_root).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,9 @@ use crate::sync::manager::BlockProcessType;
|
||||
use crate::sync::SamplingId;
|
||||
use crate::{service::NetworkMessage, sync::manager::SyncMessage};
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::{builder::Witness, eth1_chain::CachingEth1Backend, BeaconChain};
|
||||
use beacon_chain::{
|
||||
builder::Witness, eth1_chain::CachingEth1Backend, AvailabilityProcessingStatus, BeaconChain,
|
||||
};
|
||||
use beacon_chain::{BeaconChainTypes, NotifyExecutionLayer};
|
||||
use beacon_processor::{
|
||||
work_reprocessing_queue::ReprocessQueueMessage, BeaconProcessorChannels, BeaconProcessorSend,
|
||||
@@ -16,9 +18,9 @@ use lighthouse_network::rpc::methods::{
|
||||
use lighthouse_network::rpc::{RequestId, SubstreamId};
|
||||
use lighthouse_network::{
|
||||
rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage},
|
||||
Client, MessageId, NetworkGlobals, PeerId,
|
||||
Client, MessageId, NetworkGlobals, PeerId, PubsubMessage,
|
||||
};
|
||||
use slog::{debug, Logger};
|
||||
use slog::{debug, error, trace, Logger};
|
||||
use slot_clock::ManualSlotClock;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
@@ -848,6 +850,75 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
"error" => %e)
|
||||
});
|
||||
}
|
||||
|
||||
/// Attempt to reconstruct all data columns if the following conditions are satisfied:
|
||||
/// - Our custody requirement is all columns
|
||||
/// - We have >= 50% of columns, but not all columns
|
||||
///
|
||||
/// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed,
|
||||
/// otherwise returns `None`.
|
||||
async fn attempt_data_column_reconstruction(
|
||||
&self,
|
||||
block_root: Hash256,
|
||||
) -> Option<AvailabilityProcessingStatus> {
|
||||
let result = self.chain.reconstruct_data_columns(block_root).await;
|
||||
match result {
|
||||
Ok(Some((availability_processing_status, data_columns_to_publish))) => {
|
||||
self.send_network_message(NetworkMessage::Publish {
|
||||
messages: data_columns_to_publish
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
|
||||
d.index as usize,
|
||||
&self.chain.spec,
|
||||
);
|
||||
PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone())))
|
||||
})
|
||||
.collect(),
|
||||
});
|
||||
|
||||
match &availability_processing_status {
|
||||
AvailabilityProcessingStatus::Imported(hash) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Block components available via reconstruction";
|
||||
"result" => "imported block and custody columns",
|
||||
"block_hash" => %hash,
|
||||
);
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
AvailabilityProcessingStatus::MissingComponents(_, _) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Block components still missing block after reconstruction";
|
||||
"result" => "imported all custody columns",
|
||||
"block_hash" => %block_root,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Some(availability_processing_status)
|
||||
}
|
||||
Ok(None) => {
|
||||
// reason is tracked via the `KZG_DATA_COLUMN_RECONSTRUCTION_INCOMPLETE_TOTAL` metric
|
||||
trace!(
|
||||
self.log,
|
||||
"Reconstruction not required for block";
|
||||
"block_hash" => %block_root,
|
||||
);
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
self.log,
|
||||
"Error during data column reconstruction";
|
||||
"block_root" => %block_root,
|
||||
"error" => ?e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type TestBeaconChainType<E> =
|
||||
|
||||
@@ -327,34 +327,37 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
_seen_timestamp: Duration,
|
||||
process_type: BlockProcessType,
|
||||
) {
|
||||
let result = self
|
||||
let mut result = self
|
||||
.chain
|
||||
.process_rpc_custody_columns(custody_columns)
|
||||
.await;
|
||||
|
||||
match &result {
|
||||
Ok((availability, data_columns_to_publish)) => {
|
||||
self.handle_data_columns_to_publish(data_columns_to_publish.clone());
|
||||
|
||||
match availability {
|
||||
AvailabilityProcessingStatus::Imported(hash) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Block components retrieved";
|
||||
"result" => "imported block and custody columns",
|
||||
"block_hash" => %hash,
|
||||
);
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
AvailabilityProcessingStatus::MissingComponents(_, _) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Missing components over rpc";
|
||||
"block_hash" => %block_root,
|
||||
);
|
||||
Ok(availability) => match availability {
|
||||
AvailabilityProcessingStatus::Imported(hash) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Block components retrieved";
|
||||
"result" => "imported block and custody columns",
|
||||
"block_hash" => %hash,
|
||||
);
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
AvailabilityProcessingStatus::MissingComponents(_, _) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Missing components over rpc";
|
||||
"block_hash" => %block_root,
|
||||
);
|
||||
// Attempt reconstruction here before notifying sync, to avoid sending out more requests
|
||||
// that we may no longer need.
|
||||
if let Some(availability) =
|
||||
self.attempt_data_column_reconstruction(block_root).await
|
||||
{
|
||||
result = Ok(availability)
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(BlockError::DuplicateFullyImported(_)) => {
|
||||
debug!(
|
||||
self.log,
|
||||
@@ -374,7 +377,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
|
||||
self.send_sync_message(SyncMessage::BlockComponentProcessed {
|
||||
process_type,
|
||||
result: result.map(|(r, _)| r).into(),
|
||||
result: result.into(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user