mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-11 18:04:18 +00:00
Improving blob propagation post-PeerDAS with Decentralized Blob Building (#6268)
* Get blobs from EL. Co-authored-by: Michael Sproul <michael@sigmaprime.io> * Avoid cloning blobs after fetching blobs. * Address review comments and refactor code. * Fix lint. * Move blob computation metric to the right spot. * Merge branch 'unstable' into das-fetch-blobs * Merge branch 'unstable' into das-fetch-blobs # Conflicts: # beacon_node/beacon_chain/src/beacon_chain.rs # beacon_node/beacon_chain/src/block_verification.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs * Merge branch 'unstable' into das-fetch-blobs # Conflicts: # beacon_node/beacon_chain/src/beacon_chain.rs * Gradual publication of data columns for supernodes. * Recompute head after importing block with blobs from the EL. * Fix lint * Merge branch 'unstable' into das-fetch-blobs * Use blocking task instead of async when computing cells. * Merge branch 'das-fetch-blobs' of github.com:jimmygchen/lighthouse into das-fetch-blobs * Merge remote-tracking branch 'origin/unstable' into das-fetch-blobs * Fix semantic conflicts * Downgrade error log. * Merge branch 'unstable' into das-fetch-blobs # Conflicts: # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/execution_layer/src/engine_api.rs # beacon_node/execution_layer/src/engine_api/json_structures.rs # beacon_node/network/src/network_beacon_processor/gossip_methods.rs # beacon_node/network/src/network_beacon_processor/mod.rs # beacon_node/network/src/network_beacon_processor/sync_methods.rs * Merge branch 'unstable' into das-fetch-blobs * Publish block without waiting for blob and column proof computation. * Address review comments and refactor. * Merge branch 'unstable' into das-fetch-blobs * Fix test and docs. * Comment cleanups. * Merge branch 'unstable' into das-fetch-blobs * Address review comments and cleanup * Address review comments and cleanup * Refactor to de-duplicate gradual publication logic. * Add more logging. * Merge remote-tracking branch 'origin/unstable' into das-fetch-blobs # Conflicts: # Cargo.lock * Fix incorrect comparison on `num_fetched_blobs`. * Implement gradual blob publication. * Merge branch 'unstable' into das-fetch-blobs * Inline `publish_fn`. * Merge branch 'das-fetch-blobs' of github.com:jimmygchen/lighthouse into das-fetch-blobs * Gossip verify blobs before publishing * Avoid queries for 0 blobs and error for duplicates * Gossip verified engine blob before processing them, and use observe cache to detect duplicates before publishing. * Merge branch 'das-fetch-blobs' of github.com:jimmygchen/lighthouse into das-fetch-blobs # Conflicts: # beacon_node/network/src/network_beacon_processor/mod.rs * Merge branch 'unstable' into das-fetch-blobs * Fix invalid commitment inclusion proofs in blob sidecars created from EL blobs. * Only publish EL blobs triggered from gossip block, and not RPC block. * Downgrade gossip blob log to `debug`. * Merge branch 'unstable' into das-fetch-blobs * Merge branch 'unstable' into das-fetch-blobs * Grammar
This commit is contained in:
@@ -914,18 +914,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
let blob_slot = verified_blob.slot();
|
||||
let blob_index = verified_blob.id().index;
|
||||
|
||||
let result = self
|
||||
.chain
|
||||
.process_gossip_blob(verified_blob, || Ok(()))
|
||||
.await;
|
||||
let result = self.chain.process_gossip_blob(verified_blob).await;
|
||||
|
||||
match &result {
|
||||
Ok(AvailabilityProcessingStatus::Imported(block_root)) => {
|
||||
// Note: Reusing block imported metric here
|
||||
metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL);
|
||||
info!(
|
||||
debug!(
|
||||
self.log,
|
||||
"Gossipsub blob processed, imported fully available block";
|
||||
"Gossipsub blob processed - imported fully available block";
|
||||
"block_root" => %block_root
|
||||
);
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
@@ -936,9 +933,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
);
|
||||
}
|
||||
Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => {
|
||||
trace!(
|
||||
debug!(
|
||||
self.log,
|
||||
"Processed blob, waiting for other components";
|
||||
"Processed gossip blob - waiting for other components";
|
||||
"slot" => %slot,
|
||||
"blob_index" => %blob_index,
|
||||
"block_root" => %block_root,
|
||||
@@ -1079,7 +1076,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
message_id,
|
||||
peer_id,
|
||||
peer_client,
|
||||
block,
|
||||
block.clone(),
|
||||
reprocess_tx.clone(),
|
||||
seen_duration,
|
||||
)
|
||||
@@ -1497,6 +1494,13 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
"slot" => slot,
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
|
||||
// Block is valid, we can now attempt fetching blobs from EL using version hashes
|
||||
// derived from kzg commitments from the block, without having to wait for all blobs
|
||||
// to be sent from the peers if we already have them.
|
||||
let publish_blobs = true;
|
||||
self.fetch_engine_blobs_and_publish(block.clone(), *block_root, publish_blobs)
|
||||
.await;
|
||||
}
|
||||
Err(BlockError::ParentUnknown { .. }) => {
|
||||
// This should not occur. It should be checked by `should_forward_block`.
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
use crate::sync::manager::BlockProcessType;
|
||||
use crate::sync::SamplingId;
|
||||
use crate::{service::NetworkMessage, sync::manager::SyncMessage};
|
||||
use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob};
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::data_column_verification::{observe_gossip_data_column, GossipDataColumnError};
|
||||
use beacon_chain::fetch_blobs::{
|
||||
fetch_and_process_engine_blobs, BlobsOrDataColumns, FetchEngineBlobError,
|
||||
};
|
||||
use beacon_chain::observed_data_sidecars::DoNotObserve;
|
||||
use beacon_chain::{
|
||||
builder::Witness, eth1_chain::CachingEth1Backend, AvailabilityProcessingStatus, BeaconChain,
|
||||
BeaconChainTypes, BlockError, NotifyExecutionLayer,
|
||||
};
|
||||
use beacon_chain::{BeaconChainTypes, NotifyExecutionLayer};
|
||||
use beacon_processor::{
|
||||
work_reprocessing_queue::ReprocessQueueMessage, BeaconProcessorChannels, BeaconProcessorSend,
|
||||
DuplicateCache, GossipAggregatePackage, GossipAttestationPackage, Work,
|
||||
@@ -21,7 +27,8 @@ use lighthouse_network::{
|
||||
rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage},
|
||||
Client, MessageId, NetworkGlobals, PeerId, PubsubMessage,
|
||||
};
|
||||
use slog::{debug, error, trace, Logger};
|
||||
use rand::prelude::SliceRandom;
|
||||
use slog::{debug, error, trace, warn, Logger};
|
||||
use slot_clock::ManualSlotClock;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
@@ -67,6 +74,9 @@ pub struct NetworkBeaconProcessor<T: BeaconChainTypes> {
|
||||
pub log: Logger,
|
||||
}
|
||||
|
||||
// Publish blobs in batches of exponentially increasing size.
|
||||
const BLOB_PUBLICATION_EXP_FACTOR: usize = 2;
|
||||
|
||||
impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
fn try_send(&self, event: BeaconWorkEvent<T::EthSpec>) -> Result<(), Error<T::EthSpec>> {
|
||||
self.beacon_processor_send
|
||||
@@ -878,6 +888,79 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
});
|
||||
}
|
||||
|
||||
pub async fn fetch_engine_blobs_and_publish(
|
||||
self: &Arc<Self>,
|
||||
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
|
||||
block_root: Hash256,
|
||||
publish_blobs: bool,
|
||||
) {
|
||||
let self_cloned = self.clone();
|
||||
let publish_fn = move |blobs_or_data_column| {
|
||||
if publish_blobs {
|
||||
match blobs_or_data_column {
|
||||
BlobsOrDataColumns::Blobs(blobs) => {
|
||||
self_cloned.publish_blobs_gradually(blobs, block_root);
|
||||
}
|
||||
BlobsOrDataColumns::DataColumns(columns) => {
|
||||
self_cloned.publish_data_columns_gradually(columns, block_root);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
match fetch_and_process_engine_blobs(
|
||||
self.chain.clone(),
|
||||
block_root,
|
||||
block.clone(),
|
||||
publish_fn,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(availability)) => match availability {
|
||||
AvailabilityProcessingStatus::Imported(_) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Block components retrieved from EL";
|
||||
"result" => "imported block and custody columns",
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
AvailabilityProcessingStatus::MissingComponents(_, _) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Still missing blobs after engine blobs processed successfully";
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
}
|
||||
},
|
||||
Ok(None) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Fetch blobs completed without import";
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
}
|
||||
Err(FetchEngineBlobError::BlobProcessingError(BlockError::DuplicateFullyImported(
|
||||
..,
|
||||
))) => {
|
||||
debug!(
|
||||
self.log,
|
||||
"Fetch blobs duplicate import";
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
self.log,
|
||||
"Error fetching or processing blobs from EL";
|
||||
"error" => ?e,
|
||||
"block_root" => %block_root,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to reconstruct all data columns if the following conditions satisfies:
|
||||
/// - Our custody requirement is all columns
|
||||
/// - We have >= 50% of columns, but not all columns
|
||||
@@ -885,25 +968,13 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
/// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed,
|
||||
/// otherwise returns `None`.
|
||||
async fn attempt_data_column_reconstruction(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
block_root: Hash256,
|
||||
) -> Option<AvailabilityProcessingStatus> {
|
||||
let result = self.chain.reconstruct_data_columns(block_root).await;
|
||||
match result {
|
||||
Ok(Some((availability_processing_status, data_columns_to_publish))) => {
|
||||
self.send_network_message(NetworkMessage::Publish {
|
||||
messages: data_columns_to_publish
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
|
||||
d.index as usize,
|
||||
&self.chain.spec,
|
||||
);
|
||||
PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone())))
|
||||
})
|
||||
.collect(),
|
||||
});
|
||||
|
||||
self.publish_data_columns_gradually(data_columns_to_publish, block_root);
|
||||
match &availability_processing_status {
|
||||
AvailabilityProcessingStatus::Imported(hash) => {
|
||||
debug!(
|
||||
@@ -946,6 +1017,175 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This function gradually publishes blobs to the network in randomised batches.
|
||||
///
|
||||
/// This is an optimisation to reduce outbound bandwidth and ensures each blob is published
|
||||
/// by some nodes on the network as soon as possible. Our hope is that some blobs arrive from
|
||||
/// other nodes in the meantime, obviating the need for us to publish them. If no other
|
||||
/// publisher exists for a blob, it will eventually get published here.
|
||||
fn publish_blobs_gradually(
|
||||
self: &Arc<Self>,
|
||||
mut blobs: Vec<GossipVerifiedBlob<T, DoNotObserve>>,
|
||||
block_root: Hash256,
|
||||
) {
|
||||
let self_clone = self.clone();
|
||||
|
||||
self.executor.spawn(
|
||||
async move {
|
||||
let chain = self_clone.chain.clone();
|
||||
let log = self_clone.chain.logger();
|
||||
let publish_fn = |blobs: Vec<Arc<BlobSidecar<T::EthSpec>>>| {
|
||||
self_clone.send_network_message(NetworkMessage::Publish {
|
||||
messages: blobs
|
||||
.into_iter()
|
||||
.map(|blob| PubsubMessage::BlobSidecar(Box::new((blob.index, blob))))
|
||||
.collect(),
|
||||
});
|
||||
};
|
||||
|
||||
// Permute the blobs and split them into batches.
|
||||
// The hope is that we won't need to publish some blobs because we will receive them
|
||||
// on gossip from other nodes.
|
||||
blobs.shuffle(&mut rand::thread_rng());
|
||||
|
||||
let blob_publication_batch_interval = chain.config.blob_publication_batch_interval;
|
||||
let mut publish_count = 0usize;
|
||||
let blob_count = blobs.len();
|
||||
let mut blobs_iter = blobs.into_iter().peekable();
|
||||
let mut batch_size = 1usize;
|
||||
|
||||
while blobs_iter.peek().is_some() {
|
||||
let batch = blobs_iter.by_ref().take(batch_size);
|
||||
let publishable = batch
|
||||
.filter_map(|unobserved| match unobserved.observe(&chain) {
|
||||
Ok(observed) => Some(observed.clone_blob()),
|
||||
Err(GossipBlobError::RepeatBlob { .. }) => None,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
log,
|
||||
"Previously verified blob is invalid";
|
||||
"error" => ?e
|
||||
);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !publishable.is_empty() {
|
||||
debug!(
|
||||
log,
|
||||
"Publishing blob batch";
|
||||
"publish_count" => publishable.len(),
|
||||
"block_root" => ?block_root,
|
||||
);
|
||||
publish_count += publishable.len();
|
||||
publish_fn(publishable);
|
||||
}
|
||||
|
||||
tokio::time::sleep(blob_publication_batch_interval).await;
|
||||
batch_size *= BLOB_PUBLICATION_EXP_FACTOR;
|
||||
}
|
||||
|
||||
debug!(
|
||||
log,
|
||||
"Batch blob publication complete";
|
||||
"batch_interval" => blob_publication_batch_interval.as_millis(),
|
||||
"blob_count" => blob_count,
|
||||
"published_count" => publish_count,
|
||||
"block_root" => ?block_root,
|
||||
)
|
||||
},
|
||||
"gradual_blob_publication",
|
||||
);
|
||||
}
|
||||
|
||||
/// This function gradually publishes data columns to the network in randomised batches.
|
||||
///
|
||||
/// This is an optimisation to reduce outbound bandwidth and ensures each column is published
|
||||
/// by some nodes on the network as soon as possible. Our hope is that some columns arrive from
|
||||
/// other supernodes in the meantime, obviating the need for us to publish them. If no other
|
||||
/// publisher exists for a column, it will eventually get published here.
|
||||
fn publish_data_columns_gradually(
|
||||
self: &Arc<Self>,
|
||||
mut data_columns_to_publish: DataColumnSidecarList<T::EthSpec>,
|
||||
block_root: Hash256,
|
||||
) {
|
||||
let self_clone = self.clone();
|
||||
|
||||
self.executor.spawn(
|
||||
async move {
|
||||
let chain = self_clone.chain.clone();
|
||||
let log = self_clone.chain.logger();
|
||||
let publish_fn = |columns: DataColumnSidecarList<T::EthSpec>| {
|
||||
self_clone.send_network_message(NetworkMessage::Publish {
|
||||
messages: columns
|
||||
.into_iter()
|
||||
.map(|d| {
|
||||
let subnet = DataColumnSubnetId::from_column_index::<T::EthSpec>(
|
||||
d.index as usize,
|
||||
&chain.spec,
|
||||
);
|
||||
PubsubMessage::DataColumnSidecar(Box::new((subnet, d)))
|
||||
})
|
||||
.collect(),
|
||||
});
|
||||
};
|
||||
|
||||
// If this node is a super node, permute the columns and split them into batches.
|
||||
// The hope is that we won't need to publish some columns because we will receive them
|
||||
// on gossip from other supernodes.
|
||||
data_columns_to_publish.shuffle(&mut rand::thread_rng());
|
||||
|
||||
let blob_publication_batch_interval = chain.config.blob_publication_batch_interval;
|
||||
let blob_publication_batches = chain.config.blob_publication_batches;
|
||||
let batch_size = chain.spec.number_of_columns / blob_publication_batches;
|
||||
let mut publish_count = 0usize;
|
||||
|
||||
for batch in data_columns_to_publish.chunks(batch_size) {
|
||||
let publishable = batch
|
||||
.iter()
|
||||
.filter_map(|col| match observe_gossip_data_column(col, &chain) {
|
||||
Ok(()) => Some(col.clone()),
|
||||
Err(GossipDataColumnError::PriorKnown { .. }) => None,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
log,
|
||||
"Previously verified data column is invalid";
|
||||
"error" => ?e
|
||||
);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !publishable.is_empty() {
|
||||
debug!(
|
||||
log,
|
||||
"Publishing data column batch";
|
||||
"publish_count" => publishable.len(),
|
||||
"block_root" => ?block_root,
|
||||
);
|
||||
publish_count += publishable.len();
|
||||
publish_fn(publishable);
|
||||
}
|
||||
|
||||
tokio::time::sleep(blob_publication_batch_interval).await;
|
||||
}
|
||||
|
||||
debug!(
|
||||
log,
|
||||
"Batch data column publishing complete";
|
||||
"batch_size" => batch_size,
|
||||
"batch_interval" => blob_publication_batch_interval.as_millis(),
|
||||
"data_columns_to_publish_count" => data_columns_to_publish.len(),
|
||||
"published_count" => publish_count,
|
||||
"block_root" => ?block_root,
|
||||
)
|
||||
},
|
||||
"gradual_data_column_publication",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
type TestBeaconChainType<E> =
|
||||
|
||||
@@ -153,6 +153,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
"process_type" => ?process_type,
|
||||
);
|
||||
|
||||
let signed_beacon_block = block.block_cloned();
|
||||
let result = self
|
||||
.chain
|
||||
.process_block_with_early_caching(
|
||||
@@ -166,26 +167,37 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
metrics::inc_counter(&metrics::BEACON_PROCESSOR_RPC_BLOCK_IMPORTED_TOTAL);
|
||||
|
||||
// RPC block imported, regardless of process type
|
||||
if let &Ok(AvailabilityProcessingStatus::Imported(hash)) = &result {
|
||||
info!(self.log, "New RPC block received"; "slot" => slot, "hash" => %hash);
|
||||
match result.as_ref() {
|
||||
Ok(AvailabilityProcessingStatus::Imported(hash)) => {
|
||||
info!(self.log, "New RPC block received"; "slot" => slot, "hash" => %hash);
|
||||
|
||||
// Trigger processing for work referencing this block.
|
||||
let reprocess_msg = ReprocessQueueMessage::BlockImported {
|
||||
block_root: hash,
|
||||
parent_root,
|
||||
};
|
||||
if reprocess_tx.try_send(reprocess_msg).is_err() {
|
||||
error!(self.log, "Failed to inform block import"; "source" => "rpc", "block_root" => %hash)
|
||||
};
|
||||
self.chain.block_times_cache.write().set_time_observed(
|
||||
hash,
|
||||
slot,
|
||||
seen_timestamp,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
// Trigger processing for work referencing this block.
|
||||
let reprocess_msg = ReprocessQueueMessage::BlockImported {
|
||||
block_root: *hash,
|
||||
parent_root,
|
||||
};
|
||||
if reprocess_tx.try_send(reprocess_msg).is_err() {
|
||||
error!(self.log, "Failed to inform block import"; "source" => "rpc", "block_root" => %hash)
|
||||
};
|
||||
self.chain.block_times_cache.write().set_time_observed(
|
||||
*hash,
|
||||
slot,
|
||||
seen_timestamp,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
self.chain.recompute_head_at_current_slot().await;
|
||||
}
|
||||
Ok(AvailabilityProcessingStatus::MissingComponents(..)) => {
|
||||
// Block is valid, we can now attempt fetching blobs from EL using version hashes
|
||||
// derived from kzg commitments from the block, without having to wait for all blobs
|
||||
// to be sent from the peers if we already have them.
|
||||
let publish_blobs = false;
|
||||
self.fetch_engine_blobs_and_publish(signed_beacon_block, block_root, publish_blobs)
|
||||
.await
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// RPC block imported or execution validated. If the block was already imported by gossip we
|
||||
|
||||
Reference in New Issue
Block a user