Verify getBlobsV2 response and avoid reprocessing imported data columns (#7493)

#7461 and partly #6439.

Desired behaviour after receiving `engine_getBlobs` response:

1. Gossip verify the blobs and proofs, but don't mark them as observed yet. This is because not all blobs are published immediately (due to staggered publishing). If we mark them as observed and not publish them, we could end up blocking the gossip propagation.
2. Blobs are marked as observed _either_ when:
* They are received from gossip and forwarded to the network.
* They are published by the node.

Current behaviour:
-  We only gossip verify `engine_getBlobsV1` responses, but not `engine_getBlobsV2` responses (PeerDAS).
-  After importing EL blobs AND before they're published, if the same blobs arrive via gossip, they will get re-processed, which may result in a re-import.


  1. Perform gossip verification on data columns computed from EL `getBlobsV2` response. We currently only do this for `getBlobsV1` to prevent importing blobs with invalid proofs into the `DataAvailabilityChecker`, this should be done on V2 responses too.
2. Add additional gossip verification to make sure we don't re-process a ~~blob~~ or data column that was imported via the EL `getBlobs` but not yet "seen" on the gossip network. If an "unobserved" gossip blob is found in the availability cache, then we know it has passed verification so we can immediately propagate the `ACCEPT` result and forward it to the network, but without re-processing it.

**UPDATE:** I've left blobs out for the second change mentioned above, as the likelihood and impact are very low and we haven't observed it happening often, but under PeerDAS this issue is a regular occurrence and we do see the same block getting imported many times.
This commit is contained in:
Jimmy Chen
2025-05-27 05:55:58 +10:00
committed by GitHub
parent f01dc556d1
commit e6ef644db4
12 changed files with 371 additions and 160 deletions

View File

@@ -1,4 +1,5 @@
use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob};
use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
use crate::fetch_blobs::{EngineGetBlobsOutput, FetchEngineBlobError};
use crate::observed_data_sidecars::DoNotObserve;
use crate::{AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes};
@@ -8,7 +9,7 @@ use kzg::Kzg;
use mockall::automock;
use std::sync::Arc;
use task_executor::TaskExecutor;
use types::{BlobSidecar, ChainSpec, Hash256, Slot};
use types::{BlobSidecar, ChainSpec, DataColumnSidecar, Hash256, Slot};
/// An adapter to the `BeaconChain` functionalities to remove `BeaconChain` from direct dependency to enable testing fetch blobs logic.
pub(crate) struct FetchBlobsBeaconAdapter<T: BeaconChainTypes> {
@@ -74,11 +75,19 @@ impl<T: BeaconChainTypes> FetchBlobsBeaconAdapter<T> {
GossipVerifiedBlob::<T, DoNotObserve>::new(blob.clone(), blob.index, &self.chain)
}
/// Gossip-verifies a data column sidecar computed from an EL `getBlobsV2`
/// response, without marking it as observed.
///
/// The `DoNotObserve` marker defers insertion into the observed-sidecars
/// cache: columns may not be published immediately (staggered publishing),
/// and observing them now could block gossip propagation of the same column
/// arriving from peers. The column is observed just before publishing instead.
pub(crate) fn verify_data_column_for_gossip(
&self,
data_column: Arc<DataColumnSidecar<T::EthSpec>>,
) -> Result<GossipVerifiedDataColumn<T, DoNotObserve>, GossipDataColumnError> {
// Copy the index out before `data_column` is moved into the verifier.
let index = data_column.index;
GossipVerifiedDataColumn::<T, DoNotObserve>::new(data_column, index, &self.chain)
}
pub(crate) async fn process_engine_blobs(
&self,
slot: Slot,
block_root: Hash256,
blobs: EngineGetBlobsOutput<T::EthSpec>,
blobs: EngineGetBlobsOutput<T>,
) -> Result<AvailabilityProcessingStatus, FetchEngineBlobError> {
self.chain
.process_engine_blobs(slot, block_root, blobs)

View File

@@ -13,6 +13,7 @@ mod fetch_blobs_beacon_adapter;
mod tests;
use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob};
use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
#[cfg_attr(test, double)]
use crate::fetch_blobs::fetch_blobs_beacon_adapter::FetchBlobsBeaconAdapter;
use crate::kzg_utils::blobs_to_data_column_sidecars;
@@ -34,24 +35,17 @@ use tracing::{debug, warn};
use types::blob_sidecar::{BlobSidecarError, FixedBlobSidecarList};
use types::data_column_sidecar::DataColumnSidecarError;
use types::{
BeaconStateError, Blob, BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecarList, EthSpec,
FullPayload, Hash256, KzgProofs, SignedBeaconBlock, SignedBeaconBlockHeader, VersionedHash,
BeaconStateError, Blob, BlobSidecar, ChainSpec, ColumnIndex, EthSpec, FullPayload, Hash256,
KzgProofs, SignedBeaconBlock, SignedBeaconBlockHeader, VersionedHash,
};
/// Blobs or data column to be published to the gossip network.
pub enum BlobsOrDataColumns<T: BeaconChainTypes> {
/// Result from engine get blobs to be passed onto `DataAvailabilityChecker` and published to the
/// gossip network. The blobs / data columns have not been marked as observed yet, as they may not
/// be published immediately.
pub enum EngineGetBlobsOutput<T: BeaconChainTypes> {
Blobs(Vec<GossipVerifiedBlob<T, DoNotObserve>>),
DataColumns(DataColumnSidecarList<T::EthSpec>),
}
/// Result from engine get blobs to be passed onto `DataAvailabilityChecker`.
///
/// The blobs are retrieved from a trusted EL and columns are computed locally, therefore they are
/// considered valid without requiring extra validation.
pub enum EngineGetBlobsOutput<E: EthSpec> {
Blobs(FixedBlobSidecarList<E>),
/// A filtered list of custody data columns to be imported into the `DataAvailabilityChecker`.
CustodyColumns(DataColumnSidecarList<E>),
CustodyColumns(Vec<GossipVerifiedDataColumn<T, DoNotObserve>>),
}
#[derive(Debug)]
@@ -64,6 +58,7 @@ pub enum FetchEngineBlobError {
ExecutionLayerMissing,
InternalError(String),
GossipBlob(GossipBlobError),
GossipDataColumn(GossipDataColumnError),
RequestFailed(ExecutionLayerError),
RuntimeShutdown,
TokioJoin(tokio::task::JoinError),
@@ -76,7 +71,7 @@ pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
custody_columns: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
publish_fn: impl Fn(EngineGetBlobsOutput<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
fetch_and_process_engine_blobs_inner(
FetchBlobsBeaconAdapter::new(chain),
@@ -95,7 +90,7 @@ async fn fetch_and_process_engine_blobs_inner<T: BeaconChainTypes>(
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
custody_columns: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
publish_fn: impl Fn(EngineGetBlobsOutput<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let versioned_hashes = if let Some(kzg_commitments) = block
.message()
@@ -148,7 +143,7 @@ async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec>>,
versioned_hashes: Vec<VersionedHash>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + Sized,
publish_fn: impl Fn(EngineGetBlobsOutput<T>) + Send + Sized,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let num_expected_blobs = versioned_hashes.len();
metrics::observe(&metrics::BLOBS_FROM_EL_EXPECTED, num_expected_blobs as f64);
@@ -189,7 +184,7 @@ async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
// and be accepted (and propagated) while we are waiting to publish. Just before publishing
// we will observe the blobs/columns and only proceed with publishing if they are not yet seen.
let blobs_to_import_and_publish = fixed_blob_sidecar_list
.iter()
.into_iter()
.filter_map(|opt_blob| {
let blob = opt_blob.as_ref()?;
match chain_adapter.verify_blob_for_gossip(blob) {
@@ -203,7 +198,9 @@ async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
.map_err(FetchEngineBlobError::GossipBlob)?;
if !blobs_to_import_and_publish.is_empty() {
publish_fn(BlobsOrDataColumns::Blobs(blobs_to_import_and_publish));
publish_fn(EngineGetBlobsOutput::Blobs(
blobs_to_import_and_publish.clone(),
));
}
debug!(num_fetched_blobs, "Processing engine blobs");
@@ -212,7 +209,7 @@ async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
.process_engine_blobs(
block.slot(),
block_root,
EngineGetBlobsOutput::Blobs(fixed_blob_sidecar_list.clone()),
EngineGetBlobsOutput::Blobs(blobs_to_import_and_publish),
)
.await?;
@@ -225,7 +222,7 @@ async fn fetch_and_process_blobs_v2<T: BeaconChainTypes>(
block: Arc<SignedBeaconBlock<T::EthSpec>>,
versioned_hashes: Vec<VersionedHash>,
custody_columns_indices: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
publish_fn: impl Fn(EngineGetBlobsOutput<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let num_expected_blobs = versioned_hashes.len();
@@ -278,6 +275,7 @@ async fn fetch_and_process_blobs_v2<T: BeaconChainTypes>(
return Ok(None);
}
let chain_adapter = Arc::new(chain_adapter);
let custody_columns = compute_and_publish_data_columns(
&chain_adapter,
block.clone(),
@@ -303,15 +301,16 @@ async fn fetch_and_process_blobs_v2<T: BeaconChainTypes>(
/// Offload the data column computation to a blocking task to avoid holding up the async runtime.
async fn compute_and_publish_data_columns<T: BeaconChainTypes>(
chain_adapter: &FetchBlobsBeaconAdapter<T>,
chain_adapter: &Arc<FetchBlobsBeaconAdapter<T>>,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
blobs: Vec<Blob<T::EthSpec>>,
proofs: Vec<KzgProofs<T::EthSpec>>,
custody_columns_indices: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
) -> Result<DataColumnSidecarList<T::EthSpec>, FetchEngineBlobError> {
publish_fn: impl Fn(EngineGetBlobsOutput<T>) + Send + 'static,
) -> Result<Vec<GossipVerifiedDataColumn<T, DoNotObserve>>, FetchEngineBlobError> {
let kzg = chain_adapter.kzg().clone();
let spec = chain_adapter.spec().clone();
let chain_adapter_cloned = chain_adapter.clone();
chain_adapter
.executor()
.spawn_blocking_handle(
@@ -338,8 +337,54 @@ async fn compute_and_publish_data_columns<T: BeaconChainTypes>(
})
.map_err(FetchEngineBlobError::DataColumnSidecarError)?;
publish_fn(BlobsOrDataColumns::DataColumns(custody_columns.clone()));
Ok(custody_columns)
// Gossip verify data columns before publishing. This prevents blobs with invalid
// KZG proofs from the EL making it into the data availability checker. We do not
// immediately add these blobs to the observed blobs/columns cache because we want
// to allow blobs/columns to arrive on gossip and be accepted (and propagated) while
// we are waiting to publish. Just before publishing we will observe the blobs/columns
// and only proceed with publishing if they are not yet seen.
// TODO(das): we may want to just perform kzg proof verification here, since the
// `DataColumnSidecar` and inclusion proof is computed just above and is unnecessary
// to verify them.
let columns_to_import_and_publish = custody_columns
.into_iter()
.filter_map(|col| {
match chain_adapter_cloned.verify_data_column_for_gossip(col) {
Ok(verified) => Some(Ok(verified)),
Err(e) => match e {
// Ignore already seen data columns
GossipDataColumnError::PriorKnown { .. }
| GossipDataColumnError::PriorKnownUnpublished => None,
GossipDataColumnError::BeaconChainError(_)
| GossipDataColumnError::ProposalSignatureInvalid
| GossipDataColumnError::UnknownValidator(_)
| GossipDataColumnError::IsNotLaterThanParent { .. }
| GossipDataColumnError::InvalidKzgProof(_)
| GossipDataColumnError::InvalidSubnetId { .. }
| GossipDataColumnError::FutureSlot { .. }
| GossipDataColumnError::PastFinalizedSlot { .. }
| GossipDataColumnError::PubkeyCacheTimeout
| GossipDataColumnError::ProposerIndexMismatch { .. }
| GossipDataColumnError::ParentUnknown { .. }
| GossipDataColumnError::NotFinalizedDescendant { .. }
| GossipDataColumnError::InvalidInclusionProof
| GossipDataColumnError::InvalidColumnIndex(_)
| GossipDataColumnError::UnexpectedDataColumn
| GossipDataColumnError::InconsistentCommitmentsLength { .. }
| GossipDataColumnError::InconsistentProofsLength { .. } => {
Some(Err(e))
}
},
}
})
.collect::<Result<Vec<_>, _>>()
.map_err(FetchEngineBlobError::GossipDataColumn)?;
publish_fn(EngineGetBlobsOutput::CustodyColumns(
columns_to_import_and_publish.clone(),
));
Ok(columns_to_import_and_publish)
},
"compute_and_publish_data_columns",
)

View File

@@ -1,6 +1,7 @@
use crate::data_column_verification::GossipVerifiedDataColumn;
use crate::fetch_blobs::fetch_blobs_beacon_adapter::MockFetchBlobsBeaconAdapter;
use crate::fetch_blobs::{
fetch_and_process_engine_blobs_inner, BlobsOrDataColumns, FetchEngineBlobError,
fetch_and_process_engine_blobs_inner, EngineGetBlobsOutput, FetchEngineBlobError,
};
use crate::test_utils::{get_kzg, EphemeralHarnessType};
use crate::AvailabilityProcessingStatus;
@@ -148,6 +149,9 @@ async fn test_fetch_blobs_v2_success() {
// All blobs returned, fork choice doesn't contain block
mock_get_blobs_v2_response(&mut mock_adapter, Some(blobs_and_proofs));
mock_fork_choice_contains_block(&mut mock_adapter, vec![]);
mock_adapter
.expect_verify_data_column_for_gossip()
.returning(|c| Ok(GossipVerifiedDataColumn::__new_for_testing(c)));
mock_process_engine_blobs_result(
&mut mock_adapter,
Ok(AvailabilityProcessingStatus::Imported(block_root)),
@@ -174,16 +178,16 @@ async fn test_fetch_blobs_v2_success() {
assert!(
matches!(
published_columns,
BlobsOrDataColumns::DataColumns (columns) if columns.len() == custody_columns.len()
EngineGetBlobsOutput::CustodyColumns(columns) if columns.len() == custody_columns.len()
),
"should publish custody columns"
);
}
/// Extract the `BlobsOrDataColumns` passed to the `publish_fn`.
/// Extract the `EngineGetBlobsOutput` passed to the `publish_fn`.
fn extract_published_blobs(
publish_fn_args: Arc<Mutex<Vec<BlobsOrDataColumns<T>>>>,
) -> BlobsOrDataColumns<T> {
publish_fn_args: Arc<Mutex<Vec<EngineGetBlobsOutput<T>>>>,
) -> EngineGetBlobsOutput<T> {
let mut calls = publish_fn_args.lock().unwrap();
assert_eq!(calls.len(), 1);
calls.pop().unwrap()
@@ -250,8 +254,8 @@ fn create_test_block_and_blobs(
#[allow(clippy::type_complexity)]
fn mock_publish_fn() -> (
impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
Arc<Mutex<Vec<BlobsOrDataColumns<T>>>>,
impl Fn(EngineGetBlobsOutput<T>) + Send + 'static,
Arc<Mutex<Vec<EngineGetBlobsOutput<T>>>>,
) {
// Keep track of the arguments captured by `publish_fn`.
let captured_args = Arc::new(Mutex::new(vec![]));