Update engine_getBlobsV2 response type and add getBlobsV2 tests (#7505)

Update `engine_getBlobsV2` response type to `Option<Vec<BlobsAndProofV2>>`. See recent spec change [here](https://github.com/ethereum/execution-apis/pull/630).

Added some tests to cover basic fetch blob scenarios.
This commit is contained in:
Jimmy Chen
2025-05-26 14:33:34 +10:00
committed by GitHub
parent a2797d4bbd
commit f01dc556d1
8 changed files with 549 additions and 63 deletions

View File

@@ -0,0 +1,389 @@
//! This module implements an optimisation to fetch blobs via JSON-RPC from the EL.
//! If a blob has already been seen in the public mempool, then it is often unnecessary to wait for
//! it to arrive on P2P gossip. This PR uses a new JSON-RPC method (`engine_getBlobsV1`) which
//! allows the CL to load the blobs quickly from the EL's blob pool.
//!
//! Once the node fetches the blobs from EL, it then publishes the remaining blobs that it hasn't seen
//! on P2P gossip to the network. From PeerDAS onwards, together with the increase in blob count,
//! broadcasting blobs requires a much higher bandwidth, and is only done by high capacity
//! supernodes.
mod fetch_blobs_beacon_adapter;
#[cfg(test)]
mod tests;
use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob};
#[cfg_attr(test, double)]
use crate::fetch_blobs::fetch_blobs_beacon_adapter::FetchBlobsBeaconAdapter;
use crate::kzg_utils::blobs_to_data_column_sidecars;
use crate::observed_data_sidecars::DoNotObserve;
use crate::{
metrics, AvailabilityProcessingStatus, BeaconChain, BeaconChainError, BeaconChainTypes,
BlockError,
};
use execution_layer::json_structures::{BlobAndProofV1, BlobAndProofV2};
use execution_layer::Error as ExecutionLayerError;
use metrics::{inc_counter, TryExt};
#[cfg(test)]
use mockall_double::double;
use ssz_types::FixedVector;
use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash;
use std::collections::HashSet;
use std::sync::Arc;
use tracing::{debug, warn};
use types::blob_sidecar::{BlobSidecarError, FixedBlobSidecarList};
use types::data_column_sidecar::DataColumnSidecarError;
use types::{
BeaconStateError, Blob, BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecarList, EthSpec,
FullPayload, Hash256, KzgProofs, SignedBeaconBlock, SignedBeaconBlockHeader, VersionedHash,
};
/// Blobs or data column to be published to the gossip network.
pub enum BlobsOrDataColumns<T: BeaconChainTypes> {
Blobs(Vec<GossipVerifiedBlob<T, DoNotObserve>>),
DataColumns(DataColumnSidecarList<T::EthSpec>),
}
/// Result from engine get blobs to be passed onto `DataAvailabilityChecker`.
///
/// The blobs are retrieved from a trusted EL and columns are computed locally, therefore they are
/// considered valid without requiring extra validation.
pub enum EngineGetBlobsOutput<E: EthSpec> {
Blobs(FixedBlobSidecarList<E>),
/// A filtered list of custody data columns to be imported into the `DataAvailabilityChecker`.
CustodyColumns(DataColumnSidecarList<E>),
}
#[derive(Debug)]
pub enum FetchEngineBlobError {
BeaconStateError(BeaconStateError),
BeaconChainError(Box<BeaconChainError>),
BlobProcessingError(BlockError),
BlobSidecarError(BlobSidecarError),
DataColumnSidecarError(DataColumnSidecarError),
ExecutionLayerMissing,
InternalError(String),
GossipBlob(GossipBlobError),
RequestFailed(ExecutionLayerError),
RuntimeShutdown,
TokioJoin(tokio::task::JoinError),
}
/// Fetches blobs from the EL mempool and processes them. It also broadcasts unseen blobs or
/// data columns (PeerDAS onwards) to the network, using the supplied `publish_fn`.
pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
chain: Arc<BeaconChain<T>>,
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
custody_columns: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
fetch_and_process_engine_blobs_inner(
FetchBlobsBeaconAdapter::new(chain),
block_root,
block,
custody_columns,
publish_fn,
)
.await
}
/// Internal implementation of fetch blobs, which uses `FetchBlobsBeaconAdapter` instead of
/// `BeaconChain` for better testability.
async fn fetch_and_process_engine_blobs_inner<T: BeaconChainTypes>(
chain_adapter: FetchBlobsBeaconAdapter<T>,
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
custody_columns: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let versioned_hashes = if let Some(kzg_commitments) = block
.message()
.body()
.blob_kzg_commitments()
.ok()
.filter(|blobs| !blobs.is_empty())
{
kzg_commitments
.iter()
.map(kzg_commitment_to_versioned_hash)
.collect::<Vec<_>>()
} else {
debug!("Fetch blobs not triggered - none required");
return Ok(None);
};
debug!(
num_expected_blobs = versioned_hashes.len(),
"Fetching blobs from the EL"
);
if chain_adapter
.spec()
.is_peer_das_enabled_for_epoch(block.epoch())
{
fetch_and_process_blobs_v2(
chain_adapter,
block_root,
block,
versioned_hashes,
custody_columns,
publish_fn,
)
.await
} else {
fetch_and_process_blobs_v1(
chain_adapter,
block_root,
block,
versioned_hashes,
publish_fn,
)
.await
}
}
async fn fetch_and_process_blobs_v1<T: BeaconChainTypes>(
chain_adapter: FetchBlobsBeaconAdapter<T>,
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec>>,
versioned_hashes: Vec<VersionedHash>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + Sized,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let num_expected_blobs = versioned_hashes.len();
metrics::observe(&metrics::BLOBS_FROM_EL_EXPECTED, num_expected_blobs as f64);
debug!(num_expected_blobs, "Fetching blobs from the EL");
let response = chain_adapter
.get_blobs_v1(versioned_hashes)
.await
.inspect_err(|_| {
inc_counter(&metrics::BLOBS_FROM_EL_ERROR_TOTAL);
})?;
let num_fetched_blobs = response.iter().filter(|opt| opt.is_some()).count();
metrics::observe(&metrics::BLOBS_FROM_EL_RECEIVED, num_fetched_blobs as f64);
if num_fetched_blobs == 0 {
debug!(num_expected_blobs, "No blobs fetched from the EL");
inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
return Ok(None);
} else {
inc_counter(&metrics::BLOBS_FROM_EL_HIT_TOTAL);
}
let (signed_block_header, kzg_commitments_proof) = block
.signed_block_header_and_kzg_commitments_proof()
.map_err(FetchEngineBlobError::BeaconStateError)?;
let fixed_blob_sidecar_list = build_blob_sidecars(
&block,
response,
signed_block_header,
&kzg_commitments_proof,
chain_adapter.spec(),
)?;
// Gossip verify blobs before publishing. This prevents blobs with invalid KZG proofs from
// the EL making it into the data availability checker. We do not immediately add these
// blobs to the observed blobs/columns cache because we want to allow blobs/columns to arrive on gossip
// and be accepted (and propagated) while we are waiting to publish. Just before publishing
// we will observe the blobs/columns and only proceed with publishing if they are not yet seen.
let blobs_to_import_and_publish = fixed_blob_sidecar_list
.iter()
.filter_map(|opt_blob| {
let blob = opt_blob.as_ref()?;
match chain_adapter.verify_blob_for_gossip(blob) {
Ok(verified) => Some(Ok(verified)),
// Ignore already seen blobs.
Err(GossipBlobError::RepeatBlob { .. }) => None,
Err(e) => Some(Err(e)),
}
})
.collect::<Result<Vec<_>, _>>()
.map_err(FetchEngineBlobError::GossipBlob)?;
if !blobs_to_import_and_publish.is_empty() {
publish_fn(BlobsOrDataColumns::Blobs(blobs_to_import_and_publish));
}
debug!(num_fetched_blobs, "Processing engine blobs");
let availability_processing_status = chain_adapter
.process_engine_blobs(
block.slot(),
block_root,
EngineGetBlobsOutput::Blobs(fixed_blob_sidecar_list.clone()),
)
.await?;
Ok(Some(availability_processing_status))
}
async fn fetch_and_process_blobs_v2<T: BeaconChainTypes>(
chain_adapter: FetchBlobsBeaconAdapter<T>,
block_root: Hash256,
block: Arc<SignedBeaconBlock<T::EthSpec>>,
versioned_hashes: Vec<VersionedHash>,
custody_columns_indices: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
) -> Result<Option<AvailabilityProcessingStatus>, FetchEngineBlobError> {
let num_expected_blobs = versioned_hashes.len();
metrics::observe(&metrics::BLOBS_FROM_EL_EXPECTED, num_expected_blobs as f64);
debug!(num_expected_blobs, "Fetching blobs from the EL");
let response = chain_adapter
.get_blobs_v2(versioned_hashes)
.await
.inspect_err(|_| {
inc_counter(&metrics::BLOBS_FROM_EL_ERROR_TOTAL);
})?;
let Some(blobs_and_proofs) = response else {
debug!(num_expected_blobs, "No blobs fetched from the EL");
inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
return Ok(None);
};
let (blobs, proofs): (Vec<_>, Vec<_>) = blobs_and_proofs
.into_iter()
.map(|blob_and_proof| {
let BlobAndProofV2 { blob, proofs } = blob_and_proof;
(blob, proofs)
})
.unzip();
let num_fetched_blobs = blobs.len();
metrics::observe(&metrics::BLOBS_FROM_EL_RECEIVED, num_fetched_blobs as f64);
if num_fetched_blobs != num_expected_blobs {
// This scenario is not supposed to happen if the EL is spec compliant.
// It should either return all requested blobs or none, but NOT partial responses.
// If we attempt to compute columns with partial blobs, we'd end up with invalid columns.
warn!(
num_fetched_blobs,
num_expected_blobs, "The EL did not return all requested blobs"
);
inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
return Ok(None);
}
inc_counter(&metrics::BLOBS_FROM_EL_HIT_TOTAL);
if chain_adapter.fork_choice_contains_block(&block_root) {
// Avoid computing columns if the block has already been imported.
debug!(
info = "block has already been imported",
"Ignoring EL blobs response"
);
return Ok(None);
}
let custody_columns = compute_and_publish_data_columns(
&chain_adapter,
block.clone(),
blobs,
proofs,
custody_columns_indices,
publish_fn,
)
.await?;
debug!(num_fetched_blobs, "Processing engine blobs");
let availability_processing_status = chain_adapter
.process_engine_blobs(
block.slot(),
block_root,
EngineGetBlobsOutput::CustodyColumns(custody_columns),
)
.await?;
Ok(Some(availability_processing_status))
}
/// Offload the data column computation to a blocking task to avoid holding up the async runtime.
async fn compute_and_publish_data_columns<T: BeaconChainTypes>(
chain_adapter: &FetchBlobsBeaconAdapter<T>,
block: Arc<SignedBeaconBlock<T::EthSpec, FullPayload<T::EthSpec>>>,
blobs: Vec<Blob<T::EthSpec>>,
proofs: Vec<KzgProofs<T::EthSpec>>,
custody_columns_indices: HashSet<ColumnIndex>,
publish_fn: impl Fn(BlobsOrDataColumns<T>) + Send + 'static,
) -> Result<DataColumnSidecarList<T::EthSpec>, FetchEngineBlobError> {
let kzg = chain_adapter.kzg().clone();
let spec = chain_adapter.spec().clone();
chain_adapter
.executor()
.spawn_blocking_handle(
move || {
let mut timer = metrics::start_timer_vec(
&metrics::DATA_COLUMN_SIDECAR_COMPUTATION,
&[&blobs.len().to_string()],
);
let blob_refs = blobs.iter().collect::<Vec<_>>();
let cell_proofs = proofs.into_iter().flatten().collect();
let data_columns_result =
blobs_to_data_column_sidecars(&blob_refs, cell_proofs, &block, &kzg, &spec)
.discard_timer_on_break(&mut timer);
drop(timer);
// This filtering ensures we only import and publish the custody columns.
// `DataAvailabilityChecker` requires a strict match on custody columns count to
// consider a block available.
let custody_columns = data_columns_result
.map(|mut data_columns| {
data_columns.retain(|col| custody_columns_indices.contains(&col.index));
data_columns
})
.map_err(FetchEngineBlobError::DataColumnSidecarError)?;
publish_fn(BlobsOrDataColumns::DataColumns(custody_columns.clone()));
Ok(custody_columns)
},
"compute_and_publish_data_columns",
)
.ok_or(FetchEngineBlobError::RuntimeShutdown)?
.await
.map_err(FetchEngineBlobError::TokioJoin)?
}
fn build_blob_sidecars<E: EthSpec>(
block: &Arc<SignedBeaconBlock<E, FullPayload<E>>>,
response: Vec<Option<BlobAndProofV1<E>>>,
signed_block_header: SignedBeaconBlockHeader,
kzg_commitments_inclusion_proof: &FixedVector<Hash256, E::KzgCommitmentsInclusionProofDepth>,
spec: &ChainSpec,
) -> Result<FixedBlobSidecarList<E>, FetchEngineBlobError> {
let epoch = block.epoch();
let mut fixed_blob_sidecar_list =
FixedBlobSidecarList::default(spec.max_blobs_per_block(epoch) as usize);
for (index, blob_and_proof) in response
.into_iter()
.enumerate()
.filter_map(|(i, opt_blob)| Some((i, opt_blob?)))
{
match BlobSidecar::new_with_existing_proof(
index,
blob_and_proof.blob,
block,
signed_block_header.clone(),
kzg_commitments_inclusion_proof,
blob_and_proof.proof,
) {
Ok(blob) => {
if let Some(blob_mut) = fixed_blob_sidecar_list.get_mut(index) {
*blob_mut = Some(Arc::new(blob));
} else {
return Err(FetchEngineBlobError::InternalError(format!(
"Blobs from EL contains blob with invalid index {index}"
)));
}
}
Err(e) => {
return Err(FetchEngineBlobError::BlobSidecarError(e));
}
}
}
Ok(fixed_blob_sidecar_list)
}