Add DataColumnSidecar gossip topic and message handling (#6147)

* Add `DataColumnSidecar` gossip topic and verification (#5050 and #5783).

* Remove gossip verification changes (#5783).

* Merge branch 'unstable' into data-column-gossip

# Conflicts:
#	beacon_node/beacon_chain/src/data_column_verification.rs
#	beacon_node/beacon_chain/src/lib.rs

* Add gossip cache timeout for data columns. Rename data column metrics for consistency.

* Remove usage of `unimplemented!` and address review comments.

* Remove unnused `GossipDataColumnError` variants and address review comments.

* Merge branch 'unstable' into data-column-gossip

* Update Cargo.lock

* Arc `ChainSpec` in discovery to avoid performance regression when needing to clone it repeatedly.
This commit is contained in:
Jimmy Chen
2024-07-25 16:05:18 +10:00
committed by GitHub
parent a2ab26c327
commit 4e5a363a4f
26 changed files with 907 additions and 31 deletions

View File

@@ -23,6 +23,7 @@ use crate::chain_config::ChainConfig;
use crate::data_availability_checker::{
Availability, AvailabilityCheckError, AvailableBlock, DataAvailabilityChecker,
};
use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn};
use crate::early_attester_cache::EarlyAttesterCache;
use crate::errors::{BeaconChainError as Error, BlockProductionError};
use crate::eth1_chain::{Eth1Chain, Eth1ChainBackend};
@@ -2118,6 +2119,19 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
})
}
pub fn verify_data_column_sidecar_for_gossip(
self: &Arc<Self>,
data_column_sidecar: Arc<DataColumnSidecar<T::EthSpec>>,
subnet_id: u64,
) -> Result<GossipVerifiedDataColumn<T>, GossipDataColumnError> {
metrics::inc_counter(&metrics::DATA_COLUMN_SIDECAR_PROCESSING_REQUESTS);
let _timer = metrics::start_timer(&metrics::DATA_COLUMN_SIDECAR_GOSSIP_VERIFICATION_TIMES);
GossipVerifiedDataColumn::new(data_column_sidecar, subnet_id, self).map(|v| {
metrics::inc_counter(&metrics::DATA_COLUMN_SIDECAR_PROCESSING_SUCCESSES);
v
})
}
pub fn verify_blob_sidecar_for_gossip(
self: &Arc<Self>,
blob_sidecar: Arc<BlobSidecar<T::EthSpec>>,
@@ -2964,6 +2978,39 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
self.remove_notified(&block_root, r)
}
/// Cache the data columns in the processing cache, process it, then evict it from the cache if it was
/// imported or errors.
pub async fn process_gossip_data_columns(
self: &Arc<Self>,
data_columns: Vec<GossipVerifiedDataColumn<T>>,
) -> Result<AvailabilityProcessingStatus, BlockError<T::EthSpec>> {
let Ok(block_root) = data_columns
.iter()
.map(|c| c.block_root())
.unique()
.exactly_one()
else {
return Err(BlockError::InternalError(
"Columns should be from the same block".to_string(),
));
};
// If this block has already been imported to forkchoice it must have been available, so
// we don't need to process its samples again.
if self
.canonical_head
.fork_choice_read_lock()
.contains_block(&block_root)
{
return Err(BlockError::BlockIsAlreadyKnown(block_root));
}
let r = self
.check_gossip_data_columns_availability_and_import(data_columns)
.await;
self.remove_notified_custody_columns(&block_root, r)
}
/// Cache the blobs in the processing cache, process it, then evict it from the cache if it was
/// imported or errors.
pub async fn process_rpc_blobs(
@@ -3013,6 +3060,21 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
r
}
/// Remove any block components from the *processing cache* if we no longer require them. If the
/// block was imported full or erred, we no longer require them.
fn remove_notified_custody_columns(
&self,
block_root: &Hash256,
r: Result<AvailabilityProcessingStatus, BlockError<T::EthSpec>>,
) -> Result<AvailabilityProcessingStatus, BlockError<T::EthSpec>> {
let has_missing_components =
matches!(r, Ok(AvailabilityProcessingStatus::MissingComponents(_, _)));
if !has_missing_components {
self.reqresp_pre_import_cache.write().remove(block_root);
}
r
}
/// Wraps `process_block` in logic to cache the block's commitments in the processing cache
/// and evict if the block was imported or errored.
pub async fn process_block_with_early_caching<B: IntoExecutionPendingBlock<T>>(
@@ -3257,6 +3319,31 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
self.process_availability(slot, availability).await
}
/// Checks if the provided data column can make any cached blocks available, and imports immediately
/// if so, otherwise caches the data column in the data availability checker.
async fn check_gossip_data_columns_availability_and_import(
self: &Arc<Self>,
data_columns: Vec<GossipVerifiedDataColumn<T>>,
) -> Result<AvailabilityProcessingStatus, BlockError<T::EthSpec>> {
if let Some(slasher) = self.slasher.as_ref() {
for data_colum in &data_columns {
slasher.accept_block_header(data_colum.signed_block_header());
}
}
let Ok(slot) = data_columns.iter().map(|c| c.slot()).unique().exactly_one() else {
return Err(BlockError::InternalError(
"Columns for the same block should have matching slot".to_string(),
));
};
let availability = self
.data_availability_checker
.put_gossip_data_columns(data_columns)?;
self.process_availability(slot, availability).await
}
/// Checks if the provided blobs can make any cached blocks available, and imports immediately
/// if so, otherwise caches the blob in the data availability checker.
async fn check_rpc_blob_availability_and_import(

View File

@@ -54,6 +54,7 @@ use crate::block_verification_types::{
AsBlock, BlockContentsError, BlockImportData, GossipVerifiedBlockContents, RpcBlock,
};
use crate::data_availability_checker::{AvailabilityCheckError, MaybeAvailableBlock};
use crate::data_column_verification::GossipDataColumnError;
use crate::eth1_finalization_cache::Eth1FinalizationData;
use crate::execution_payload::{
is_optimistic_candidate_block, validate_execution_payload_for_gossip, validate_merge_block,
@@ -303,6 +304,13 @@ pub enum BlockError<E: EthSpec> {
/// TODO: We may need to penalize the peer that gave us a potentially invalid rpc blob.
/// https://github.com/sigp/lighthouse/issues/4546
AvailabilityCheck(AvailabilityCheckError),
/// An internal error has occurred when processing the block or sidecars.
///
/// ## Peer scoring
///
/// We were unable to process this block due to an internal error. It's unclear if the block is
/// valid.
InternalError(String),
}
impl<E: EthSpec> From<AvailabilityCheckError> for BlockError<E> {
@@ -523,6 +531,20 @@ impl<E: EthSpec> BlockSlashInfo<GossipBlobError<E>> {
}
}
impl BlockSlashInfo<GossipDataColumnError> {
pub fn from_early_error_data_column(
header: SignedBeaconBlockHeader,
e: GossipDataColumnError,
) -> Self {
match e {
GossipDataColumnError::ProposalSignatureInvalid => BlockSlashInfo::SignatureInvalid(e),
// `InvalidSignature` could indicate any signature in the block, so we want
// to recheck the proposer signature alone.
_ => BlockSlashInfo::SignatureNotChecked(header, e),
}
}
}
/// Process invalid blocks to see if they are suitable for the slasher.
///
/// If no slasher is configured, this is a no-op.
@@ -2007,6 +2029,23 @@ impl<E: EthSpec> BlockBlobError for GossipBlobError<E> {
}
}
impl BlockBlobError for GossipDataColumnError {
fn not_later_than_parent_error(data_column_slot: Slot, parent_slot: Slot) -> Self {
GossipDataColumnError::IsNotLaterThanParent {
data_column_slot,
parent_slot,
}
}
fn unknown_validator_error(validator_index: u64) -> Self {
GossipDataColumnError::UnknownValidator(validator_index)
}
fn proposer_signature_invalid() -> Self {
GossipDataColumnError::ProposalSignatureInvalid
}
}
/// Performs a cheap (time-efficient) state advancement so the committees and proposer shuffling for
/// `slot` can be obtained from `state`.
///

View File

@@ -20,6 +20,7 @@ mod error;
mod overflow_lru_cache;
mod state_lru_cache;
use crate::data_column_verification::GossipVerifiedDataColumn;
pub use error::{Error as AvailabilityCheckError, ErrorCategory as AvailabilityCheckErrorCategory};
use types::non_zero_usize::new_non_zero_usize;
@@ -188,6 +189,14 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
)
}
pub fn put_gossip_data_columns(
&self,
_gossip_data_columns: Vec<GossipVerifiedDataColumn<T>>,
) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
// TODO(das) to be implemented
Err(AvailabilityCheckError::Unexpected)
}
/// Check if we have all the blobs for a block. Returns `Availability` which has information
/// about whether all components have been received or more are required.
pub fn put_pending_executed_block(

View File

@@ -1,8 +1,144 @@
use crate::block_verification::{process_block_slash_info, BlockSlashInfo};
use crate::{BeaconChain, BeaconChainError, BeaconChainTypes};
use derivative::Derivative;
use kzg::{Error as KzgError, Kzg};
use ssz_derive::{Decode, Encode};
use std::sync::Arc;
use types::data_column_sidecar::{ColumnIndex, DataColumnSidecar};
use types::EthSpec;
use types::data_column_sidecar::{ColumnIndex, DataColumnIdentifier};
use types::{
BeaconStateError, DataColumnSidecar, EthSpec, Hash256, RuntimeVariableList,
SignedBeaconBlockHeader, Slot,
};
/// An error occurred while validating a gossip data column.
#[derive(Debug)]
pub enum GossipDataColumnError {
/// There was an error whilst processing the data column. It is not known if it is
/// valid or invalid.
///
/// ## Peer scoring
///
/// We were unable to process this data column due to an internal error. It's
/// unclear if the data column is valid.
BeaconChainError(BeaconChainError),
/// The proposal signature in invalid.
///
/// ## Peer scoring
///
/// The data column is invalid and the peer is faulty.
ProposalSignatureInvalid,
/// The proposal_index corresponding to data column.beacon_block_root is not known.
///
/// ## Peer scoring
///
/// The data column is invalid and the peer is faulty.
UnknownValidator(u64),
/// The provided data column is not from a later slot than its parent.
///
/// ## Peer scoring
///
/// The data column is invalid and the peer is faulty.
IsNotLaterThanParent {
data_column_slot: Slot,
parent_slot: Slot,
},
/// `Kzg` struct hasn't been initialized. This is an internal error.
///
/// ## Peer scoring
///
/// The peer isn't faulty, This is an internal error.
KzgNotInitialized,
/// The kzg verification failed.
///
/// ## Peer scoring
///
/// The data column sidecar is invalid and the peer is faulty.
InvalidKzgProof(kzg::Error),
}
impl From<BeaconChainError> for GossipDataColumnError {
fn from(e: BeaconChainError) -> Self {
GossipDataColumnError::BeaconChainError(e)
}
}
impl From<BeaconStateError> for GossipDataColumnError {
fn from(e: BeaconStateError) -> Self {
GossipDataColumnError::BeaconChainError(BeaconChainError::BeaconStateError(e))
}
}
pub type GossipVerifiedDataColumnList<T> = RuntimeVariableList<GossipVerifiedDataColumn<T>>;
/// A wrapper around a `DataColumnSidecar` that indicates it has been approved for re-gossiping on
/// the p2p network.
#[derive(Debug)]
pub struct GossipVerifiedDataColumn<T: BeaconChainTypes> {
block_root: Hash256,
data_column: KzgVerifiedDataColumn<T::EthSpec>,
}
impl<T: BeaconChainTypes> GossipVerifiedDataColumn<T> {
pub fn new(
column_sidecar: Arc<DataColumnSidecar<T::EthSpec>>,
subnet_id: u64,
chain: &BeaconChain<T>,
) -> Result<Self, GossipDataColumnError> {
let header = column_sidecar.signed_block_header.clone();
// We only process slashing info if the gossip verification failed
// since we do not process the data column any further in that case.
validate_data_column_sidecar_for_gossip(column_sidecar, subnet_id, chain).map_err(|e| {
process_block_slash_info::<_, GossipDataColumnError>(
chain,
BlockSlashInfo::from_early_error_data_column(header, e),
)
})
}
pub fn id(&self) -> DataColumnIdentifier {
DataColumnIdentifier {
block_root: self.block_root,
index: self.data_column.data_column_index(),
}
}
pub fn block_root(&self) -> Hash256 {
self.block_root
}
pub fn slot(&self) -> Slot {
self.data_column.data.slot()
}
pub fn signed_block_header(&self) -> SignedBeaconBlockHeader {
self.data_column.data.signed_block_header.clone()
}
}
/// Wrapper over a `DataColumnSidecar` for which we have completed kzg verification.
#[derive(Debug, Derivative, Clone, Encode, Decode)]
#[derivative(PartialEq, Eq)]
#[ssz(struct_behaviour = "transparent")]
pub struct KzgVerifiedDataColumn<E: EthSpec> {
data: Arc<DataColumnSidecar<E>>,
}
impl<E: EthSpec> KzgVerifiedDataColumn<E> {
pub fn new(data_column: Arc<DataColumnSidecar<E>>, kzg: &Kzg) -> Result<Self, KzgError> {
verify_kzg_for_data_column(data_column, kzg)
}
pub fn as_data_column(&self) -> &DataColumnSidecar<E> {
&self.data
}
/// This is cheap as we're calling clone on an Arc
pub fn clone_data_column(&self) -> Arc<DataColumnSidecar<E>> {
self.data.clone()
}
pub fn data_column_index(&self) -> u64 {
self.data.index
}
}
/// Data column that we must custody and has completed kzg verification
#[derive(Debug, Derivative, Clone, Encode, Decode)]
@@ -17,3 +153,48 @@ impl<E: EthSpec> KzgVerifiedCustodyDataColumn<E> {
self.data.index
}
}
/// Complete kzg verification for a `DataColumnSidecar`.
///
/// Returns an error if the kzg verification check fails.
pub fn verify_kzg_for_data_column<E: EthSpec>(
data_column: Arc<DataColumnSidecar<E>>,
_kzg: &Kzg,
) -> Result<KzgVerifiedDataColumn<E>, KzgError> {
// TODO(das): KZG verification to be implemented
Ok(KzgVerifiedDataColumn { data: data_column })
}
/// Complete kzg verification for a list of `DataColumnSidecar`s.
/// Returns an error if any of the `DataColumnSidecar`s fails kzg verification.
///
/// Note: This function should be preferred over calling `verify_kzg_for_data_column`
/// in a loop since this function kzg verifies a list of data columns more efficiently.
pub fn verify_kzg_for_data_column_list<'a, E: EthSpec, I>(
_data_column_iter: I,
_kzg: &'a Kzg,
) -> Result<(), KzgError>
where
I: Iterator<Item = &'a Arc<DataColumnSidecar<E>>> + Clone,
{
// TODO(das): implement KZG verification
Ok(())
}
pub fn validate_data_column_sidecar_for_gossip<T: BeaconChainTypes>(
data_column: Arc<DataColumnSidecar<T::EthSpec>>,
_subnet: u64,
chain: &BeaconChain<T>,
) -> Result<GossipVerifiedDataColumn<T>, GossipDataColumnError> {
// TODO(das): implement gossip verification
let kzg = chain
.kzg
.clone()
.ok_or(GossipDataColumnError::KzgNotInitialized)?;
let kzg_verified_data_column = verify_kzg_for_data_column(data_column.clone(), &kzg)
.map_err(GossipDataColumnError::InvalidKzgProof)?;
Ok(GossipVerifiedDataColumn {
block_root: data_column.block_root(),
data_column: kzg_verified_data_column,
})
}

View File

@@ -19,7 +19,7 @@ pub mod canonical_head;
pub mod capella_readiness;
pub mod chain_config;
pub mod data_availability_checker;
mod data_column_verification;
pub mod data_column_verification;
pub mod deneb_readiness;
mod early_attester_cache;
pub mod electra_readiness;

View File

@@ -1048,6 +1048,18 @@ lazy_static! {
"blob_sidecar_inclusion_proof_computation_seconds",
"Time taken to compute blob sidecar inclusion proof"
);
pub static ref DATA_COLUMN_SIDECAR_PROCESSING_REQUESTS: Result<IntCounter> = try_create_int_counter(
"beacon_data_column_sidecar_processing_requests_total",
"Count of all data column sidecars submitted for processing"
);
pub static ref DATA_COLUMN_SIDECAR_PROCESSING_SUCCESSES: Result<IntCounter> = try_create_int_counter(
"beacon_data_column_sidecar_processing_successes_total",
"Number of data column sidecars verified for gossip"
);
pub static ref DATA_COLUMN_SIDECAR_GOSSIP_VERIFICATION_TIMES: Result<Histogram> = try_create_histogram(
"beacon_data_column_sidecar_gossip_verification_seconds",
"Full runtime of data column sidecars gossip verification"
);
}
// Fifth lazy-static block is used to account for macro recursion limit.