Merge branch 'unstable' into max-blobs-preset

This commit is contained in:
Pawan Dhananjay
2024-10-21 14:46:00 -07:00
287 changed files with 10187 additions and 9415 deletions

View File

@@ -1,11 +1,11 @@
use kzg::{Error as KzgError, KzgCommitment};
use types::{BeaconStateError, Hash256};
use types::{BeaconStateError, ColumnIndex, Hash256};
#[derive(Debug)]
pub enum Error {
Kzg(KzgError),
KzgNotInitialized,
KzgVerificationFailed,
InvalidBlobs(KzgError),
InvalidColumn(ColumnIndex, KzgError),
ReconstructColumnsError(KzgError),
KzgCommitmentMismatch {
blob_commitment: KzgCommitment,
block_commitment: KzgCommitment,
@@ -36,8 +36,7 @@ pub enum ErrorCategory {
impl Error {
pub fn category(&self) -> ErrorCategory {
match self {
Error::KzgNotInitialized
| Error::SszTypes(_)
Error::SszTypes(_)
| Error::MissingBlobs
| Error::MissingCustodyColumns
| Error::StoreError(_)
@@ -48,11 +47,12 @@ impl Error {
| Error::UnableToDetermineImportRequirement
| Error::RebuildingStateCaches(_)
| Error::SlotClockError => ErrorCategory::Internal,
Error::Kzg(_)
Error::InvalidBlobs { .. }
| Error::InvalidColumn { .. }
| Error::ReconstructColumnsError { .. }
| Error::BlobIndexInvalid(_)
| Error::DataColumnIndexInvalid(_)
| Error::KzgCommitmentMismatch { .. }
| Error::KzgVerificationFailed => ErrorCategory::Malicious,
| Error::KzgCommitmentMismatch { .. } => ErrorCategory::Malicious,
}
}
}
@@ -80,9 +80,3 @@ impl From<state_processing::BlockReplayError> for Error {
Self::BlockReplayError(value)
}
}
impl From<KzgError> for Error {
fn from(value: KzgError) -> Self {
Self::Kzg(value)
}
}

View File

@@ -6,23 +6,19 @@ use crate::block_verification_types::{
};
use crate::data_availability_checker::{Availability, AvailabilityCheckError};
use crate::data_column_verification::KzgVerifiedCustodyDataColumn;
use crate::metrics;
use crate::BeaconChainTypes;
use kzg::Kzg;
use lru::LruCache;
use parking_lot::RwLock;
use std::collections::HashSet;
use slog::{debug, Logger};
use std::num::NonZeroUsize;
use std::sync::Arc;
use types::blob_sidecar::BlobIdentifier;
use types::runtime_var_list::RuntimeFixedList;
use types::{
BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar,
DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock,
BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, Epoch, EthSpec,
Hash256, RuntimeVariableList, SignedBeaconBlock,
};
pub type DataColumnsToPublish<E> = Option<DataColumnSidecarList<E>>;
/// This represents the components of a partially available block
///
/// The blobs are all gossip and kzg verified.
@@ -40,7 +36,7 @@ pub struct PendingComponents<E: EthSpec> {
pub enum BlockImportRequirement {
AllBlobs,
CustodyColumns(usize),
ColumnSampling(usize),
}
impl<E: EthSpec> PendingComponents<E> {
@@ -91,7 +87,7 @@ impl<E: EthSpec> PendingComponents<E> {
/// block.
///
/// This corresponds to the number of commitments that are present in a block.
pub fn num_expected_blobs(&self) -> Option<usize> {
pub fn block_kzg_commitments_count(&self) -> Option<usize> {
self.get_cached_block()
.as_ref()
.map(|b| b.get_commitments().len())
@@ -196,21 +192,61 @@ impl<E: EthSpec> PendingComponents<E> {
///
/// Returns `true` if both the block exists and the number of received blobs / custody columns
/// matches the number of expected blobs / custody columns.
pub fn is_available(&self, block_import_requirement: &BlockImportRequirement) -> bool {
pub fn is_available(
&self,
block_import_requirement: &BlockImportRequirement,
log: &Logger,
) -> bool {
let block_kzg_commitments_count_opt = self.block_kzg_commitments_count();
match block_import_requirement {
BlockImportRequirement::AllBlobs => self
.num_expected_blobs()
.map_or(false, |num_expected_blobs| {
num_expected_blobs == self.num_received_blobs()
}),
BlockImportRequirement::CustodyColumns(num_expected_columns) => {
let num_received_data_columns = self.num_received_data_columns();
BlockImportRequirement::AllBlobs => {
let received_blobs = self.num_received_blobs();
let expected_blobs_msg = block_kzg_commitments_count_opt
.as_ref()
.map(|num| num.to_string())
.unwrap_or("unknown".to_string());
debug!(log,
"Component(s) added to data availability checker";
"block_root" => ?self.block_root,
"received_block" => block_kzg_commitments_count_opt.is_some(),
"received_blobs" => received_blobs,
"expected_blobs" => expected_blobs_msg,
);
block_kzg_commitments_count_opt.map_or(false, |num_expected_blobs| {
num_expected_blobs == received_blobs
})
}
BlockImportRequirement::ColumnSampling(num_expected_columns) => {
// No data columns when there are 0 blobs
self.num_expected_blobs()
.map_or(false, |num_expected_blobs| {
num_expected_blobs == 0
|| *num_expected_columns == num_received_data_columns
})
let expected_columns_opt = block_kzg_commitments_count_opt.map(|blob_count| {
if blob_count > 0 {
*num_expected_columns
} else {
0
}
});
let expected_columns_msg = expected_columns_opt
.as_ref()
.map(|num| num.to_string())
.unwrap_or("unknown".to_string());
let num_received_columns = self.num_received_data_columns();
debug!(log,
"Component(s) added to data availability checker";
"block_root" => ?self.block_root,
"received_block" => block_kzg_commitments_count_opt.is_some(),
"received_columns" => num_received_columns,
"expected_columns" => expected_columns_msg,
);
expected_columns_opt.map_or(false, |num_expected_columns| {
num_expected_columns == num_received_columns
})
}
}
}
@@ -280,7 +316,7 @@ impl<E: EthSpec> PendingComponents<E> {
None,
)
}
BlockImportRequirement::CustodyColumns(_) => {
BlockImportRequirement::ColumnSampling(_) => {
let verified_data_columns = verified_data_columns
.into_iter()
.map(|d| d.into_inner())
@@ -310,10 +346,6 @@ impl<E: EthSpec> PendingComponents<E> {
)))
}
pub fn reconstruction_started(&mut self) {
self.reconstruction_started = true;
}
/// Returns the epoch of the block if it is cached, otherwise returns the epoch of the first blob.
pub fn epoch(&self) -> Option<Epoch> {
self.executed_block
@@ -352,28 +384,37 @@ pub struct DataAvailabilityCheckerInner<T: BeaconChainTypes> {
/// This cache holds a limited number of states in memory and reconstructs them
/// from disk when necessary. This is necessary until we merge tree-states
state_cache: StateLRUCache<T>,
/// The number of data columns the node is custodying.
custody_column_count: usize,
/// The number of data columns the node is sampling via subnet sampling.
sampling_column_count: usize,
spec: Arc<ChainSpec>,
}
// This enum is only used internally within the crate in the reconstruction function to improve
// readability, so it's OK to not box the variant value, and it shouldn't impact memory much with
// the current usage, as it's deconstructed immediately.
#[allow(clippy::large_enum_variant)]
pub(crate) enum ReconstructColumnsDecision<E: EthSpec> {
Yes(PendingComponents<E>),
No(&'static str),
}
impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
pub fn new(
capacity: NonZeroUsize,
beacon_store: BeaconStore<T>,
custody_column_count: usize,
sampling_column_count: usize,
spec: Arc<ChainSpec>,
) -> Result<Self, AvailabilityCheckError> {
Ok(Self {
critical: RwLock::new(LruCache::new(capacity)),
state_cache: StateLRUCache::new(beacon_store, spec.clone()),
custody_column_count,
sampling_column_count,
spec,
})
}
pub fn custody_subnet_count(&self) -> usize {
self.custody_column_count
pub fn sampling_column_count(&self) -> usize {
self.sampling_column_count
}
/// Returns true if the block root is known, without altering the LRU ordering
@@ -439,41 +480,20 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
) -> Result<BlockImportRequirement, AvailabilityCheckError> {
let peer_das_enabled = self.spec.is_peer_das_enabled_for_epoch(epoch);
if peer_das_enabled {
Ok(BlockImportRequirement::CustodyColumns(
self.custody_column_count,
Ok(BlockImportRequirement::ColumnSampling(
self.sampling_column_count,
))
} else {
Ok(BlockImportRequirement::AllBlobs)
}
}
/// Potentially trigger reconstruction if:
/// - Our custody requirement is all columns
/// - We >= 50% of columns, but not all columns
fn should_reconstruct(
&self,
block_import_requirement: &BlockImportRequirement,
pending_components: &PendingComponents<T::EthSpec>,
) -> bool {
let BlockImportRequirement::CustodyColumns(num_expected_columns) = block_import_requirement
else {
return false;
};
let num_of_columns = self.spec.number_of_columns;
let has_missing_columns = pending_components.verified_data_columns.len() < num_of_columns;
has_missing_columns
&& !pending_components.reconstruction_started
&& *num_expected_columns == num_of_columns
&& pending_components.verified_data_columns.len() >= num_of_columns / 2
}
pub fn put_kzg_verified_blobs<I: IntoIterator<Item = KzgVerifiedBlob<T::EthSpec>>>(
&self,
block_root: Hash256,
epoch: Epoch,
kzg_verified_blobs: I,
log: &Logger,
) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
let mut fixed_blobs =
RuntimeFixedList::new(vec![None; self.spec.max_blobs_per_block(epoch) as usize]);
@@ -498,7 +518,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
pending_components.merge_blobs(fixed_blobs);
let block_import_requirement = self.block_import_requirement(epoch)?;
if pending_components.is_available(&block_import_requirement) {
if pending_components.is_available(&block_import_requirement, log) {
write_lock.put(block_root, pending_components.clone());
// No need to hold the write lock anymore
drop(write_lock);
@@ -516,12 +536,11 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
I: IntoIterator<Item = KzgVerifiedCustodyDataColumn<T::EthSpec>>,
>(
&self,
kzg: &Kzg,
block_root: Hash256,
epoch: Epoch,
kzg_verified_data_columns: I,
) -> Result<(Availability<T::EthSpec>, DataColumnsToPublish<T::EthSpec>), AvailabilityCheckError>
{
log: &Logger,
) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
let mut write_lock = self.critical.write();
// Grab existing entry or create a new entry.
@@ -537,64 +556,67 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
let block_import_requirement = self.block_import_requirement(epoch)?;
// Potentially trigger reconstruction if:
// - Our custody requirement is all columns
// - We >= 50% of columns
let data_columns_to_publish =
if self.should_reconstruct(&block_import_requirement, &pending_components) {
pending_components.reconstruction_started();
let timer = metrics::start_timer(&metrics::DATA_AVAILABILITY_RECONSTRUCTION_TIME);
let existing_column_indices = pending_components
.verified_data_columns
.iter()
.map(|d| d.index())
.collect::<HashSet<_>>();
// Will only return an error if:
// - < 50% of columns
// - There are duplicates
let all_data_columns = KzgVerifiedCustodyDataColumn::reconstruct_columns(
kzg,
pending_components.verified_data_columns.as_slice(),
&self.spec,
)?;
let data_columns_to_publish = all_data_columns
.iter()
.filter(|d| !existing_column_indices.contains(&d.index()))
.map(|d| d.clone_arc())
.collect::<Vec<_>>();
pending_components.verified_data_columns = all_data_columns;
metrics::stop_timer(timer);
metrics::inc_counter_by(
&metrics::DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS,
data_columns_to_publish.len() as u64,
);
Some(data_columns_to_publish)
} else {
None
};
if pending_components.is_available(&block_import_requirement) {
if pending_components.is_available(&block_import_requirement, log) {
write_lock.put(block_root, pending_components.clone());
// No need to hold the write lock anymore
drop(write_lock);
pending_components
.make_available(block_import_requirement, &self.spec, |diet_block| {
self.state_cache.recover_pending_executed_block(diet_block)
})
.map(|availability| (availability, data_columns_to_publish))
pending_components.make_available(block_import_requirement, &self.spec, |diet_block| {
self.state_cache.recover_pending_executed_block(diet_block)
})
} else {
write_lock.put(block_root, pending_components);
Ok((
Availability::MissingComponents(block_root),
data_columns_to_publish,
))
Ok(Availability::MissingComponents(block_root))
}
}
/// Check whether data column reconstruction should be attempted.
///
/// Potentially trigger reconstruction if:
/// - Our custody requirement is all columns (supernode), and we haven't got all columns
/// - We have >= 50% of columns, but not all columns
/// - Reconstruction hasn't been started for the block
///
/// If reconstruction is required, returns `PendingComponents` which contains the
/// components to be used as inputs to reconstruction, otherwise returns a `reason`.
pub fn check_and_set_reconstruction_started(
&self,
block_root: &Hash256,
) -> ReconstructColumnsDecision<T::EthSpec> {
let mut write_lock = self.critical.write();
let Some(pending_components) = write_lock.get_mut(block_root) else {
// Block may have been imported as it does not exist in availability cache.
return ReconstructColumnsDecision::No("block already imported");
};
// If we're sampling all columns, it means we must be custodying all columns.
let custody_column_count = self.sampling_column_count();
let total_column_count = self.spec.number_of_columns;
let received_column_count = pending_components.verified_data_columns.len();
if pending_components.reconstruction_started {
return ReconstructColumnsDecision::No("already started");
}
if custody_column_count != total_column_count {
return ReconstructColumnsDecision::No("not required for full node");
}
if received_column_count == self.spec.number_of_columns {
return ReconstructColumnsDecision::No("all columns received");
}
if received_column_count < total_column_count / 2 {
return ReconstructColumnsDecision::No("not enough columns");
}
pending_components.reconstruction_started = true;
ReconstructColumnsDecision::Yes(pending_components.clone())
}
/// This could mean some invalid data columns made it through to the `DataAvailabilityChecker`.
/// In this case, we remove all data columns in `PendingComponents`, reset reconstruction
/// status so that we can attempt to retrieve columns from peers again.
pub fn handle_reconstruction_failure(&self, block_root: &Hash256) {
if let Some(pending_components_mut) = self.critical.write().get_mut(block_root) {
pending_components_mut.verified_data_columns = vec![];
pending_components_mut.reconstruction_started = false;
}
}
@@ -603,6 +625,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
pub fn put_pending_executed_block(
&self,
executed_block: AvailabilityPendingExecutedBlock<T::EthSpec>,
log: &Logger,
) -> Result<Availability<T::EthSpec>, AvailabilityCheckError> {
let mut write_lock = self.critical.write();
let block_root = executed_block.import_data.block_root;
@@ -626,7 +649,7 @@ impl<T: BeaconChainTypes> DataAvailabilityCheckerInner<T> {
// Check if we have all components and entire set is consistent.
let block_import_requirement = self.block_import_requirement(epoch)?;
if pending_components.is_available(&block_import_requirement) {
if pending_components.is_available(&block_import_requirement, log) {
write_lock.put(block_root, pending_components.clone());
// No need to hold the write lock anymore
drop(write_lock);
@@ -709,7 +732,7 @@ mod test {
fn get_store_with_spec<E: EthSpec>(
db_path: &TempDir,
spec: ChainSpec,
spec: Arc<ChainSpec>,
log: Logger,
) -> Arc<HotColdDB<E, LevelDB<E>, LevelDB<E>>> {
let hot_path = db_path.path().join("hot_db");
@@ -746,6 +769,7 @@ mod test {
spec.bellatrix_fork_epoch = Some(bellatrix_fork_epoch);
spec.capella_fork_epoch = Some(capella_fork_epoch);
spec.deneb_fork_epoch = Some(deneb_fork_epoch);
let spec = Arc::new(spec);
let chain_store = get_store_with_spec::<E>(db_path, spec.clone(), log.clone());
let validators_keypairs =
@@ -890,7 +914,7 @@ mod test {
let log = test_logger();
let chain_db_path = tempdir().expect("should get temp dir");
let harness = get_deneb_chain(log.clone(), &chain_db_path).await;
let spec = Arc::new(harness.spec.clone());
let spec = harness.spec.clone();
let test_store = harness.chain.store.clone();
let capacity_non_zero = new_non_zero_usize(capacity);
let cache = Arc::new(
@@ -924,7 +948,7 @@ mod test {
);
assert!(cache.critical.read().is_empty(), "cache should be empty");
let availability = cache
.put_pending_executed_block(pending_block)
.put_pending_executed_block(pending_block, harness.logger())
.expect("should put block");
if blobs_expected == 0 {
assert!(
@@ -963,7 +987,7 @@ mod test {
for (blob_index, gossip_blob) in blobs.into_iter().enumerate() {
kzg_verified_blobs.push(gossip_blob.into_inner());
let availability = cache
.put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone())
.put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger())
.expect("should put blob");
if blob_index == blobs_expected - 1 {
assert!(matches!(availability, Availability::Available(_)));
@@ -992,7 +1016,7 @@ mod test {
for gossip_blob in blobs {
kzg_verified_blobs.push(gossip_blob.into_inner());
let availability = cache
.put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone())
.put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger())
.expect("should put blob");
assert_eq!(
availability,
@@ -1002,7 +1026,7 @@ mod test {
assert_eq!(cache.critical.read().len(), 1);
}
let availability = cache
.put_pending_executed_block(pending_block)
.put_pending_executed_block(pending_block, harness.logger())
.expect("should put block");
assert!(
matches!(availability, Availability::Available(_)),
@@ -1070,7 +1094,7 @@ mod test {
// put the block in the cache
let availability = cache
.put_pending_executed_block(pending_block)
.put_pending_executed_block(pending_block, harness.logger())
.expect("should put block");
// grab the diet block from the cache for later testing