Drop overflow cache (#5891)

* Drop overflow cache

* Update docs

* Update beacon_node/store/src/lib.rs

* Update data_availability_checker.rs

* Lint
This commit is contained in:
Lion - dapplion
2024-07-11 08:10:24 +02:00
committed by GitHub
parent 8aa02860ed
commit 880523d8d7
6 changed files with 73 additions and 1145 deletions

View File

@@ -723,13 +723,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
Ok(()) Ok(())
} }
pub fn persist_data_availability_checker(&self) -> Result<(), Error> {
let _timer = metrics::start_timer(&metrics::PERSIST_DATA_AVAILABILITY_CHECKER);
self.data_availability_checker.persist_all()?;
Ok(())
}
/// Returns the slot _right now_ according to `self.slot_clock`. Returns `Err` if the slot is /// Returns the slot _right now_ according to `self.slot_clock`. Returns `Err` if the slot is
/// unavailable. /// unavailable.
/// ///
@@ -6753,7 +6746,6 @@ impl<T: BeaconChainTypes> Drop for BeaconChain<T> {
let drop = || -> Result<(), Error> { let drop = || -> Result<(), Error> {
self.persist_head_and_fork_choice()?; self.persist_head_and_fork_choice()?;
self.persist_op_pool()?; self.persist_op_pool()?;
self.persist_data_availability_checker()?;
self.persist_eth1_cache() self.persist_eth1_cache()
}; };

View File

@@ -2,7 +2,7 @@ use crate::blob_verification::{verify_kzg_for_blob_list, GossipVerifiedBlob, Kzg
use crate::block_verification_types::{ use crate::block_verification_types::{
AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock, AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock,
}; };
use crate::data_availability_checker::overflow_lru_cache::OverflowLRUCache; use crate::data_availability_checker::overflow_lru_cache::DataAvailabilityCheckerInner;
use crate::{BeaconChain, BeaconChainTypes, BeaconStore}; use crate::{BeaconChain, BeaconChainTypes, BeaconStore};
use kzg::Kzg; use kzg::Kzg;
use slog::{debug, error, Logger}; use slog::{debug, error, Logger};
@@ -33,12 +33,32 @@ pub const OVERFLOW_LRU_CAPACITY: NonZeroUsize = new_non_zero_usize(1024);
pub const STATE_LRU_CAPACITY_NON_ZERO: NonZeroUsize = new_non_zero_usize(2); pub const STATE_LRU_CAPACITY_NON_ZERO: NonZeroUsize = new_non_zero_usize(2);
pub const STATE_LRU_CAPACITY: usize = STATE_LRU_CAPACITY_NON_ZERO.get(); pub const STATE_LRU_CAPACITY: usize = STATE_LRU_CAPACITY_NON_ZERO.get();
/// This includes a cache for any blocks or blobs that have been received over gossip or RPC /// Cache to hold fully valid data that can't be imported to fork-choice yet. After Dencun hard-fork
/// and are awaiting more components before they can be imported. Additionally the /// blocks have a sidecar of data that is received separately from the network. We call the concept
/// `DataAvailabilityChecker` is responsible for KZG verification of block components as well as /// of a block "becoming available" when all of its import dependencies are inserted into this
/// checking whether a "availability check" is required at all. /// cache.
///
/// Usually a block becomes available on its slot within a second of receiving its first component
/// over gossip. However, a block may never become available if a malicious proposer does not
/// publish its data, or there are network issues that prevent us from receiving it. If the block
/// does not become available after some time we can safely forget about it. Consider these two
/// cases:
///
/// - Global unavailability: If nobody has received the block components it's likely that the
/// proposer never made the block available. So we can safely forget about the block as it will
/// never become available.
/// - Local unavailability: Some fraction of the network has received all block components, but not us.
/// Some of our peers will eventually attest to a descendant of that block and lookup sync will
/// fetch its components. Therefore it's not strictly necessary to hold on to the partially available
/// block for too long as we can recover from other peers.
///
/// Even in periods of non-finality, the proposer is expected to publish the block's data
/// immediately. Because this cache only holds fully valid data, its capacity is bound to 1 block
/// per slot and fork: before inserting into this cache we check the proposer signature and correct
/// proposer. Having a capacity > 1 is an optimization to prevent sync lookup from having to re-fetch
/// data during moments of unstable network conditions.
pub struct DataAvailabilityChecker<T: BeaconChainTypes> { pub struct DataAvailabilityChecker<T: BeaconChainTypes> {
availability_cache: Arc<OverflowLRUCache<T>>, availability_cache: Arc<DataAvailabilityCheckerInner<T>>,
slot_clock: T::SlotClock, slot_clock: T::SlotClock,
kzg: Option<Arc<Kzg>>, kzg: Option<Arc<Kzg>>,
log: Logger, log: Logger,
@@ -74,7 +94,8 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
log: &Logger, log: &Logger,
spec: ChainSpec, spec: ChainSpec,
) -> Result<Self, AvailabilityCheckError> { ) -> Result<Self, AvailabilityCheckError> {
let overflow_cache = OverflowLRUCache::new(OVERFLOW_LRU_CAPACITY, store, spec.clone())?; let overflow_cache =
DataAvailabilityCheckerInner::new(OVERFLOW_LRU_CAPACITY, store, spec.clone())?;
Ok(Self { Ok(Self {
availability_cache: Arc::new(overflow_cache), availability_cache: Arc::new(overflow_cache),
slot_clock, slot_clock,
@@ -329,15 +350,9 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
}) })
} }
/// Persist all in memory components to disk
pub fn persist_all(&self) -> Result<(), AvailabilityCheckError> {
self.availability_cache.write_all_to_disk()
}
/// Collects metrics from the data availability checker. /// Collects metrics from the data availability checker.
pub fn metrics(&self) -> DataAvailabilityCheckerMetrics { pub fn metrics(&self) -> DataAvailabilityCheckerMetrics {
DataAvailabilityCheckerMetrics { DataAvailabilityCheckerMetrics {
num_store_entries: self.availability_cache.num_store_entries(),
state_cache_size: self.availability_cache.state_cache_size(), state_cache_size: self.availability_cache.state_cache_size(),
block_cache_size: self.availability_cache.block_cache_size(), block_cache_size: self.availability_cache.block_cache_size(),
} }
@@ -346,7 +361,6 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
/// Helper struct to group data availability checker metrics. /// Helper struct to group data availability checker metrics.
pub struct DataAvailabilityCheckerMetrics { pub struct DataAvailabilityCheckerMetrics {
pub num_store_entries: usize,
pub state_cache_size: usize, pub state_cache_size: usize,
pub block_cache_size: usize, pub block_cache_size: usize,
} }
@@ -372,7 +386,7 @@ pub fn start_availability_cache_maintenance_service<T: BeaconChainTypes>(
async fn availability_cache_maintenance_service<T: BeaconChainTypes>( async fn availability_cache_maintenance_service<T: BeaconChainTypes>(
chain: Arc<BeaconChain<T>>, chain: Arc<BeaconChain<T>>,
overflow_cache: Arc<OverflowLRUCache<T>>, overflow_cache: Arc<DataAvailabilityCheckerInner<T>>,
) { ) {
let epoch_duration = chain.slot_clock.slot_duration() * T::EthSpec::slots_per_epoch() as u32; let epoch_duration = chain.slot_clock.slot_duration() * T::EthSpec::slots_per_epoch() as u32;
loop { loop {

View File

@@ -114,38 +114,12 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
&self, &self,
diet_executed_block: DietAvailabilityPendingExecutedBlock<T::EthSpec>, diet_executed_block: DietAvailabilityPendingExecutedBlock<T::EthSpec>,
) -> Result<AvailabilityPendingExecutedBlock<T::EthSpec>, AvailabilityCheckError> { ) -> Result<AvailabilityPendingExecutedBlock<T::EthSpec>, AvailabilityCheckError> {
let maybe_state = self.states.write().pop(&diet_executed_block.state_root); let state = if let Some(state) = self.states.write().pop(&diet_executed_block.state_root) {
if let Some(state) = maybe_state { state
let block_root = diet_executed_block.block.canonical_root();
Ok(AvailabilityPendingExecutedBlock {
block: diet_executed_block.block,
import_data: BlockImportData {
block_root,
state,
parent_block: diet_executed_block.parent_block,
parent_eth1_finalization_data: diet_executed_block
.parent_eth1_finalization_data,
confirmed_state_roots: diet_executed_block.confirmed_state_roots,
consensus_context: diet_executed_block
.consensus_context
.into_consensus_context(),
},
payload_verification_outcome: diet_executed_block.payload_verification_outcome,
})
} else { } else {
self.reconstruct_pending_executed_block(diet_executed_block) self.reconstruct_state(&diet_executed_block)?
} };
}
/// Reconstruct the `AvailabilityPendingExecutedBlock` by loading the parent
/// state from disk and replaying the block. This function does NOT check the
/// LRU cache.
pub fn reconstruct_pending_executed_block(
&self,
diet_executed_block: DietAvailabilityPendingExecutedBlock<T::EthSpec>,
) -> Result<AvailabilityPendingExecutedBlock<T::EthSpec>, AvailabilityCheckError> {
let block_root = diet_executed_block.block.canonical_root(); let block_root = diet_executed_block.block.canonical_root();
let state = self.reconstruct_state(&diet_executed_block)?;
Ok(AvailabilityPendingExecutedBlock { Ok(AvailabilityPendingExecutedBlock {
block: diet_executed_block.block, block: diet_executed_block.block,
import_data: BlockImportData { import_data: BlockImportData {

View File

@@ -401,8 +401,6 @@ lazy_static! {
try_create_histogram("beacon_persist_eth1_cache", "Time taken to persist the eth1 caches"); try_create_histogram("beacon_persist_eth1_cache", "Time taken to persist the eth1 caches");
pub static ref PERSIST_FORK_CHOICE: Result<Histogram> = pub static ref PERSIST_FORK_CHOICE: Result<Histogram> =
try_create_histogram("beacon_persist_fork_choice", "Time taken to persist the fork choice struct"); try_create_histogram("beacon_persist_fork_choice", "Time taken to persist the fork choice struct");
pub static ref PERSIST_DATA_AVAILABILITY_CHECKER: Result<Histogram> =
try_create_histogram("beacon_persist_data_availability_checker", "Time taken to persist the data availability checker");
/* /*
* Eth1 * Eth1
@@ -1213,10 +1211,6 @@ pub fn scrape_for_metrics<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>) {
&DATA_AVAILABILITY_OVERFLOW_MEMORY_STATE_CACHE_SIZE, &DATA_AVAILABILITY_OVERFLOW_MEMORY_STATE_CACHE_SIZE,
da_checker_metrics.state_cache_size, da_checker_metrics.state_cache_size,
); );
set_gauge_by_usize(
&DATA_AVAILABILITY_OVERFLOW_STORE_CACHE_SIZE,
da_checker_metrics.num_store_entries,
);
if let Some((size, num_lookups)) = beacon_chain.pre_finalization_block_cache.metrics() { if let Some((size, num_lookups)) = beacon_chain.pre_finalization_block_cache.metrics() {
set_gauge_by_usize(&PRE_FINALIZATION_BLOCK_CACHE_SIZE, size); set_gauge_by_usize(&PRE_FINALIZATION_BLOCK_CACHE_SIZE, size);

View File

@@ -289,7 +289,7 @@ impl DBColumn {
/// This function returns the number of bytes used by keys in a given column. /// This function returns the number of bytes used by keys in a given column.
pub fn key_size(self) -> usize { pub fn key_size(self) -> usize {
match self { match self {
Self::OverflowLRUCache => 33, // See `OverflowKey` encode impl. Self::OverflowLRUCache => 33, // DEPRECATED
Self::BeaconMeta Self::BeaconMeta
| Self::BeaconBlock | Self::BeaconBlock
| Self::BeaconState | Self::BeaconState