From 06025228ae489ac55137a238d0c16e9c76005da2 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 19 Mar 2026 20:09:13 +1100 Subject: [PATCH] Gloas cold DB (#8991) Closes: - https://github.com/sigp/lighthouse/issues/8958 - Update the `HotColdStore` to handle storage of cold states. - Update `BeaconSnapshot` to hold the execution envelope. This is required to make `chain_dump`-related checks sane, and will be generally useful (see: https://github.com/sigp/lighthouse/issues/8956). - Bug fix in the `BlockReplayer` for the case where the starting state is already `Full` (we should not try to apply another payload). This happens on the cold DB path because we try to replay from the closest cached state (which is often full). - Update `test_gloas_hot_state_hierarchy` to cover the cold DB migration. Co-Authored-By: Michael Sproul Co-Authored-By: Michael Sproul --- beacon_node/beacon_chain/src/beacon_chain.rs | 49 ++++++++++++++--- .../beacon_chain/src/beacon_snapshot.rs | 16 +++++- beacon_node/beacon_chain/src/builder.rs | 4 ++ .../beacon_chain/src/canonical_head.rs | 2 + .../beacon_chain/tests/block_verification.rs | 2 + beacon_node/beacon_chain/tests/store_tests.rs | 6 +- beacon_node/store/src/hot_cold_store.rs | 55 +++++++++++++++++-- beacon_node/store/src/invariants.rs | 29 +++++++--- .../state_processing/src/block_replayer.rs | 1 + 9 files changed, 139 insertions(+), 25 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 20af7b4630..c7009fc6dc 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -6689,6 +6689,9 @@ impl BeaconChain { let mut prev_block_root = None; let mut prev_beacon_state = None; + // Collect all blocks. + let mut blocks = vec![]; + for res in self.forwards_iter_block_roots(from_slot)? { let (beacon_block_root, _) = res?; @@ -6704,16 +6707,42 @@ impl BeaconChain { .ok_or_else(|| { Error::DBInconsistent(format!("Missing block {}", beacon_block_root)) })?; - let beacon_state_root = beacon_block.state_root(); + blocks.push((beacon_block_root, Arc::new(beacon_block))); + } + + // Collect states, using the next blocks to determine if states are full (have Gloas + // payloads). + for (i, (block_root, block)) in blocks.iter().enumerate() { + let (opt_envelope, state_root) = if block.fork_name_unchecked().gloas_enabled() { + let opt_envelope = self.store.get_payload_envelope(block_root)?.map(Arc::new); + + if let Some((_, next_block)) = blocks.get(i + 1) { + let block_hash = block.payload_bid_block_hash()?; + if next_block.is_parent_block_full(block_hash) { + let envelope = opt_envelope.ok_or_else(|| { + Error::DBInconsistent(format!("Missing envelope {block_root:?}")) + })?; + let state_root = envelope.message.state_root; + (Some(envelope), state_root) + } else { + (None, block.state_root()) + } + } else { + // TODO(gloas): should use fork choice/cached head for last block in sequence + opt_envelope + .as_ref() + .map_or((None, block.state_root()), |envelope| { + (Some(envelope.clone()), envelope.message.state_root) + }) + } + } else { + (None, block.state_root()) + }; - // This branch is reached from the HTTP API. We assume the user wants - // to cache states so that future calls are faster. let mut beacon_state = self .store - .get_state(&beacon_state_root, Some(beacon_block.slot()), true)? - .ok_or_else(|| { - Error::DBInconsistent(format!("Missing state {:?}", beacon_state_root)) - })?; + .get_state(&state_root, Some(block.slot()), true)? + .ok_or_else(|| Error::DBInconsistent(format!("Missing state {:?}", state_root)))?; // This beacon state might come from the freezer DB, which means it could have pending // updates or lots of untethered memory. We rebase it on the previous state in order to @@ -6726,12 +6755,14 @@ impl BeaconChain { prev_beacon_state = Some(beacon_state.clone()); let snapshot = BeaconSnapshot { - beacon_block: Arc::new(beacon_block), - beacon_block_root, + beacon_block: block.clone(), + execution_envelope: opt_envelope, + beacon_block_root: *block_root, beacon_state, }; dump.push(snapshot); } + Ok(dump) } diff --git a/beacon_node/beacon_chain/src/beacon_snapshot.rs b/beacon_node/beacon_chain/src/beacon_snapshot.rs index e9fde48ac6..566713e3f3 100644 --- a/beacon_node/beacon_chain/src/beacon_snapshot.rs +++ b/beacon_node/beacon_chain/src/beacon_snapshot.rs @@ -2,7 +2,7 @@ use serde::Serialize; use std::sync::Arc; use types::{ AbstractExecPayload, BeaconState, EthSpec, FullPayload, Hash256, SignedBeaconBlock, - SignedBlindedBeaconBlock, + SignedBlindedBeaconBlock, SignedExecutionPayloadEnvelope, }; /// Represents some block and its associated state. Generally, this will be used for tracking the @@ -10,6 +10,7 @@ use types::{ #[derive(Clone, Serialize, PartialEq, Debug)] pub struct BeaconSnapshot = FullPayload> { pub beacon_block: Arc>, + pub execution_envelope: Option>>, pub beacon_block_root: Hash256, pub beacon_state: BeaconState, } @@ -31,33 +32,42 @@ impl> BeaconSnapshot { /// Create a new checkpoint. pub fn new( beacon_block: Arc>, + execution_envelope: Option>>, beacon_block_root: Hash256, beacon_state: BeaconState, ) -> Self { Self { beacon_block, + execution_envelope, beacon_block_root, beacon_state, } } - /// Returns the state root from `self.beacon_block`. + /// Returns the state root from `self.beacon_block` or `self.execution_envelope` as + /// appropriate. /// /// ## Caution /// /// It is not strictly enforced that `root(self.beacon_state) == self.beacon_state_root()`. pub fn beacon_state_root(&self) -> Hash256 { - self.beacon_block.message().state_root() + if let Some(ref envelope) = self.execution_envelope { + envelope.message.state_root + } else { + self.beacon_block.message().state_root() + } } /// Update all fields of the checkpoint. pub fn update( &mut self, beacon_block: Arc>, + execution_envelope: Option>>, beacon_block_root: Hash256, beacon_state: BeaconState, ) { self.beacon_block = beacon_block; + self.execution_envelope = execution_envelope; self.beacon_block_root = beacon_block_root; self.beacon_state = beacon_state; } diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 59fa5ec9ec..7eb92060a2 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -358,6 +358,7 @@ where Ok(( BeaconSnapshot { beacon_block_root, + execution_envelope: None, beacon_block: Arc::new(beacon_block), beacon_state, }, @@ -616,8 +617,10 @@ where .map_err(|e| format!("Failed to initialize data column info: {:?}", e))?, ); + // TODO(gloas): add check that checkpoint state is Pending let snapshot = BeaconSnapshot { beacon_block_root: weak_subj_block_root, + execution_envelope: None, beacon_block: Arc::new(weak_subj_block), beacon_state: weak_subj_state, }; @@ -800,6 +803,7 @@ where let mut head_snapshot = BeaconSnapshot { beacon_block_root: head_block_root, + execution_envelope: None, beacon_block: Arc::new(head_block), beacon_state: head_state, }; diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index fd060e2b59..0faddd1792 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -319,6 +319,7 @@ impl CanonicalHead { let snapshot = BeaconSnapshot { beacon_block_root, + execution_envelope: None, beacon_block: Arc::new(beacon_block), beacon_state, }; @@ -695,6 +696,7 @@ impl BeaconChain { BeaconSnapshot { beacon_block: Arc::new(beacon_block), + execution_envelope: None, beacon_block_root: new_view.head_block_root, beacon_state, } diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index 8981b20a55..2bb60f111a 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -77,8 +77,10 @@ async fn get_chain_segment() -> (Vec>, Vec>(); + let some_validators = (0..LOW_VALIDATOR_COUNT).collect::>(); let (genesis_state, _genesis_state_root) = harness.get_current_state_and_root(); @@ -5886,6 +5889,7 @@ async fn test_gloas_hot_state_hierarchy() { // Verify chain dump and iterators work with Gloas states. check_chain_dump(&harness, num_blocks + 1); check_iterators(&harness); + check_db_invariants(&harness); } /// Check that the HotColdDB's split_slot is equal to the start slot of the last finalized epoch. diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 428086c464..8ef91b3c74 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1906,6 +1906,51 @@ impl, Cold: ItemStore> HotColdDB } } + /// Recompute the payload status for a state at `slot` that is stored in the cold DB. + /// + /// This function returns an error for any `slot` that is outside the range of slots stored in + /// the freezer DB. + /// + /// For all slots prior to Gloas, it returns `Pending`. + /// + /// For post-Gloas slots the algorithm is: + /// + /// 1. Load the most recently applied block at `slot` (may not be from `slot` in case of a skip) + /// 2. Load the canonical `state_root` at the slot of the block. If this `state_root` matches + /// the one in the block then we know the state at *that* slot is canonically empty (no + /// payload). Conversely, if it is different, we know that the block's slot is full (assuming + /// no database corruption). + /// 3. The payload status of `slot` is the same as the payload status of `block.slot()`, because + /// we only care about whether a beacon block or payload was applied most recently, and + /// `block` is by definition the most-recently-applied block. + /// + /// All of this mucking around could be avoided if we do a schema migration to record the + /// payload status in the database. For now, this is simpler. + fn get_cold_state_payload_status(&self, slot: Slot) -> Result { + // Pre-Gloas states are always `Pending`. + if !self.spec.fork_name_at_slot::(slot).gloas_enabled() { + return Ok(StatePayloadStatus::Pending); + } + + let block_root = self + .get_cold_block_root(slot)? + .ok_or(HotColdDBError::MissingFrozenBlock(slot))?; + + let block = self + .get_blinded_block(&block_root)? + .ok_or(Error::MissingBlock(block_root))?; + + let state_root = self + .get_cold_state_root(block.slot())? + .ok_or(HotColdDBError::MissingRestorePointState(block.slot()))?; + + if block.state_root() != state_root { + Ok(StatePayloadStatus::Full) + } else { + Ok(StatePayloadStatus::Pending) + } + } + fn load_hot_hdiff_buffer(&self, state_root: Hash256) -> Result { if let Some(buffer) = self .state_cache @@ -2454,8 +2499,7 @@ impl, Cold: ItemStore> HotColdDB self.forwards_state_roots_iterator_until(base_state.slot(), slot, || { Err(Error::StateShouldNotBeRequired(slot)) })?; - // TODO(gloas): calculate correct payload status for cold states - let payload_status = StatePayloadStatus::Pending; + let payload_status = self.get_cold_state_payload_status(slot)?; let state = self.replay_blocks( base_state, blocks, @@ -2591,9 +2635,10 @@ impl, Cold: ItemStore> HotColdDB { return Ok((blocks, vec![])); } - // TODO(gloas): wire this up - let end_block_root = Hash256::ZERO; - let desired_payload_status = StatePayloadStatus::Pending; + let end_block_root = self + .get_cold_block_root(end_slot)? + .ok_or(HotColdDBError::MissingFrozenBlock(end_slot))?; + let desired_payload_status = self.get_cold_state_payload_status(end_slot)?; let envelopes = self.load_payload_envelopes_for_blocks( &blocks, end_block_root, diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index eb5232d344..d251fb8800 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -319,6 +319,10 @@ impl, Cold: ItemStore> HotColdDB .spec .fulu_fork_epoch .map(|epoch| epoch.start_slot(E::slots_per_epoch())); + let gloas_fork_slot = self + .spec + .gloas_fork_epoch + .map(|epoch| epoch.start_slot(E::slots_per_epoch())); let oldest_blob_slot = self.get_blob_info().oldest_blob_slot; let oldest_data_column_slot = self.get_data_column_info().oldest_data_column_slot; @@ -343,17 +347,28 @@ impl, Cold: ItemStore> HotColdDB } // Invariant 5: execution payload consistency. - // TODO(gloas): reconsider this invariant if check_payloads && let Some(bellatrix_slot) = bellatrix_fork_slot && slot >= bellatrix_slot - && !self.execution_payload_exists(&block_root)? - && !self.payload_envelope_exists(&block_root)? { - result.add_violation(InvariantViolation::ExecutionPayloadMissing { - block_root, - slot, - }); + if let Some(gloas_slot) = gloas_fork_slot + && slot >= gloas_slot + { + // For Gloas there is never a true payload stored at slot 0. + // TODO(gloas): still need to account for non-canonical payloads once pruning + // is implemented. + if slot != 0 && !self.payload_envelope_exists(&block_root)? { + result.add_violation(InvariantViolation::ExecutionPayloadMissing { + block_root, + slot, + }); + } + } else if !self.execution_payload_exists(&block_root)? { + result.add_violation(InvariantViolation::ExecutionPayloadMissing { + block_root, + slot, + }); + } } // Invariant 6: blob sidecar consistency. diff --git a/consensus/state_processing/src/block_replayer.rs b/consensus/state_processing/src/block_replayer.rs index a10d6179fe..f5f06d1cb9 100644 --- a/consensus/state_processing/src/block_replayer.rs +++ b/consensus/state_processing/src/block_replayer.rs @@ -313,6 +313,7 @@ where // indicates that the parent is full (and it hasn't already been applied). state_root = if block.fork_name_unchecked().gloas_enabled() && self.state.slot() == self.state.latest_block_header().slot + && self.state.payload_status() == StatePayloadStatus::Pending { let latest_bid_block_hash = self .state