diff --git a/beacon_node/beacon_chain/src/migrate.rs b/beacon_node/beacon_chain/src/migrate.rs index 94fa0a1890..03c468a35e 100644 --- a/beacon_node/beacon_chain/src/migrate.rs +++ b/beacon_node/beacon_chain/src/migrate.rs @@ -572,7 +572,8 @@ impl, Cold: ItemStore> BackgroundMigrator 1 { warn!( state_summaries_dag_roots = ?state_summaries_dag_roots, - "Prune state summaries dag found more than one root" + error = "summaries dag found more than one root", + "Notify the devs your hot DB has some inconsistency. Pruning will fix it but devs want to know about it", ); } diff --git a/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs b/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs index 0b64fdbe08..a995f9d6b4 100644 --- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs +++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs @@ -41,7 +41,7 @@ pub fn upgrade_to_v22( db: Arc>, genesis_state_root: Option, ) -> Result<(), Error> { - info!("Upgrading from v21 to v22"); + info!("Upgrading DB schema from v21 to v22"); let old_anchor = db.get_anchor_info(); diff --git a/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs b/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs index e66178df53..d0f8202679 100644 --- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs +++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs @@ -8,6 +8,7 @@ use ssz::{Decode, Encode}; use ssz_derive::{Decode, Encode}; use std::sync::Arc; use store::{DBColumn, Error, HotColdDB, KeyValueStore, KeyValueStoreOp, StoreItem}; +use tracing::{debug, info}; use types::{Hash256, Slot}; /// Dummy value to use for the canonical head block root, see below. @@ -16,6 +17,8 @@ pub const DUMMY_CANONICAL_HEAD_BLOCK_ROOT: Hash256 = Hash256::repeat_byte(0xff); pub fn upgrade_to_v23( db: Arc>, ) -> Result, Error> { + info!("Upgrading DB schema from v22 to v23"); + // 1) Set the head-tracker to empty let Some(persisted_beacon_chain_v22) = db.get_item::(&BEACON_CHAIN_DB_KEY)? @@ -37,10 +40,24 @@ pub fn upgrade_to_v23( .hot_db .iter_column_keys::(DBColumn::BeaconStateTemporary) { + let state_root = state_root_result?; + debug!( + ?state_root, + "Deleting temporary state flag on v23 schema migration" + ); ops.push(KeyValueStoreOp::DeleteKey( DBColumn::BeaconStateTemporary, - state_root_result?.as_slice().to_vec(), + state_root.as_slice().to_vec(), )); + // Here we SHOULD delete the items for key `state_root` in columns `BeaconState` and + // `BeaconStateSummary`. However, in the event we have dangling temporary states at the time + // of the migration, the first pruning routine will prune them. They will be a tree branch / + // root not part of the finalized tree and trigger a warning log once. + // + // We believe there may be race conditions concerning temporary flags where a necessary + // canonical state is marked as temporary. In current stable, a restart with that DB will + // corrupt the DB. In the unlikely case this happens we choose to leave the states and + // allow pruning to clean them. } Ok(ops) diff --git a/beacon_node/beacon_chain/src/summaries_dag.rs b/beacon_node/beacon_chain/src/summaries_dag.rs index ab379d1eb2..8dff2ac7be 100644 --- a/beacon_node/beacon_chain/src/summaries_dag.rs +++ b/beacon_node/beacon_chain/src/summaries_dag.rs @@ -43,13 +43,6 @@ pub enum Error { state_root: Hash256, latest_block_root: Hash256, }, - StateSummariesNotContiguous { - state_root: Hash256, - state_slot: Slot, - latest_block_root: Hash256, - parent_block_root: Box, - parent_block_latest_state_summary: Box>, - }, MissingChildStateRoot(Hash256), RequestedSlotAboveSummary { starting_state_root: Hash256, @@ -163,34 +156,17 @@ impl StateSummariesDAG { **state_root } else { // Common case: not a skipped slot. + // + // If we can't find a state summmary for the parent block and previous slot, + // then there is some amount of disjointedness in the DAG. We set the parent + // state root to 0x0 in this case, and will prune any dangling states. let parent_block_root = summary.block_parent_root; - if let Some(parent_block_summaries) = - state_summaries_by_block_root.get(&parent_block_root) - { - *parent_block_summaries - .get(&previous_slot) - // Should never error: summaries are contiguous, so if there's an - // entry it must contain at least one summary at the previous slot. - .ok_or(Error::StateSummariesNotContiguous { - state_root: *state_root, - state_slot: summary.slot, - latest_block_root: summary.latest_block_root, - parent_block_root: parent_block_root.into(), - parent_block_latest_state_summary: parent_block_summaries - .iter() - .max_by(|a, b| a.0.cmp(b.0)) - .map(|(slot, (state_root, _))| (*slot, **state_root)) - .into(), - })? - .0 - } else { - // We don't know of any summary with this parent block root. We'll - // consider this summary to be a root of `state_summaries_v22` - // collection and mark it as zero. - // The test store_tests::finalizes_non_epoch_start_slot manages to send two - // disjoint trees on its second migration. - Hash256::ZERO - } + state_summaries_by_block_root + .get(&parent_block_root) + .and_then(|parent_block_summaries| { + parent_block_summaries.get(&previous_slot) + }) + .map_or(Hash256::ZERO, |(parent_state_root, _)| **parent_state_root) } };