mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-19 13:58:28 +00:00
Restore crash safety for database pruning (#4975)
* Add some DB sanity checks
* Restore crash safety for database pruning
This commit is contained in:
@@ -34,7 +34,7 @@ use std::time::Duration;
|
||||
use store::{Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp};
|
||||
use task_executor::{ShutdownReason, TaskExecutor};
|
||||
use types::{
|
||||
BeaconBlock, BeaconState, ChainSpec, Checkpoint, Epoch, EthSpec, Graffiti, Hash256, Signature,
|
||||
BeaconBlock, BeaconState, ChainSpec, Epoch, EthSpec, Graffiti, Hash256, Signature,
|
||||
SignedBeaconBlock, Slot,
|
||||
};
|
||||
|
||||
@@ -559,16 +559,6 @@ where
|
||||
.map_err(|e| format!("Failed to initialize blob info: {:?}", e))?,
|
||||
);
|
||||
|
||||
// Store pruning checkpoint to prevent attempting to prune before the anchor state.
|
||||
self.pending_io_batch.push(
|
||||
store
|
||||
.pruning_checkpoint_store_op(Checkpoint {
|
||||
root: weak_subj_block_root,
|
||||
epoch: weak_subj_state.slot().epoch(TEthSpec::slots_per_epoch()),
|
||||
})
|
||||
.map_err(|e| format!("{:?}", e))?,
|
||||
);
|
||||
|
||||
let snapshot = BeaconSnapshot {
|
||||
beacon_block_root: weak_subj_block_root,
|
||||
beacon_block: Arc::new(weak_subj_block),
|
||||
|
||||
@@ -512,13 +512,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
genesis_block_root: Hash256,
|
||||
log: &Logger,
|
||||
) -> Result<PruningOutcome, BeaconChainError> {
|
||||
let old_finalized_checkpoint =
|
||||
store
|
||||
.load_pruning_checkpoint()?
|
||||
.unwrap_or_else(|| Checkpoint {
|
||||
epoch: Epoch::new(0),
|
||||
root: Hash256::zero(),
|
||||
});
|
||||
let old_finalized_checkpoint = store.get_pruning_checkpoint();
|
||||
|
||||
let old_finalized_slot = old_finalized_checkpoint
|
||||
.epoch
|
||||
@@ -572,6 +566,21 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
|
||||
// Quick sanity check. If the canonical block & state roots are incorrect then we could
|
||||
// incorrectly delete canonical states, which would corrupt the database.
|
||||
let expected_canonical_block_roots = new_finalized_slot
|
||||
.saturating_sub(old_finalized_slot)
|
||||
.as_usize()
|
||||
.saturating_add(1);
|
||||
if newly_finalized_chain.len() != expected_canonical_block_roots {
|
||||
return Err(BeaconChainError::DBInconsistent(format!(
|
||||
"canonical chain iterator is corrupt; \
|
||||
expected {} but got {} block roots",
|
||||
expected_canonical_block_roots,
|
||||
newly_finalized_chain.len()
|
||||
)));
|
||||
}
|
||||
|
||||
// We don't know which blocks are shared among abandoned chains, so we buffer and delete
|
||||
// everything in one fell swoop.
|
||||
let mut abandoned_blocks: HashSet<SignedBeaconBlockHash> = HashSet::new();
|
||||
@@ -735,11 +744,6 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
persisted_head.as_kv_store_op(BEACON_CHAIN_DB_KEY)?,
|
||||
));
|
||||
|
||||
// Persist the new finalized checkpoint as the pruning checkpoint.
|
||||
batch.push(StoreOp::KeyValueOp(
|
||||
store.pruning_checkpoint_store_op(new_finalized_checkpoint)?,
|
||||
));
|
||||
|
||||
store.do_atomically_with_block_and_blobs_cache(batch)?;
|
||||
debug!(
|
||||
log,
|
||||
@@ -753,19 +757,26 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
||||
let (state_root, summary) = res?;
|
||||
|
||||
if summary.slot <= new_finalized_slot {
|
||||
// If state root doesn't match state root from canonical chain, or this slot
|
||||
// is not part of the recently finalized chain, then delete.
|
||||
// If state root doesn't match state root from canonical chain, then delete.
|
||||
// We may also find older states here that should have been deleted by `migrate_db`
|
||||
// but weren't due to wonky I/O atomicity.
|
||||
if newly_finalized_chain
|
||||
.get(&summary.slot)
|
||||
.map_or(true, |(_, canonical_state_root)| {
|
||||
state_root != Hash256::from(*canonical_state_root)
|
||||
})
|
||||
{
|
||||
let reason = if summary.slot < old_finalized_slot {
|
||||
"old dangling state"
|
||||
} else {
|
||||
"non-canonical"
|
||||
};
|
||||
debug!(
|
||||
log,
|
||||
"Deleting state";
|
||||
"state_root" => ?state_root,
|
||||
"slot" => summary.slot,
|
||||
"reason" => reason,
|
||||
);
|
||||
state_delete_batch.push(StoreOp::DeleteState(state_root, Some(summary.slot)));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user