Fix I/O atomicity issues with checkpoint sync (#2671)

## Issue Addressed

This PR addresses an issue found by @YorickDowne during testing of v2.0.0-rc.0.

Due to a lack of atomic database writes on checkpoint sync start-up, it was possible for the database to get into an inconsistent state from which it couldn't recover without `--purge-db`. The core of the issue was that the store's anchor info was being stored _before_ the `PersistedBeaconChain`. If a crash occured so that anchor info was stored but _not_ the `PersistedBeaconChain`, then on restart Lighthouse would think the database was unitialized and attempt to compare-and-swap a `None` value, but would actually find the stale info from the previous run.

## Proposed Changes

The issue is fixed by writing the anchor info, the split point, and the `PersistedBeaconChain` atomically on start-up. Some type-hinting ugliness was required, which could possibly be cleaned up in future refactors.
This commit is contained in:
Michael Sproul
2021-10-05 03:53:17 +00:00
parent 28b79084cd
commit ed1fc7cca6
7 changed files with 95 additions and 36 deletions

View File

@@ -339,24 +339,41 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
Ok(())
}
/// Return a `PersistedBeaconChain` representing the current head.
pub fn make_persisted_head(&self) -> PersistedBeaconChain {
/// Return a `PersistedBeaconChain` without reference to a `BeaconChain`.
pub fn make_persisted_head(
genesis_block_root: Hash256,
head_tracker: &HeadTracker,
) -> PersistedBeaconChain {
PersistedBeaconChain {
_canonical_head_block_root: DUMMY_CANONICAL_HEAD_BLOCK_ROOT,
genesis_block_root: self.genesis_block_root,
ssz_head_tracker: self.head_tracker.to_ssz_container(),
genesis_block_root,
ssz_head_tracker: head_tracker.to_ssz_container(),
}
}
/// Return a database operation for writing the beacon chain head to disk.
pub fn persist_head_in_batch(&self) -> KeyValueStoreOp {
self.make_persisted_head()
Self::persist_head_in_batch_standalone(self.genesis_block_root, &self.head_tracker)
}
pub fn persist_head_in_batch_standalone(
genesis_block_root: Hash256,
head_tracker: &HeadTracker,
) -> KeyValueStoreOp {
Self::make_persisted_head(genesis_block_root, head_tracker)
.as_kv_store_op(BEACON_CHAIN_DB_KEY)
}
/// Return a database operation for writing fork choice to disk.
pub fn persist_fork_choice_in_batch(&self) -> KeyValueStoreOp {
let fork_choice = self.fork_choice.read();
Self::persist_fork_choice_in_batch_standalone(&fork_choice)
}
/// Return a database operation for writing fork choice to disk.
pub fn persist_fork_choice_in_batch_standalone(
fork_choice: &BeaconForkChoice<T>,
) -> KeyValueStoreOp {
let persisted_fork_choice = PersistedForkChoice {
fork_choice: fork_choice.to_persisted(),
fork_choice_store: fork_choice.fc_store().to_persisted(),

View File

@@ -25,7 +25,7 @@ use slot_clock::{SlotClock, TestingSlotClock};
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::Duration;
use store::{Error as StoreError, HotColdDB, ItemStore};
use store::{Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp};
use task_executor::ShutdownReason;
use types::{
BeaconBlock, BeaconState, ChainSpec, Checkpoint, EthSpec, Graffiti, Hash256, PublicKeyBytes,
@@ -87,6 +87,9 @@ pub struct BeaconChainBuilder<T: BeaconChainTypes> {
graffiti: Graffiti,
slasher: Option<Arc<Slasher<T::EthSpec>>>,
validator_monitor: Option<ValidatorMonitor<T::EthSpec>>,
// Pending I/O batch that is constructed during building and should be executed atomically
// alongside `PersistedBeaconChain` storage when `BeaconChainBuilder::build` is called.
pending_io_batch: Vec<KeyValueStoreOp>,
}
impl<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>
@@ -124,6 +127,7 @@ where
graffiti: Graffiti::default(),
slasher: None,
validator_monitor: None,
pending_io_batch: vec![],
}
}
@@ -416,13 +420,11 @@ where
// Set the store's split point *before* storing genesis so that genesis is stored
// immediately in the freezer DB.
store.set_split(weak_subj_slot, weak_subj_state_root);
store
.store_split()
.map_err(|e| format!("Error storing DB split point: {:?}", e))?;
let (_, updated_builder) = self.set_genesis_state(genesis_state)?;
self = updated_builder;
// Write the state and block non-atomically, it doesn't matter if they're forgotten
// about on a crash restart.
store
.put_state(&weak_subj_state_root, &weak_subj_state)
.map_err(|e| format!("Failed to store weak subjectivity state: {:?}", e))?;
@@ -430,18 +432,22 @@ where
.put_block(&weak_subj_block_root, weak_subj_block.clone())
.map_err(|e| format!("Failed to store weak subjectivity block: {:?}", e))?;
// Store anchor info (context for weak subj sync).
store
.init_anchor_info(weak_subj_block.message())
.map_err(|e| format!("Failed to initialize anchor info: {:?}", e))?;
// Stage the database's metadata fields for atomic storage when `build` is called.
// This prevents the database from restarting in an inconsistent state if the anchor
// info or split point is written before the `PersistedBeaconChain`.
self.pending_io_batch.push(store.store_split_in_batch());
self.pending_io_batch.push(
store
.init_anchor_info(weak_subj_block.message())
.map_err(|e| format!("Failed to initialize anchor info: {:?}", e))?,
);
// Store pruning checkpoint to prevent attempting to prune before the anchor state.
store
.store_pruning_checkpoint(Checkpoint {
self.pending_io_batch
.push(store.pruning_checkpoint_store_op(Checkpoint {
root: weak_subj_block_root,
epoch: weak_subj_state.slot().epoch(TEthSpec::slots_per_epoch()),
})
.map_err(|e| format!("Failed to write pruning checkpoint: {:?}", e))?;
}));
let snapshot = BeaconSnapshot {
beacon_block_root: weak_subj_block_root,
@@ -542,7 +548,7 @@ where
/// configured.
#[allow(clippy::type_complexity)] // I think there's nothing to be gained here from a type alias.
pub fn build(
self,
mut self,
) -> Result<
BeaconChain<Witness<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>>,
String,
@@ -679,6 +685,26 @@ where
);
}
// Store the `PersistedBeaconChain` in the database atomically with the metadata so that on
// restart we can correctly detect the presence of an initialized database.
//
// This *must* be stored before constructing the `BeaconChain`, so that its `Drop` instance
// doesn't write a `PersistedBeaconChain` without the rest of the batch.
self.pending_io_batch.push(BeaconChain::<
Witness<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>,
>::persist_head_in_batch_standalone(
genesis_block_root, &head_tracker
));
self.pending_io_batch.push(BeaconChain::<
Witness<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>,
>::persist_fork_choice_in_batch_standalone(
&fork_choice
));
store
.hot_db
.do_atomically(self.pending_io_batch)
.map_err(|e| format!("Error writing chain & metadata to disk: {:?}", e))?;
let beacon_chain = BeaconChain {
spec: self.spec,
config: self.chain_config,

View File

@@ -182,7 +182,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
};
let backfill_complete = new_anchor.block_backfill_complete();
self.store
.compare_and_set_anchor_info(Some(anchor_info), Some(new_anchor))?;
.compare_and_set_anchor_info_with_write(Some(anchor_info), Some(new_anchor))?;
// If backfill has completed and the chain is configured to reconstruct historic states,
// send a message to the background migrator instructing it to begin reconstruction.