Implement checkpoint sync (#2244)

## Issue Addressed

Closes #1891
Closes #1784

## Proposed Changes

Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint.

## Additional Info

- [x] Return unavailable status for out-of-range blocks requested by peers (#2561)
- [x] Implement sync daemon for fetching historical blocks (#2561)
- [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module)
- [x] Consistency check for initial block + state
- [x] Fetch the initial state and block from a beacon node HTTP endpoint
- [x] Don't crash fetching beacon states by slot from the API
- [x] Background service for state reconstruction, triggered by CLI flag or API call.

Considered out of scope for this PR:

- Drop the requirement to provide the `--checkpoint-block` flag (this would require some fairly heavy refactoring of block verification)


Co-authored-by: Diva M <divma@protonmail.com>
This commit is contained in:
Michael Sproul
2021-09-22 00:37:28 +00:00
parent 280e4fe23d
commit 9667dc2f03
71 changed files with 4012 additions and 459 deletions

View File

@@ -14,6 +14,7 @@ use crate::errors::{BeaconChainError as Error, BlockProductionError};
use crate::eth1_chain::{Eth1Chain, Eth1ChainBackend};
use crate::events::ServerSentEventHandler;
use crate::head_tracker::HeadTracker;
use crate::historical_blocks::HistoricalBlockError;
use crate::migrate::BackgroundMigrator;
use crate::naive_aggregation_pool::{
AggregatedAttestationMap, Error as NaiveAggregationError, NaiveAggregationPool,
@@ -431,10 +432,23 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// - Skipped slots contain the root of the closest prior
/// non-skipped slot (identical to the way they are stored in `state.block_roots`).
/// - Iterator returns `(Hash256, Slot)`.
///
/// Will return a `BlockOutOfRange` error if the requested start slot is before the period of
/// history for which we have blocks stored. See `get_oldest_block_slot`.
pub fn forwards_iter_block_roots(
&self,
start_slot: Slot,
) -> Result<impl Iterator<Item = Result<(Hash256, Slot), Error>>, Error> {
let oldest_block_slot = self.store.get_oldest_block_slot();
if start_slot < oldest_block_slot {
return Err(Error::HistoricalBlockError(
HistoricalBlockError::BlockOutOfRange {
slot: start_slot,
oldest_block_slot,
},
));
}
let local_head = self.head()?;
let iter = HotColdDB::forwards_block_roots_iterator(
@@ -620,6 +634,12 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
return Ok(Some(self.genesis_state_root));
}
// Check limits w.r.t historic state bounds.
let (historic_lower_limit, historic_upper_limit) = self.store.get_historic_state_limits();
if request_slot > historic_lower_limit && request_slot < historic_upper_limit {
return Ok(None);
}
// Try an optimized path of reading the root directly from the head state.
let fast_lookup: Option<Hash256> = self.with_head(|head| {
if head.beacon_block.slot() <= request_slot {
@@ -657,7 +677,8 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
/// ## Notes
///
/// - Use the `skips` parameter to define the behaviour when `request_slot` is a skipped slot.
/// - Returns `Ok(None)` for any slot higher than the current wall-clock slot.
/// - Returns `Ok(None)` for any slot higher than the current wall-clock slot, or less than
/// the oldest known block slot.
pub fn block_root_at_slot(
&self,
request_slot: Slot,
@@ -667,6 +688,10 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
WhenSlotSkipped::None => self.block_root_at_slot_skips_none(request_slot),
WhenSlotSkipped::Prev => self.block_root_at_slot_skips_prev(request_slot),
}
.or_else(|e| match e {
Error::HistoricalBlockError(_) => Ok(None),
e => Err(e),
})
}
/// Returns the block root at the given slot, if any. Only returns roots in the canonical chain.

View File

@@ -501,6 +501,9 @@ impl<T: BeaconChainTypes> GossipVerifiedBlock<T> {
let block_root = get_block_root(&block);
// Disallow blocks that conflict with the anchor (weak subjectivity checkpoint), if any.
check_block_against_anchor_slot(block.message(), chain)?;
// Do not gossip a block from a finalized slot.
check_block_against_finalized_slot(block.message(), chain)?;
@@ -708,6 +711,9 @@ impl<T: BeaconChainTypes> SignatureVerifiedBlock<T> {
.fork_name(&chain.spec)
.map_err(BlockError::InconsistentFork)?;
// Check the anchor slot before loading the parent, to avoid spurious lookups.
check_block_against_anchor_slot(block.message(), chain)?;
let (mut parent, block) = load_parent(block, chain)?;
// Reject any block that exceeds our limit on skipped slots.
@@ -1115,6 +1121,19 @@ fn check_block_skip_slots<T: BeaconChainTypes>(
Ok(())
}
/// Returns `Ok(())` if the block's slot is greater than the anchor block's slot (if any).
///
/// Blocks at or below the anchor (weak subjectivity checkpoint) slot are rejected with
/// `WeakSubjectivityConflict`. If the store has no anchor, every slot is acceptable.
fn check_block_against_anchor_slot<T: BeaconChainTypes>(
    block: BeaconBlockRef<'_, T::EthSpec>,
    chain: &BeaconChain<T>,
) -> Result<(), BlockError<T::EthSpec>> {
    match chain.store.get_anchor_slot() {
        Some(anchor_slot) if block.slot() <= anchor_slot => {
            Err(BlockError::WeakSubjectivityConflict)
        }
        _ => Ok(()),
    }
}
/// Returns `Ok(())` if the block is later than the finalized slot on `chain`.
///
/// Returns an error if the block is earlier or equal to the finalized slot, or there was an error

View File

@@ -28,8 +28,8 @@ use std::time::Duration;
use store::{Error as StoreError, HotColdDB, ItemStore};
use task_executor::ShutdownReason;
use types::{
BeaconBlock, BeaconState, ChainSpec, EthSpec, Graffiti, Hash256, PublicKeyBytes, Signature,
SignedBeaconBlock, Slot,
BeaconBlock, BeaconState, ChainSpec, Checkpoint, EthSpec, Graffiti, Hash256, PublicKeyBytes,
Signature, SignedBeaconBlock, Slot,
};
/// An empty struct used to "witness" all the `BeaconChainTypes` traits. It has no user-facing
@@ -282,12 +282,19 @@ where
Ok(self)
}
/// Starts a new chain from a genesis state.
pub fn genesis_state(
/// Store the genesis state & block in the DB.
///
/// Do *not* initialize fork choice, or do anything that assumes starting from genesis.
///
/// Return the `BeaconSnapshot` representing genesis as well as the mutated builder.
fn set_genesis_state(
mut self,
mut beacon_state: BeaconState<TEthSpec>,
) -> Result<Self, String> {
let store = self.store.clone().ok_or("genesis_state requires a store")?;
) -> Result<(BeaconSnapshot<TEthSpec>, Self), String> {
let store = self
.store
.clone()
.ok_or("set_genesis_state requires a store")?;
let beacon_block = genesis_block(&mut beacon_state, &self.spec)?;
@@ -298,9 +305,6 @@ where
let beacon_state_root = beacon_block.message().state_root();
let beacon_block_root = beacon_block.canonical_root();
self.genesis_state_root = Some(beacon_state_root);
self.genesis_block_root = Some(beacon_block_root);
store
.put_state(&beacon_state_root, &beacon_state)
.map_err(|e| format!("Failed to store genesis state: {:?}", e))?;
@@ -318,11 +322,26 @@ where
)
})?;
let genesis = BeaconSnapshot {
beacon_block,
beacon_block_root,
beacon_state,
};
self.genesis_state_root = Some(beacon_state_root);
self.genesis_block_root = Some(beacon_block_root);
self.genesis_time = Some(beacon_state.genesis_time());
Ok((
BeaconSnapshot {
beacon_block_root,
beacon_block,
beacon_state,
},
self,
))
}
/// Starts a new chain from a genesis state.
pub fn genesis_state(mut self, beacon_state: BeaconState<TEthSpec>) -> Result<Self, String> {
let store = self.store.clone().ok_or("genesis_state requires a store")?;
let (genesis, updated_builder) = self.set_genesis_state(beacon_state)?;
self = updated_builder;
let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &genesis);
@@ -332,10 +351,115 @@ where
&genesis.beacon_block,
&genesis.beacon_state,
)
.map_err(|e| format!("Unable to build initialize ForkChoice: {:?}", e))?;
.map_err(|e| format!("Unable to initialize ForkChoice: {:?}", e))?;
self.fork_choice = Some(fork_choice);
Ok(self.empty_op_pool())
}
/// Start the chain from a weak subjectivity (checkpoint) state.
///
/// Stores both the genesis block/state and the checkpoint block/state, moves the hot/cold
/// database split to the checkpoint slot, records anchor info for backfill sync, stores a
/// pruning checkpoint, and initializes fork choice from the checkpoint snapshot.
///
/// # Errors
///
/// Returns an error string if:
/// - no store has been configured on the builder,
/// - the checkpoint is not aligned to an epoch boundary,
/// - the block and state have inconsistent slots or state roots,
/// - the state's genesis validators root doesn't match `genesis_state`'s, or
/// - any database write fails.
pub fn weak_subjectivity_state(
    mut self,
    mut weak_subj_state: BeaconState<TEthSpec>,
    weak_subj_block: SignedBeaconBlock<TEthSpec>,
    genesis_state: BeaconState<TEthSpec>,
) -> Result<Self, String> {
    // NOTE: previously this error message said "genesis_state requires a store" (copy-paste
    // from the sibling method); corrected to name this method.
    let store = self
        .store
        .clone()
        .ok_or("weak_subjectivity_state requires a store")?;

    let weak_subj_slot = weak_subj_state.slot();
    let weak_subj_block_root = weak_subj_block.canonical_root();
    let weak_subj_state_root = weak_subj_block.state_root();

    // Check that the given block lies on an epoch boundary. Due to the database only storing
    // full states on epoch boundaries and at restore points it would be difficult to support
    // starting from a mid-epoch state.
    if weak_subj_slot % TEthSpec::slots_per_epoch() != 0 {
        return Err(format!(
            "Checkpoint block at slot {} is not aligned to epoch start. \
             Please supply an aligned checkpoint with block.slot % 32 == 0",
            weak_subj_block.slot(),
        ));
    }

    // Check that the block and state have consistent slots and state roots.
    if weak_subj_state.slot() != weak_subj_block.slot() {
        return Err(format!(
            "Slot of snapshot block ({}) does not match snapshot state ({})",
            weak_subj_block.slot(),
            weak_subj_state.slot(),
        ));
    }

    // Computing the state root here also builds the state's tree hash cache.
    let computed_state_root = weak_subj_state
        .update_tree_hash_cache()
        .map_err(|e| format!("Error computing checkpoint state root: {:?}", e))?;

    if weak_subj_state_root != computed_state_root {
        return Err(format!(
            "Snapshot state root does not match block, expected: {:?}, got: {:?}",
            weak_subj_state_root, computed_state_root
        ));
    }

    // Check that the checkpoint state is for the same network as the genesis state.
    // This check doesn't do much for security but should prevent mistakes.
    if weak_subj_state.genesis_validators_root() != genesis_state.genesis_validators_root() {
        return Err(format!(
            "Snapshot state appears to be from the wrong network. Genesis validators root \
             is {:?} but should be {:?}",
            weak_subj_state.genesis_validators_root(),
            genesis_state.genesis_validators_root()
        ));
    }

    // Set the store's split point *before* storing genesis so that genesis is stored
    // immediately in the freezer DB.
    store.set_split(weak_subj_slot, weak_subj_state_root);
    store
        .store_split()
        .map_err(|e| format!("Error storing DB split point: {:?}", e))?;

    // Bind the genesis snapshot so its genesis time can be read below. (Previously the
    // snapshot was discarded with `_`, leaving `genesis` unbound at the final assignment.)
    let (genesis, updated_builder) = self.set_genesis_state(genesis_state)?;
    self = updated_builder;

    store
        .put_state(&weak_subj_state_root, &weak_subj_state)
        .map_err(|e| format!("Failed to store weak subjectivity state: {:?}", e))?;
    store
        .put_block(&weak_subj_block_root, weak_subj_block.clone())
        .map_err(|e| format!("Failed to store weak subjectivity block: {:?}", e))?;

    // Store anchor info (context for weak subj sync).
    store
        .init_anchor_info(weak_subj_block.message())
        .map_err(|e| format!("Failed to initialize anchor info: {:?}", e))?;

    // Store pruning checkpoint to prevent attempting to prune before the anchor state.
    store
        .store_pruning_checkpoint(Checkpoint {
            root: weak_subj_block_root,
            epoch: weak_subj_state.slot().epoch(TEthSpec::slots_per_epoch()),
        })
        .map_err(|e| format!("Failed to write pruning checkpoint: {:?}", e))?;

    let snapshot = BeaconSnapshot {
        beacon_block_root: weak_subj_block_root,
        beacon_block: weak_subj_block,
        beacon_state: weak_subj_state,
    };

    let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &snapshot);

    let fork_choice = ForkChoice::from_anchor(
        fc_store,
        snapshot.beacon_block_root,
        &snapshot.beacon_block,
        &snapshot.beacon_state,
    )
    .map_err(|e| format!("Unable to initialize ForkChoice: {:?}", e))?;

    self.fork_choice = Some(fork_choice);
    self.genesis_time = Some(genesis.beacon_state.genesis_time());

    Ok(self.empty_op_pool())
}
@@ -520,12 +644,13 @@ where
let fc_finalized = fork_choice.finalized_checkpoint();
let head_finalized = canonical_head.beacon_state.finalized_checkpoint();
if fc_finalized != head_finalized {
if head_finalized.root == Hash256::zero()
let is_genesis = head_finalized.root.is_zero()
&& head_finalized.epoch == fc_finalized.epoch
&& fc_finalized.root == genesis_block_root
{
// This is a legal edge-case encountered during genesis.
} else {
&& fc_finalized.root == genesis_block_root;
let is_wss = store.get_anchor_slot().map_or(false, |anchor_slot| {
fc_finalized.epoch == anchor_slot.epoch(TEthSpec::slots_per_epoch())
});
if !is_genesis && !is_wss {
return Err(format!(
"Database corrupt: fork choice is finalized at {:?} whilst head is finalized at \
{:?}",
@@ -654,6 +779,11 @@ where
"head_slot" => format!("{}", head.beacon_block.slot()),
);
// Check for states to reconstruct (in the background).
if beacon_chain.config.reconstruct_historic_states {
beacon_chain.store_migrator.process_reconstruction();
}
Ok(beacon_chain)
}
}

View File

@@ -12,6 +12,8 @@ pub struct ChainConfig {
///
/// If `None`, there is no weak subjectivity verification.
pub weak_subjectivity_checkpoint: Option<Checkpoint>,
/// Determine whether to reconstruct historic states, usually after a checkpoint sync.
pub reconstruct_historic_states: bool,
}
impl Default for ChainConfig {
@@ -19,6 +21,7 @@ impl Default for ChainConfig {
Self {
import_max_skip_slots: None,
weak_subjectivity_checkpoint: None,
reconstruct_historic_states: false,
}
}
}

View File

@@ -2,6 +2,7 @@ use crate::attester_cache::Error as AttesterCacheError;
use crate::beacon_chain::ForkChoiceError;
use crate::beacon_fork_choice_store::Error as ForkChoiceStoreError;
use crate::eth1_chain::Error as Eth1ChainError;
use crate::historical_blocks::HistoricalBlockError;
use crate::migrate::PruningError;
use crate::naive_aggregation_pool::Error as NaiveAggregationError;
use crate::observed_aggregates::Error as ObservedAttestationsError;
@@ -117,6 +118,7 @@ pub enum BeaconChainError {
block_slot: Slot,
state_slot: Slot,
},
HistoricalBlockError(HistoricalBlockError),
InvalidStateForShuffling {
state_epoch: Epoch,
shuffling_epoch: Epoch,
@@ -150,6 +152,7 @@ easy_from_to!(BlockSignatureVerifierError, BeaconChainError);
easy_from_to!(PruningError, BeaconChainError);
easy_from_to!(ArithError, BeaconChainError);
easy_from_to!(ForkChoiceStoreError, BeaconChainError);
easy_from_to!(HistoricalBlockError, BeaconChainError);
easy_from_to!(StateAdvanceError, BeaconChainError);
#[derive(Debug)]

View File

@@ -0,0 +1,195 @@
use crate::{errors::BeaconChainError as Error, BeaconChain, BeaconChainTypes};
use itertools::Itertools;
use slog::debug;
use state_processing::{
per_block_processing::ParallelSignatureSets,
signature_sets::{block_proposal_signature_set_from_parts, Error as SignatureSetError},
};
use std::borrow::Cow;
use std::iter;
use std::time::Duration;
use store::{chunked_vector::BlockRoots, AnchorInfo, ChunkWriter, KeyValueStore};
use types::{Hash256, SignedBeaconBlock, Slot};
/// Use a longer timeout on the pubkey cache.
///
/// It's ok if historical sync is stalled due to writes from forwards block processing.
const PUBKEY_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Errors that may arise when importing historical (pre-checkpoint) blocks.
///
/// The per-variant comments indicate whether the caller should retry, and with what.
#[derive(Debug)]
pub enum HistoricalBlockError {
    /// Block is not available (only returned when fetching historic blocks).
    BlockOutOfRange { slot: Slot, oldest_block_slot: Slot },
    /// Block root mismatch, caller should retry with different blocks.
    MismatchedBlockRoot {
        block_root: Hash256,
        expected_block_root: Hash256,
    },
    /// Bad signature (error while constructing the signature set), caller should retry with
    /// different blocks.
    SignatureSet(SignatureSetError),
    /// Bad signature (batch verification failed), caller should retry with different blocks.
    InvalidSignature,
    /// Transitory error, caller should retry with the same blocks.
    ValidatorPubkeyCacheTimeout,
    /// No historical sync needed.
    NoAnchorInfo,
    /// Logic error: should never occur.
    IndexOutOfBounds,
}
impl<T: BeaconChainTypes> BeaconChain<T> {
    /// Store a batch of historical blocks in the database.
    ///
    /// The `blocks` should be given in slot-ascending order. One of the blocks should have a block
    /// root corresponding to the `oldest_block_parent` from the store's `AnchorInfo`.
    ///
    /// The block roots and proposer signatures are verified. If any block doesn't match the parent
    /// root listed in its successor, then the whole batch will be discarded and
    /// `MismatchedBlockRoot` will be returned. If any proposer signature is invalid then
    /// `SignatureSetError` or `InvalidSignature` will be returned.
    ///
    /// To align with sync we allow some excess blocks with slots greater than or equal to
    /// `oldest_block_slot` to be provided. They will be ignored without being checked.
    ///
    /// This function should not be called concurrently with any other function that mutates
    /// the anchor info (including this function itself). If a concurrent mutation occurs that
    /// would violate consistency then an `AnchorInfoConcurrentMutation` error will be returned.
    ///
    /// Return the number of blocks successfully imported.
    pub fn import_historical_block_batch(
        &self,
        blocks: &[SignedBeaconBlock<T::EthSpec>],
    ) -> Result<usize, Error> {
        // No anchor info means this node has no historical backfill in progress.
        let anchor_info = self
            .store
            .get_anchor_info()
            .ok_or(HistoricalBlockError::NoAnchorInfo)?;

        // Take all blocks with slots less than the oldest block slot.
        // (`blocks` is slot-ascending, so the relevant blocks form a prefix.)
        let num_relevant =
            blocks.partition_point(|block| block.slot() < anchor_info.oldest_block_slot);
        let blocks_to_import = &blocks
            .get(..num_relevant)
            .ok_or(HistoricalBlockError::IndexOutOfBounds)?;

        if blocks_to_import.len() != blocks.len() {
            debug!(
                self.log,
                "Ignoring some historic blocks";
                "oldest_block_slot" => anchor_info.oldest_block_slot,
                "total_blocks" => blocks.len(),
                "ignored" => blocks.len().saturating_sub(blocks_to_import.len()),
            );
        }

        if blocks_to_import.is_empty() {
            return Ok(0);
        }

        // Walk the batch backwards (newest to oldest), checking each block's root against
        // the parent root recorded by its successor, seeded from the anchor's parent root.
        let mut expected_block_root = anchor_info.oldest_block_parent;
        let mut prev_block_slot = anchor_info.oldest_block_slot;
        let mut chunk_writer =
            ChunkWriter::<BlockRoots, _, _>::new(&self.store.cold_db, prev_block_slot.as_usize())?;

        // Buffer all writes in memory first; nothing touches disk until after signature
        // verification succeeds.
        let mut cold_batch = Vec::with_capacity(blocks.len());
        let mut hot_batch = Vec::with_capacity(blocks.len());

        for block in blocks_to_import.iter().rev() {
            // Check chain integrity.
            let block_root = block.canonical_root();

            if block_root != expected_block_root {
                return Err(HistoricalBlockError::MismatchedBlockRoot {
                    block_root,
                    expected_block_root,
                }
                .into());
            }

            // Store block in the hot database.
            hot_batch.push(self.store.block_as_kv_store_op(&block_root, block));

            // Store block roots, including at all skip slots in the freezer DB.
            for slot in (block.slot().as_usize()..prev_block_slot.as_usize()).rev() {
                chunk_writer.set(slot, block_root, &mut cold_batch)?;
            }

            prev_block_slot = block.slot();
            expected_block_root = block.message().parent_root();

            // If we've reached genesis, add the genesis block root to the batch and set the
            // anchor slot to 0 to indicate completion.
            if expected_block_root == self.genesis_block_root {
                let genesis_slot = self.spec.genesis_slot;
                chunk_writer.set(
                    genesis_slot.as_usize(),
                    self.genesis_block_root,
                    &mut cold_batch,
                )?;
                prev_block_slot = genesis_slot;
                expected_block_root = Hash256::zero();
                break;
            }
        }
        // Flush any partially-filled final chunk of block roots into the cold batch.
        chunk_writer.write(&mut cold_batch)?;

        // Verify signatures in one batch, holding the pubkey cache lock for the shortest duration
        // possible. For each block fetch the parent root from its successor. Slicing from index 1
        // is safe because we've already checked that `blocks_to_import` is non-empty.
        let pubkey_cache = self
            .validator_pubkey_cache
            .try_read_for(PUBKEY_CACHE_LOCK_TIMEOUT)
            .ok_or(HistoricalBlockError::ValidatorPubkeyCacheTimeout)?;
        let block_roots = blocks_to_import
            .get(1..)
            .ok_or(HistoricalBlockError::IndexOutOfBounds)?
            .iter()
            .map(|block| block.parent_root())
            .chain(iter::once(anchor_info.oldest_block_parent));
        let signature_set = blocks_to_import
            .iter()
            .zip_eq(block_roots)
            .map(|(block, block_root)| {
                block_proposal_signature_set_from_parts(
                    block,
                    Some(block_root),
                    block.message().proposer_index(),
                    &self.spec.fork_at_epoch(block.message().epoch()),
                    self.genesis_validators_root,
                    |validator_index| pubkey_cache.get(validator_index).map(Cow::Borrowed),
                    &self.spec,
                )
            })
            .collect::<Result<Vec<_>, _>>()
            .map_err(HistoricalBlockError::SignatureSet)
            .map(ParallelSignatureSets::from)?;
        if !signature_set.verify() {
            return Err(HistoricalBlockError::InvalidSignature.into());
        }
        // Release the cache lock before the (slow) database writes below.
        drop(pubkey_cache);

        // Write the I/O batches to disk, writing the blocks themselves first, as it's better
        // for the hot DB to contain extra blocks than for the cold DB to point to blocks that
        // do not exist.
        self.store.hot_db.do_atomically(hot_batch)?;
        self.store.cold_db.do_atomically(cold_batch)?;

        // Update the anchor. The compare-and-set detects concurrent anchor mutations (see the
        // doc comment above) instead of silently clobbering them.
        let new_anchor = AnchorInfo {
            oldest_block_slot: prev_block_slot,
            oldest_block_parent: expected_block_root,
            ..anchor_info
        };
        let backfill_complete = new_anchor.block_backfill_complete();
        self.store
            .compare_and_set_anchor_info(Some(anchor_info), Some(new_anchor))?;

        // If backfill has completed and the chain is configured to reconstruct historic states,
        // send a message to the background migrator instructing it to begin reconstruction.
        if backfill_complete && self.config.reconstruct_historic_states {
            self.store_migrator.process_reconstruction();
        }

        Ok(blocks_to_import.len())
    }
}

View File

@@ -13,6 +13,7 @@ pub mod eth1_chain;
pub mod events;
pub mod fork_revert;
mod head_tracker;
pub mod historical_blocks;
mod metrics;
pub mod migrate;
mod naive_aggregation_pool;
@@ -39,6 +40,7 @@ pub use self::beacon_chain::{
pub use self::beacon_snapshot::BeaconSnapshot;
pub use self::chain_config::ChainConfig;
pub use self::errors::{BeaconChainError, BlockProductionError};
pub use self::historical_blocks::HistoricalBlockError;
pub use attestation_verification::Error as AttestationError;
pub use beacon_fork_choice_store::{BeaconForkChoiceStore, Error as ForkChoiceStoreError};
pub use block_verification::{BlockError, GossipVerifiedBlock};

View File

@@ -30,7 +30,7 @@ const COMPACTION_FINALITY_DISTANCE: u64 = 1024;
pub struct BackgroundMigrator<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
db: Arc<HotColdDB<E, Hot, Cold>>,
#[allow(clippy::type_complexity)]
tx_thread: Option<Mutex<(mpsc::Sender<MigrationNotification>, thread::JoinHandle<()>)>>,
tx_thread: Option<Mutex<(mpsc::Sender<Notification>, thread::JoinHandle<()>)>>,
/// Genesis block root, for persisting the `PersistedBeaconChain`.
genesis_block_root: Hash256,
log: Logger,
@@ -73,7 +73,12 @@ pub enum PruningError {
}
/// Message sent to the migration thread containing the information it needs to run.
pub struct MigrationNotification {
/// Message sent to the background migration thread, telling it what work to run.
pub enum Notification {
    /// Run the finalization migration (`run_migration`) with the enclosed context.
    Finalization(FinalizationNotification),
    /// Run historic state reconstruction (`run_reconstruction`).
    Reconstruction,
}
pub struct FinalizationNotification {
finalized_state_root: BeaconStateHash,
finalized_checkpoint: Checkpoint,
head_tracker: Arc<HeadTracker>,
@@ -112,13 +117,46 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
finalized_checkpoint: Checkpoint,
head_tracker: Arc<HeadTracker>,
) -> Result<(), BeaconChainError> {
let notif = MigrationNotification {
let notif = FinalizationNotification {
finalized_state_root,
finalized_checkpoint,
head_tracker,
genesis_block_root: self.genesis_block_root,
};
// Send to background thread if configured, otherwise run in foreground.
if let Some(Notification::Finalization(notif)) =
self.send_background_notification(Notification::Finalization(notif))
{
Self::run_migration(self.db.clone(), notif, &self.log);
}
Ok(())
}
/// Request reconstruction of historic states, either on the background migration
/// thread (if one is configured) or synchronously on the calling thread.
pub fn process_reconstruction(&self) {
    let unsent = self.send_background_notification(Notification::Reconstruction);
    // `Some` means no background thread took the message: do the work right here.
    if let Some(Notification::Reconstruction) = unsent {
        Self::run_reconstruction(self.db.clone(), &self.log);
    }
}
pub fn run_reconstruction(db: Arc<HotColdDB<E, Hot, Cold>>, log: &Logger) {
if let Err(e) = db.reconstruct_historic_states() {
error!(
log,
"State reconstruction failed";
"error" => ?e,
);
}
}
/// If configured to run in the background, send `notif` to the background thread.
///
/// Return `None` if the message was sent to the background thread, `Some(notif)` otherwise.
#[must_use = "Message is not processed when this function returns `Some`"]
fn send_background_notification(&self, notif: Notification) -> Option<Notification> {
// Async path, on the background thread.
if let Some(tx_thread) = &self.tx_thread {
let (ref mut tx, ref mut thread) = *tx_thread.lock();
@@ -143,17 +181,21 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
// Retry at most once, we could recurse but that would risk overflowing the stack.
let _ = tx.send(tx_err.0);
}
}
None
// Synchronous path, on the current thread.
else {
Self::run_migration(self.db.clone(), notif, &self.log)
} else {
Some(notif)
}
Ok(())
}
/// Perform the actual work of `process_finalization`.
fn run_migration(db: Arc<HotColdDB<E, Hot, Cold>>, notif: MigrationNotification, log: &Logger) {
fn run_migration(
db: Arc<HotColdDB<E, Hot, Cold>>,
notif: FinalizationNotification,
log: &Logger,
) {
debug!(log, "Database consolidation started");
let finalized_state_root = notif.finalized_state_root;
let finalized_state = match db.get_state(&finalized_state_root.into(), None) {
@@ -223,31 +265,44 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
) {
warn!(log, "Database compaction failed"; "error" => format!("{:?}", e));
}
debug!(log, "Database consolidation complete");
}
/// Spawn a new child thread to run the migration process.
///
/// Return a channel handle for sending new finalized states to the thread.
/// Return a channel handle for sending requests to the thread.
fn spawn_thread(
db: Arc<HotColdDB<E, Hot, Cold>>,
log: Logger,
) -> (mpsc::Sender<MigrationNotification>, thread::JoinHandle<()>) {
) -> (mpsc::Sender<Notification>, thread::JoinHandle<()>) {
let (tx, rx) = mpsc::channel();
let thread = thread::spawn(move || {
while let Ok(notif) = rx.recv() {
// Read the rest of the messages in the channel, ultimately choosing the `notif`
// with the highest finalized epoch.
let notif = rx
.try_iter()
.fold(notif, |best, other: MigrationNotification| {
if other.finalized_checkpoint.epoch > best.finalized_checkpoint.epoch {
other
} else {
best
}
});
// Read the rest of the messages in the channel, preferring any reconstruction
// notification, or the finalization notification with the greatest finalized epoch.
let notif =
rx.try_iter()
.fold(notif, |best, other: Notification| match (&best, &other) {
(Notification::Reconstruction, _)
| (_, Notification::Reconstruction) => Notification::Reconstruction,
(
Notification::Finalization(fin1),
Notification::Finalization(fin2),
) => {
if fin2.finalized_checkpoint.epoch > fin1.finalized_checkpoint.epoch
{
other
} else {
best
}
}
});
Self::run_migration(db.clone(), notif, &log);
match notif {
Notification::Reconstruction => Self::run_reconstruction(db.clone(), &log),
Notification::Finalization(fin) => Self::run_migration(db.clone(), fin, &log),
}
}
});
(tx, thread)

View File

@@ -2,12 +2,15 @@
use crate::beacon_chain::{BeaconChainTypes, OP_POOL_DB_KEY};
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
use operation_pool::{PersistedOperationPool, PersistedOperationPoolBase};
use ssz::{Decode, Encode};
use ssz_derive::{Decode, Encode};
use std::fs;
use std::path::Path;
use std::sync::Arc;
use store::config::OnDiskStoreConfig;
use store::hot_cold_store::{HotColdDB, HotColdDBError};
use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION};
use store::Error as StoreError;
use store::metadata::{SchemaVersion, CONFIG_KEY, CURRENT_SCHEMA_VERSION};
use store::{DBColumn, Error as StoreError, ItemStore, StoreItem};
const PUBKEY_CACHE_FILENAME: &str = "pubkey_cache.ssz";
@@ -73,6 +76,23 @@ pub fn migrate_schema<T: BeaconChainTypes>(
Ok(())
}
// Migration for weak subjectivity sync support and clean up of `OnDiskStoreConfig` (#1784).
(SchemaVersion(4), SchemaVersion(5)) => {
if let Some(OnDiskStoreConfigV4 {
slots_per_restore_point,
..
}) = db.hot_db.get(&CONFIG_KEY)?
{
let new_config = OnDiskStoreConfig {
slots_per_restore_point,
};
db.hot_db.put(&CONFIG_KEY, &new_config)?;
}
db.store_schema_version(to)?;
Ok(())
}
// Anything else is an error.
(_, _) => Err(HotColdDBError::UnsupportedSchemaVersion {
target_version: to,
@@ -81,3 +101,24 @@ pub fn migrate_schema<T: BeaconChainTypes>(
.into()),
}
}
// Store config used in v4 schema and earlier.
//
// Kept only so that v4 config bytes can still be decoded during the v4 -> v5 schema
// migration above, which carries over `slots_per_restore_point` and discards the rest.
#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)]
pub struct OnDiskStoreConfigV4 {
    // Number of slots between restore points (preserved by the migration).
    pub slots_per_restore_point: u64,
    // Unused (note the leading underscore); dropped by the v4 -> v5 migration.
    pub _block_cache_size: usize,
}
/// Persist the legacy v4 config under the `BeaconMeta` column using SSZ encoding.
impl StoreItem for OnDiskStoreConfigV4 {
    fn db_column() -> DBColumn {
        DBColumn::BeaconMeta
    }

    fn as_store_bytes(&self) -> Vec<u8> {
        self.as_ssz_bytes()
    }

    fn from_store_bytes(bytes: &[u8]) -> Result<Self, StoreError> {
        let config = Self::from_ssz_bytes(bytes)?;
        Ok(config)
    }
}

View File

@@ -12,7 +12,7 @@ use crate::{
};
use bls::get_withdrawal_credentials;
use futures::channel::mpsc::Receiver;
use genesis::interop_genesis_state;
pub use genesis::interop_genesis_state;
use int_to_bytes::int_to_bytes32;
use merkle_proof::MerkleTree;
use parking_lot::Mutex;