mirror of
https://github.com/sigp/lighthouse.git
synced 2026-07-03 04:44:28 +00:00
Gradual state reconstruction
Co-authored-by: Michael Sproul <michael@sigmaprime.io>
This commit is contained in:
committed by
Michael Sproul
parent
2f6ffff8d6
commit
481e792898
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -569,6 +569,7 @@ version = "0.2.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"bitvec 0.20.4",
|
"bitvec 0.20.4",
|
||||||
"bls",
|
"bls",
|
||||||
|
"crossbeam-channel",
|
||||||
"derivative",
|
"derivative",
|
||||||
"environment",
|
"environment",
|
||||||
"eth1",
|
"eth1",
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ hex = "0.4.2"
|
|||||||
exit-future = "0.2.0"
|
exit-future = "0.2.0"
|
||||||
unused_port = {path = "../../common/unused_port"}
|
unused_port = {path = "../../common/unused_port"}
|
||||||
oneshot_broadcast = { path = "../../common/oneshot_broadcast" }
|
oneshot_broadcast = { path = "../../common/oneshot_broadcast" }
|
||||||
|
crossbeam-channel = "0.5.5"
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "beacon_chain_tests"
|
name = "beacon_chain_tests"
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use slog::{debug, error, info, trace, warn, Logger};
|
use slog::{debug, error, info, trace, warn, Logger};
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::sync::{mpsc, Arc};
|
use std::sync::Arc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
use store::hot_cold_store::{migrate_database, HotColdDBError};
|
use store::hot_cold_store::{migrate_database, HotColdDBError};
|
||||||
@@ -25,6 +25,7 @@ const MAX_COMPACTION_PERIOD_SECONDS: u64 = 604800;
|
|||||||
const MIN_COMPACTION_PERIOD_SECONDS: u64 = 7200;
|
const MIN_COMPACTION_PERIOD_SECONDS: u64 = 7200;
|
||||||
/// Compact after a large finality gap, if we respect `MIN_COMPACTION_PERIOD_SECONDS`.
|
/// Compact after a large finality gap, if we respect `MIN_COMPACTION_PERIOD_SECONDS`.
|
||||||
const COMPACTION_FINALITY_DISTANCE: u64 = 1024;
|
const COMPACTION_FINALITY_DISTANCE: u64 = 1024;
|
||||||
|
const BLOCKS_PER_RECONSTRUCTION: usize = 8192 * 4;
|
||||||
|
|
||||||
/// Default number of epochs to wait between finalization migrations.
|
/// Default number of epochs to wait between finalization migrations.
|
||||||
pub const DEFAULT_EPOCHS_PER_RUN: u64 = 4;
|
pub const DEFAULT_EPOCHS_PER_RUN: u64 = 4;
|
||||||
@@ -33,10 +34,14 @@ pub const DEFAULT_EPOCHS_PER_RUN: u64 = 4;
|
|||||||
/// to the cold database.
|
/// to the cold database.
|
||||||
pub struct BackgroundMigrator<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
|
pub struct BackgroundMigrator<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
|
||||||
db: Arc<HotColdDB<E, Hot, Cold>>,
|
db: Arc<HotColdDB<E, Hot, Cold>>,
|
||||||
#[allow(clippy::type_complexity)]
|
|
||||||
tx_thread: Option<Mutex<(mpsc::Sender<Notification>, thread::JoinHandle<()>)>>,
|
|
||||||
/// Record of when the last migration ran, for enforcing `epochs_per_run`.
|
/// Record of when the last migration ran, for enforcing `epochs_per_run`.
|
||||||
prev_migration: Arc<Mutex<PrevMigration>>,
|
prev_migration: Arc<Mutex<PrevMigration>>,
|
||||||
|
tx_thread: Option<
|
||||||
|
Mutex<(
|
||||||
|
crossbeam_channel::Sender<Notification>,
|
||||||
|
thread::JoinHandle<()>,
|
||||||
|
)>,
|
||||||
|
>,
|
||||||
/// Genesis block root, for persisting the `PersistedBeaconChain`.
|
/// Genesis block root, for persisting the `PersistedBeaconChain`.
|
||||||
genesis_block_root: Hash256,
|
genesis_block_root: Hash256,
|
||||||
log: Logger,
|
log: Logger,
|
||||||
@@ -112,11 +117,13 @@ pub enum PruningError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Message sent to the migration thread containing the information it needs to run.
|
/// Message sent to the migration thread containing the information it needs to run.
|
||||||
|
#[derive(Debug)]
|
||||||
pub enum Notification {
|
pub enum Notification {
|
||||||
Finalization(FinalizationNotification),
|
Finalization(FinalizationNotification),
|
||||||
Reconstruction,
|
Reconstruction,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
pub struct FinalizationNotification {
|
pub struct FinalizationNotification {
|
||||||
finalized_state_root: BeaconStateHash,
|
finalized_state_root: BeaconStateHash,
|
||||||
finalized_checkpoint: Checkpoint,
|
finalized_checkpoint: Checkpoint,
|
||||||
@@ -203,7 +210,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn run_reconstruction(db: Arc<HotColdDB<E, Hot, Cold>>, log: &Logger) {
|
pub fn run_reconstruction(db: Arc<HotColdDB<E, Hot, Cold>>, log: &Logger) {
|
||||||
if let Err(e) = db.reconstruct_historic_states() {
|
if let Err(e) = db.reconstruct_historic_states(None) {
|
||||||
error!(
|
error!(
|
||||||
log,
|
log,
|
||||||
"State reconstruction failed";
|
"State reconstruction failed";
|
||||||
@@ -359,39 +366,83 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
|||||||
db: Arc<HotColdDB<E, Hot, Cold>>,
|
db: Arc<HotColdDB<E, Hot, Cold>>,
|
||||||
prev_migration: Arc<Mutex<PrevMigration>>,
|
prev_migration: Arc<Mutex<PrevMigration>>,
|
||||||
log: Logger,
|
log: Logger,
|
||||||
) -> (mpsc::Sender<Notification>, thread::JoinHandle<()>) {
|
) -> (
|
||||||
let (tx, rx) = mpsc::channel();
|
crossbeam_channel::Sender<Notification>,
|
||||||
|
thread::JoinHandle<()>,
|
||||||
|
) {
|
||||||
|
let (tx, rx) = crossbeam_channel::unbounded();
|
||||||
|
let tx_thread = tx.clone();
|
||||||
let thread = thread::spawn(move || {
|
let thread = thread::spawn(move || {
|
||||||
while let Ok(notif) = rx.recv() {
|
let mut sel = crossbeam_channel::Select::new();
|
||||||
// Read the rest of the messages in the channel, preferring any reconstruction
|
sel.recv(&rx);
|
||||||
// notification, or the finalization notification with the greatest finalized epoch.
|
|
||||||
let notif =
|
loop {
|
||||||
rx.try_iter()
|
// Block until something is in the queue.
|
||||||
.fold(notif, |best, other: Notification| match (&best, &other) {
|
let _queue_size = sel.ready();
|
||||||
(Notification::Reconstruction, _)
|
let queue: Vec<Notification> = rx.try_iter().collect();
|
||||||
| (_, Notification::Reconstruction) => Notification::Reconstruction,
|
debug!(
|
||||||
(
|
log,
|
||||||
Notification::Finalization(fin1),
|
"New worker thread poll";
|
||||||
Notification::Finalization(fin2),
|
"queue" => ?queue
|
||||||
) => {
|
);
|
||||||
if fin2.finalized_checkpoint.epoch > fin1.finalized_checkpoint.epoch
|
|
||||||
{
|
// Find a reconstruction notification and best finalization notification.
|
||||||
other
|
let reconstruction_notif = queue
|
||||||
} else {
|
.iter()
|
||||||
best
|
.find(|n| matches!(n, Notification::Reconstruction));
|
||||||
}
|
let migrate_notif = queue
|
||||||
}
|
.iter()
|
||||||
});
|
.filter_map(|n| match n {
|
||||||
|
// Reconstruction notifications are selected separately, so skip them here.
|
||||||
|
Notification::Reconstruction => None,
|
||||||
|
Notification::Finalization(f) => Some(f),
|
||||||
|
})
|
||||||
|
.max_by_key(|f| f.finalized_checkpoint.epoch);
|
||||||
|
|
||||||
|
// Do a bit of state reconstruction first if required.
|
||||||
|
if let Some(_) = reconstruction_notif {
|
||||||
|
let timer = std::time::Instant::now();
|
||||||
|
|
||||||
|
match db.reconstruct_historic_states(Some(BLOCKS_PER_RECONSTRUCTION)) {
|
||||||
|
Err(Error::StateReconstructionDidNotComplete) => {
|
||||||
|
info!(
|
||||||
|
log,
|
||||||
|
"Finished reconstruction batch";
|
||||||
|
"batch_time_ms" => timer.elapsed().as_millis()
|
||||||
|
);
|
||||||
|
// TODO: handle the send error (the result is currently discarded).
|
||||||
|
let _ = tx_thread.send(Notification::Reconstruction);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!(
|
||||||
|
log,
|
||||||
|
"State reconstruction failed";
|
||||||
|
"error" => ?e,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(()) => {
|
||||||
|
info!(
|
||||||
|
log,
|
||||||
|
"Finished state reconstruction";
|
||||||
|
"batch_time_ms" => timer.elapsed().as_millis()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the finalization migration.
|
||||||
|
if let Some(notif) = migrate_notif {
|
||||||
|
let timer = std::time::Instant::now();
|
||||||
|
|
||||||
// Do not run too frequently.
|
|
||||||
if let Some(epoch) = notif.epoch() {
|
|
||||||
let mut prev_migration = prev_migration.lock();
|
let mut prev_migration = prev_migration.lock();
|
||||||
|
|
||||||
|
// Do not run too frequently.
|
||||||
|
let epoch = notif.finalized_checkpoint.epoch;
|
||||||
if let Some(prev_epoch) = prev_migration.epoch {
|
if let Some(prev_epoch) = prev_migration.epoch {
|
||||||
if epoch < prev_epoch + prev_migration.epochs_per_run {
|
if epoch < prev_epoch + prev_migration.epochs_per_run {
|
||||||
debug!(
|
debug!(
|
||||||
log,
|
log,
|
||||||
"Database consolidation deferred";
|
"Finalization migration deferred";
|
||||||
"last_finalized_epoch" => prev_epoch,
|
"last_finalized_epoch" => prev_epoch,
|
||||||
"new_finalized_epoch" => epoch,
|
"new_finalized_epoch" => epoch,
|
||||||
"epochs_per_run" => prev_migration.epochs_per_run,
|
"epochs_per_run" => prev_migration.epochs_per_run,
|
||||||
@@ -404,11 +455,14 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
|||||||
// at which we ran. This value isn't tracked on disk so we will always migrate
|
// at which we ran. This value isn't tracked on disk so we will always migrate
|
||||||
// on the first finalization after startup.
|
// on the first finalization after startup.
|
||||||
prev_migration.epoch = Some(epoch);
|
prev_migration.epoch = Some(epoch);
|
||||||
}
|
|
||||||
|
|
||||||
match notif {
|
Self::run_migration(db.clone(), notif.to_owned(), &log);
|
||||||
Notification::Reconstruction => Self::run_reconstruction(db.clone(), &log),
|
|
||||||
Notification::Finalization(fin) => Self::run_migration(db.clone(), fin, &log),
|
info!(
|
||||||
|
log,
|
||||||
|
"Finished finalization migration";
|
||||||
|
"running_time_ms" => timer.elapsed().as_millis()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -534,6 +588,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
|
|||||||
|
|
||||||
for maybe_tuple in iter {
|
for maybe_tuple in iter {
|
||||||
let (block_root, state_root, slot) = maybe_tuple?;
|
let (block_root, state_root, slot) = maybe_tuple?;
|
||||||
|
|
||||||
let block_root = SignedBeaconBlockHash::from(block_root);
|
let block_root = SignedBeaconBlockHash::from(block_root);
|
||||||
let state_root = BeaconStateHash::from(state_root);
|
let state_root = BeaconStateHash::from(state_root);
|
||||||
|
|
||||||
|
|||||||
@@ -444,7 +444,10 @@ fn range_query<S: KeyValueStore<E>, E: EthSpec, T: Decode + Encode>(
|
|||||||
|
|
||||||
for chunk_index in range {
|
for chunk_index in range {
|
||||||
let key = &chunk_key(chunk_index)[..];
|
let key = &chunk_key(chunk_index)[..];
|
||||||
let chunk = Chunk::load(store, column, key)?.ok_or(ChunkError::Missing { chunk_index })?;
|
let chunk = Chunk::load(store, column, key)?.ok_or(ChunkError::Missing {
|
||||||
|
column,
|
||||||
|
chunk_index,
|
||||||
|
})?;
|
||||||
result.push(chunk);
|
result.push(chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -675,6 +678,7 @@ pub enum ChunkError {
|
|||||||
actual: usize,
|
actual: usize,
|
||||||
},
|
},
|
||||||
Missing {
|
Missing {
|
||||||
|
column: DBColumn,
|
||||||
chunk_index: usize,
|
chunk_index: usize,
|
||||||
},
|
},
|
||||||
MissingGenesisValue,
|
MissingGenesisValue,
|
||||||
|
|||||||
@@ -16,7 +16,10 @@ where
|
|||||||
Hot: KeyValueStore<E> + ItemStore<E>,
|
Hot: KeyValueStore<E> + ItemStore<E>,
|
||||||
Cold: KeyValueStore<E> + ItemStore<E>,
|
Cold: KeyValueStore<E> + ItemStore<E>,
|
||||||
{
|
{
|
||||||
pub fn reconstruct_historic_states(self: &Arc<Self>) -> Result<(), Error> {
|
pub fn reconstruct_historic_states(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
num_blocks: Option<usize>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
let mut anchor = if let Some(anchor) = self.get_anchor_info() {
|
let mut anchor = if let Some(anchor) = self.get_anchor_info() {
|
||||||
anchor
|
anchor
|
||||||
} else {
|
} else {
|
||||||
@@ -48,12 +51,15 @@ where
|
|||||||
// Use a dummy root, as we never read the block for the upper limit state.
|
// Use a dummy root, as we never read the block for the upper limit state.
|
||||||
let upper_limit_block_root = Hash256::repeat_byte(0xff);
|
let upper_limit_block_root = Hash256::repeat_byte(0xff);
|
||||||
|
|
||||||
let block_root_iter = self.forwards_block_roots_iterator(
|
// If `num_blocks` is not specified, iterate over all blocks.
|
||||||
|
let block_root_iter = self
|
||||||
|
.forwards_block_roots_iterator(
|
||||||
lower_limit_slot,
|
lower_limit_slot,
|
||||||
upper_limit_state,
|
upper_limit_state,
|
||||||
upper_limit_block_root,
|
upper_limit_block_root,
|
||||||
&self.spec,
|
&self.spec,
|
||||||
)?;
|
)?
|
||||||
|
.take(num_blocks.unwrap_or(usize::MAX));
|
||||||
|
|
||||||
// The state to be advanced.
|
// The state to be advanced.
|
||||||
let mut state = self
|
let mut state = self
|
||||||
|
|||||||
Reference in New Issue
Block a user