mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-09 19:51:47 +00:00
Implement checkpoint sync (#2244)
## Issue Addressed

Closes #1891
Closes #1784

## Proposed Changes

Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint.

## Additional Info

- [x] Return unavailable status for out-of-range blocks requested by peers (#2561)
- [x] Implement sync daemon for fetching historical blocks (#2561)
- [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module)
- [x] Consistency check for initial block + state
- [x] Fetch the initial state and block from a beacon node HTTP endpoint
- [x] Don't crash fetching beacon states by slot from the API
- [x] Background service for state reconstruction, triggered by CLI flag or API call.

Considered out of scope for this PR:

- Drop the requirement to provide the `--checkpoint-block` (this would require some pretty heavy refactoring of block verification)

Co-authored-by: Diva M <divma@protonmail.com>
This commit is contained in:
@@ -12,6 +12,10 @@ use beacon_chain::{
|
||||
};
|
||||
use environment::RuntimeContext;
|
||||
use eth1::{Config as Eth1Config, Service as Eth1Service};
|
||||
use eth2::{
|
||||
types::{BlockId, StateId},
|
||||
BeaconNodeHttpClient, Error as ApiError, Timeouts,
|
||||
};
|
||||
use eth2_libp2p::NetworkGlobals;
|
||||
use genesis::{interop_genesis_state, Eth1GenesisService};
|
||||
use monitoring_api::{MonitoringHttpClient, ProcessType};
|
||||
@@ -25,11 +29,16 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use timer::spawn_timer;
|
||||
use tokio::sync::{mpsc::UnboundedSender, oneshot};
|
||||
use types::{test_utils::generate_deterministic_keypairs, BeaconState, ChainSpec, EthSpec};
|
||||
use types::{
|
||||
test_utils::generate_deterministic_keypairs, BeaconState, ChainSpec, EthSpec, SignedBeaconBlock,
|
||||
};
|
||||
|
||||
/// Interval between polling the eth1 node for genesis information.
|
||||
pub const ETH1_GENESIS_UPDATE_INTERVAL_MILLIS: u64 = 7_000;
|
||||
|
||||
/// Timeout for checkpoint sync HTTP requests.
|
||||
pub const CHECKPOINT_SYNC_HTTP_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Builds a `Client` instance.
|
||||
///
|
||||
/// ## Notes
|
||||
@@ -168,11 +177,22 @@ where
|
||||
//
|
||||
// Alternatively, if there's a beacon chain in the database then always resume
|
||||
// using it.
|
||||
let client_genesis = if client_genesis == ClientGenesis::FromStore && !chain_exists {
|
||||
let client_genesis = if matches!(client_genesis, ClientGenesis::FromStore) && !chain_exists
|
||||
{
|
||||
info!(context.log(), "Defaulting to deposit contract genesis");
|
||||
|
||||
ClientGenesis::DepositContract
|
||||
} else if chain_exists {
|
||||
if matches!(client_genesis, ClientGenesis::WeakSubjSszBytes { .. })
|
||||
|| matches!(client_genesis, ClientGenesis::CheckpointSyncUrl { .. })
|
||||
{
|
||||
info!(
|
||||
context.log(),
|
||||
"Refusing to checkpoint sync";
|
||||
"msg" => "database already exists, use --purge-db to force checkpoint sync"
|
||||
);
|
||||
}
|
||||
|
||||
ClientGenesis::FromStore
|
||||
} else {
|
||||
client_genesis
|
||||
@@ -200,6 +220,103 @@ where
|
||||
|
||||
builder.genesis_state(genesis_state).map(|v| (v, None))?
|
||||
}
|
||||
ClientGenesis::WeakSubjSszBytes {
|
||||
anchor_state_bytes,
|
||||
anchor_block_bytes,
|
||||
genesis_state_bytes,
|
||||
} => {
|
||||
info!(context.log(), "Starting checkpoint sync");
|
||||
|
||||
let anchor_state = BeaconState::from_ssz_bytes(&anchor_state_bytes, &spec)
|
||||
.map_err(|e| format!("Unable to parse weak subj state SSZ: {:?}", e))?;
|
||||
let anchor_block = SignedBeaconBlock::from_ssz_bytes(&anchor_block_bytes, &spec)
|
||||
.map_err(|e| format!("Unable to parse weak subj block SSZ: {:?}", e))?;
|
||||
let genesis_state = BeaconState::from_ssz_bytes(&genesis_state_bytes, &spec)
|
||||
.map_err(|e| format!("Unable to parse genesis state SSZ: {:?}", e))?;
|
||||
|
||||
builder
|
||||
.weak_subjectivity_state(anchor_state, anchor_block, genesis_state)
|
||||
.map(|v| (v, None))?
|
||||
}
|
||||
ClientGenesis::CheckpointSyncUrl {
|
||||
genesis_state_bytes,
|
||||
url,
|
||||
} => {
|
||||
info!(
|
||||
context.log(),
|
||||
"Starting checkpoint sync";
|
||||
"remote_url" => %url,
|
||||
);
|
||||
|
||||
let remote =
|
||||
BeaconNodeHttpClient::new(url, Timeouts::set_all(CHECKPOINT_SYNC_HTTP_TIMEOUT));
|
||||
let slots_per_epoch = TEthSpec::slots_per_epoch();
|
||||
|
||||
// Find a suitable finalized block on an epoch boundary.
|
||||
let mut block = remote
|
||||
.get_beacon_blocks_ssz::<TEthSpec>(BlockId::Finalized, &spec)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
ApiError::InvalidSsz(e) => format!(
|
||||
"Unable to parse SSZ: {:?}. Ensure the checkpoint-sync-url refers to a \
|
||||
node for the correct network",
|
||||
e
|
||||
),
|
||||
e => format!("Error fetching finalized block from remote: {:?}", e),
|
||||
})?
|
||||
.ok_or("Finalized block missing from remote, it returned 404")?;
|
||||
|
||||
let mut block_slot = block.slot();
|
||||
|
||||
while block.slot() % slots_per_epoch != 0 {
|
||||
block_slot = (block_slot / slots_per_epoch - 1) * slots_per_epoch;
|
||||
|
||||
debug!(
|
||||
context.log(),
|
||||
"Searching for aligned checkpoint block";
|
||||
"block_slot" => block_slot,
|
||||
);
|
||||
|
||||
if let Some(found_block) = remote
|
||||
.get_beacon_blocks_ssz::<TEthSpec>(BlockId::Slot(block_slot), &spec)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
format!("Error fetching block at slot {}: {:?}", block_slot, e)
|
||||
})?
|
||||
{
|
||||
block = found_block;
|
||||
}
|
||||
}
|
||||
|
||||
let state_root = block.state_root();
|
||||
let state = remote
|
||||
.get_debug_beacon_states_ssz::<TEthSpec>(StateId::Root(state_root), &spec)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
format!(
|
||||
"Error loading checkpoint state from remote {:?}: {:?}",
|
||||
state_root, e
|
||||
)
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
format!("Checkpoint state missing from remote: {:?}", state_root)
|
||||
})?;
|
||||
|
||||
let genesis_state = BeaconState::from_ssz_bytes(&genesis_state_bytes, &spec)
|
||||
.map_err(|e| format!("Unable to parse genesis state SSZ: {:?}", e))?;
|
||||
|
||||
info!(
|
||||
context.log(),
|
||||
"Loaded checkpoint block and state";
|
||||
"slot" => block.slot(),
|
||||
"block_root" => ?block.canonical_root(),
|
||||
"state_root" => ?state_root,
|
||||
);
|
||||
|
||||
builder
|
||||
.weak_subjectivity_state(state, block, genesis_state)
|
||||
.map(|v| (v, None))?
|
||||
}
|
||||
ClientGenesis::DepositContract => {
|
||||
info!(
|
||||
context.log(),
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use directory::DEFAULT_ROOT_DIR;
|
||||
use network::NetworkConfig;
|
||||
use sensitive_url::SensitiveUrl;
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
@@ -9,7 +10,7 @@ use types::{Graffiti, PublicKeyBytes};
|
||||
const DEFAULT_FREEZER_DB_DIR: &str = "freezer_db";
|
||||
|
||||
/// Defines how the client should initialize the `BeaconChain` and other components.
|
||||
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ClientGenesis {
|
||||
/// Creates a genesis state as per the 2019 Canada interop specifications.
|
||||
Interop {
|
||||
@@ -26,6 +27,15 @@ pub enum ClientGenesis {
|
||||
/// We include the bytes instead of the `BeaconState<E>` because the `EthSpec` type
|
||||
/// parameter would be very annoying.
|
||||
SszBytes { genesis_state_bytes: Vec<u8> },
|
||||
WeakSubjSszBytes {
|
||||
genesis_state_bytes: Vec<u8>,
|
||||
anchor_state_bytes: Vec<u8>,
|
||||
anchor_block_bytes: Vec<u8>,
|
||||
},
|
||||
CheckpointSyncUrl {
|
||||
genesis_state_bytes: Vec<u8>,
|
||||
url: SensitiveUrl,
|
||||
},
|
||||
}
|
||||
|
||||
impl Default for ClientGenesis {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::metrics;
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use eth2_libp2p::NetworkGlobals;
|
||||
use eth2_libp2p::{types::SyncState, NetworkGlobals};
|
||||
use parking_lot::Mutex;
|
||||
use slog::{debug, error, info, warn, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
@@ -19,6 +19,9 @@ const MINUTES_PER_HOUR: i64 = 60;
|
||||
/// The number of historical observations that should be used to determine the average sync time.
|
||||
const SPEEDO_OBSERVATIONS: usize = 4;
|
||||
|
||||
/// The number of slots between log messages that give details about the backfill process.
|
||||
const BACKFILL_LOG_INTERVAL: u64 = 5;
|
||||
|
||||
/// Spawns a notifier service which periodically logs information about the node.
|
||||
pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
executor: task_executor::TaskExecutor,
|
||||
@@ -42,6 +45,16 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
let log = executor.log().clone();
|
||||
let mut interval = tokio::time::interval_at(start_instant, interval_duration);
|
||||
|
||||
// Keep track of sync state and reset the speedo on specific sync state changes.
|
||||
// Specifically, if we switch between a sync and a backfill sync, reset the speedo.
|
||||
let mut current_sync_state = network.sync_state();
|
||||
|
||||
// Store info if we are required to do a backfill sync.
|
||||
let original_anchor_slot = beacon_chain
|
||||
.store
|
||||
.get_anchor_info()
|
||||
.map(|ai| ai.oldest_block_slot);
|
||||
|
||||
let interval_future = async move {
|
||||
// Perform pre-genesis logging.
|
||||
loop {
|
||||
@@ -63,11 +76,30 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
}
|
||||
|
||||
// Perform post-genesis logging.
|
||||
let mut last_backfill_log_slot = None;
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let connected_peer_count = network.connected_peers();
|
||||
let sync_state = network.sync_state();
|
||||
|
||||
// Determine if we have switched syncing chains
|
||||
if sync_state != current_sync_state {
|
||||
match (current_sync_state, &sync_state) {
|
||||
(_, SyncState::BackFillSyncing { .. }) => {
|
||||
// We have transitioned to a backfill sync. Reset the speedo.
|
||||
let mut speedo = speedo.lock();
|
||||
speedo.clear();
|
||||
}
|
||||
(SyncState::BackFillSyncing { .. }, _) => {
|
||||
// We have transitioned from a backfill sync, reset the speedo
|
||||
let mut speedo = speedo.lock();
|
||||
speedo.clear();
|
||||
}
|
||||
(_, _) => {}
|
||||
}
|
||||
current_sync_state = sync_state;
|
||||
}
|
||||
|
||||
let head_info = match beacon_chain.head_info() {
|
||||
Ok(head_info) => head_info,
|
||||
Err(e) => {
|
||||
@@ -97,17 +129,46 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
let finalized_root = head_info.finalized_checkpoint.root;
|
||||
let head_root = head_info.block_root;
|
||||
|
||||
let mut speedo = speedo.lock();
|
||||
speedo.observe(head_slot, Instant::now());
|
||||
// The default is for regular sync but this gets modified if backfill sync is in
|
||||
// progress.
|
||||
let mut sync_distance = current_slot - head_slot;
|
||||
|
||||
let mut speedo = speedo.lock();
|
||||
match current_sync_state {
|
||||
SyncState::BackFillSyncing { .. } => {
|
||||
// Observe backfilling sync info.
|
||||
if let Some(oldest_slot) = original_anchor_slot {
|
||||
if let Some(current_anchor_slot) = beacon_chain
|
||||
.store
|
||||
.get_anchor_info()
|
||||
.map(|ai| ai.oldest_block_slot)
|
||||
{
|
||||
sync_distance = current_anchor_slot;
|
||||
speedo
|
||||
// For backfill sync use a fake slot which is the distance we've progressed from the starting `oldest_block_slot`.
|
||||
.observe(
|
||||
oldest_slot.saturating_sub(current_anchor_slot),
|
||||
Instant::now(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
SyncState::SyncingFinalized { .. }
|
||||
| SyncState::SyncingHead { .. }
|
||||
| SyncState::SyncTransition => {
|
||||
speedo.observe(head_slot, Instant::now());
|
||||
}
|
||||
SyncState::Stalled | SyncState::Synced => {}
|
||||
}
|
||||
|
||||
// NOTE: This is going to change based on which sync we are currently performing. A
|
||||
// backfill sync should process slots significantly faster than the other sync
|
||||
// processes.
|
||||
metrics::set_gauge(
|
||||
&metrics::SYNC_SLOTS_PER_SECOND,
|
||||
speedo.slots_per_second().unwrap_or(0_f64) as i64,
|
||||
);
|
||||
|
||||
// The next two lines take advantage of saturating subtraction on `Slot`.
|
||||
let head_distance = current_slot - head_slot;
|
||||
|
||||
if connected_peer_count <= WARN_PEER_COUNT {
|
||||
warn!(log, "Low peer count"; "peer_count" => peer_count_pretty(connected_peer_count));
|
||||
}
|
||||
@@ -121,16 +182,57 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
"head_block" => format!("{}", head_root),
|
||||
"head_slot" => head_slot,
|
||||
"current_slot" => current_slot,
|
||||
"sync_state" =>format!("{}", sync_state)
|
||||
"sync_state" =>format!("{}", current_sync_state)
|
||||
);
|
||||
|
||||
// Log if we are backfilling.
|
||||
let is_backfilling = matches!(current_sync_state, SyncState::BackFillSyncing { .. });
|
||||
if is_backfilling
|
||||
&& last_backfill_log_slot
|
||||
.map_or(true, |slot| slot + BACKFILL_LOG_INTERVAL <= current_slot)
|
||||
{
|
||||
last_backfill_log_slot = Some(current_slot);
|
||||
|
||||
let distance = format!(
|
||||
"{} slots ({})",
|
||||
sync_distance.as_u64(),
|
||||
slot_distance_pretty(sync_distance, slot_duration)
|
||||
);
|
||||
|
||||
let speed = speedo.slots_per_second();
|
||||
let display_speed = speed.map_or(false, |speed| speed != 0.0);
|
||||
|
||||
if display_speed {
|
||||
info!(
|
||||
log,
|
||||
"Downloading historical blocks";
|
||||
"distance" => distance,
|
||||
"speed" => sync_speed_pretty(speed),
|
||||
"est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_anchor_slot.unwrap_or(current_slot))),
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
log,
|
||||
"Downloading historical blocks";
|
||||
"distance" => distance,
|
||||
"est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_anchor_slot.unwrap_or(current_slot))),
|
||||
);
|
||||
}
|
||||
} else if !is_backfilling && last_backfill_log_slot.is_some() {
|
||||
last_backfill_log_slot = None;
|
||||
info!(
|
||||
log,
|
||||
"Historical block download complete";
|
||||
);
|
||||
}
|
||||
|
||||
// Log if we are syncing
|
||||
if sync_state.is_syncing() {
|
||||
if current_sync_state.is_syncing() {
|
||||
metrics::set_gauge(&metrics::IS_SYNCED, 0);
|
||||
let distance = format!(
|
||||
"{} slots ({})",
|
||||
head_distance.as_u64(),
|
||||
slot_distance_pretty(head_distance, slot_duration)
|
||||
sync_distance.as_u64(),
|
||||
slot_distance_pretty(sync_distance, slot_duration)
|
||||
);
|
||||
|
||||
let speed = speedo.slots_per_second();
|
||||
@@ -154,7 +256,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
"est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(current_slot)),
|
||||
);
|
||||
}
|
||||
} else if sync_state.is_synced() {
|
||||
} else if current_sync_state.is_synced() {
|
||||
metrics::set_gauge(&metrics::IS_SYNCED, 1);
|
||||
let block_info = if current_slot > head_slot {
|
||||
" … empty".to_string()
|
||||
@@ -397,4 +499,9 @@ impl Speedo {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears all past observations so that the speedo can be used for an alternative sync
/// (i.e. backfill sync).
|
||||
pub fn clear(&mut self) {
|
||||
self.0.clear()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user