mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-15 10:52:43 +00:00
Implement checkpoint sync (#2244)
## Issue Addressed Closes #1891 Closes #1784 ## Proposed Changes Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint. ## Additional Info - [x] Return unavailable status for out-of-range blocks requested by peers (#2561) - [x] Implement sync daemon for fetching historical blocks (#2561) - [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module) - [x] Consistency check for initial block + state - [x] Fetch the initial state and block from a beacon node HTTP endpoint - [x] Don't crash fetching beacon states by slot from the API - [x] Background service for state reconstruction, triggered by CLI flag or API call. Considered out of scope for this PR: - Drop the requirement to provide the `--checkpoint-block` (this would require some pretty heavy refactoring of block verification) Co-authored-by: Diva M <divma@protonmail.com>
This commit is contained in:
@@ -14,15 +14,34 @@ const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
|
||||
/// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
|
||||
const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;
|
||||
|
||||
/// Allows customisation of the above constants used in other sync methods such as BackFillSync.
///
/// Both functions are associated functions (they take no `self`), so a configuration is chosen
/// purely through the `B` type parameter of `BatchInfo<T, B>`.
|
||||
pub trait BatchConfig {
|
||||
/// The maximum batch download attempts.
///
/// `BatchInfo` marks a batch as `BatchState::Failed` once the number of recorded failed
/// download attempts reaches this value.
|
||||
fn max_batch_download_attempts() -> u8;
|
||||
/// The max batch processing attempts.
///
/// `BatchInfo` marks a batch as `BatchState::Failed` once the number of recorded failed
/// processing attempts reaches this value.
|
||||
fn max_batch_processing_attempts() -> u8;
|
||||
}
|
||||
|
||||
/// The default `BatchConfig` of `BatchInfo`: it exposes this module's
/// `MAX_BATCH_DOWNLOAD_ATTEMPTS` and `MAX_BATCH_PROCESSING_ATTEMPTS` constants unchanged.
pub struct RangeSyncBatchConfig {}
|
||||
|
||||
/// Wires the module-level retry limits into the `BatchConfig` interface.
impl BatchConfig for RangeSyncBatchConfig {
|
||||
/// Returns `MAX_BATCH_DOWNLOAD_ATTEMPTS`.
fn max_batch_download_attempts() -> u8 {
|
||||
MAX_BATCH_DOWNLOAD_ATTEMPTS
|
||||
}
|
||||
/// Returns `MAX_BATCH_PROCESSING_ATTEMPTS`.
fn max_batch_processing_attempts() -> u8 {
|
||||
MAX_BATCH_PROCESSING_ATTEMPTS
|
||||
}
|
||||
}
|
||||
|
||||
/// Error type of a batch in a wrong state.
|
||||
// Such errors should never be encountered.
|
||||
pub struct WrongState(pub(super) String);
|
||||
pub struct WrongState(pub(crate) String);
|
||||
|
||||
/// Auxiliary type alias for readability.
|
||||
type IsFailed = bool;
|
||||
|
||||
/// A segment of a chain.
|
||||
pub struct BatchInfo<T: EthSpec> {
|
||||
pub struct BatchInfo<T: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
|
||||
/// Start slot of the batch.
|
||||
start_slot: Slot,
|
||||
/// End slot of the batch.
|
||||
@@ -33,6 +52,8 @@ pub struct BatchInfo<T: EthSpec> {
|
||||
failed_download_attempts: Vec<PeerId>,
|
||||
/// State of the batch.
|
||||
state: BatchState<T>,
|
||||
/// Zero-sized marker that pins the `BatchConfig` type parameter `B` to this batch.
|
||||
marker: std::marker::PhantomData<B>,
|
||||
}
|
||||
|
||||
/// Current state of a batch
|
||||
@@ -73,7 +94,7 @@ impl<T: EthSpec> BatchState<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: EthSpec> BatchInfo<T> {
|
||||
impl<T: EthSpec, B: BatchConfig> BatchInfo<T, B> {
|
||||
/// Batches are downloaded excluding the first block of the epoch assuming it has already been
|
||||
/// downloaded.
|
||||
///
|
||||
@@ -91,6 +112,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
failed_processing_attempts: Vec::new(),
|
||||
failed_download_attempts: Vec::new(),
|
||||
state: BatchState::AwaitingDownload,
|
||||
marker: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,6 +142,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns the peer that is currently responsible for progressing the state of the batch.
|
||||
pub fn current_peer(&self) -> Option<&PeerId> {
|
||||
match &self.state {
|
||||
BatchState::AwaitingDownload | BatchState::Failed => None,
|
||||
@@ -131,6 +154,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a BlocksByRange request associated with the batch.
|
||||
pub fn to_blocks_by_range_request(&self) -> BlocksByRangeRequest {
|
||||
BlocksByRangeRequest {
|
||||
start_slot: self.start_slot.into(),
|
||||
@@ -192,7 +216,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
// can be tried again
|
||||
self.failed_download_attempts.push(peer);
|
||||
self.state = if self.failed_download_attempts.len()
|
||||
>= MAX_BATCH_DOWNLOAD_ATTEMPTS as usize
|
||||
>= B::max_batch_download_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
@@ -219,14 +243,21 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark the batch as failed and return whether we can attempt a re-download.
|
||||
///
|
||||
/// This can happen if a peer disconnects or some error occurred that was not the peer's fault.
|
||||
/// The `mark_failed` parameter, when set to false, does not increment the failed attempts of
|
||||
/// this batch or register the peer; instead, a re-download is simply attempted.
|
||||
#[must_use = "Batch may have failed"]
|
||||
pub fn download_failed(&mut self) -> Result<IsFailed, WrongState> {
|
||||
pub fn download_failed(&mut self, mark_failed: bool) -> Result<IsFailed, WrongState> {
|
||||
match self.state.poison() {
|
||||
BatchState::Downloading(peer, _, _request_id) => {
|
||||
// register the attempt and check if the batch can be tried again
|
||||
self.failed_download_attempts.push(peer);
|
||||
if mark_failed {
|
||||
self.failed_download_attempts.push(peer);
|
||||
}
|
||||
self.state = if self.failed_download_attempts.len()
|
||||
>= MAX_BATCH_DOWNLOAD_ATTEMPTS as usize
|
||||
// the retry limit is supplied by the batch's `BatchConfig`
>= B::max_batch_download_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
@@ -294,7 +325,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
|
||||
// check if the batch can be downloaded again
|
||||
if self.failed_processing_attempts.len()
|
||||
>= MAX_BATCH_PROCESSING_ATTEMPTS as usize
|
||||
>= B::max_batch_processing_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
@@ -324,7 +355,7 @@ impl<T: EthSpec> BatchInfo<T> {
|
||||
|
||||
// check if the batch can be downloaded again
|
||||
self.state = if self.failed_processing_attempts.len()
|
||||
>= MAX_BATCH_PROCESSING_ATTEMPTS as usize
|
||||
>= B::max_batch_processing_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
@@ -365,7 +396,7 @@ impl Attempt {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: EthSpec> slog::KV for &mut BatchInfo<T> {
|
||||
impl<T: EthSpec, B: BatchConfig> slog::KV for &mut BatchInfo<T, B> {
|
||||
fn serialize(
|
||||
&self,
|
||||
record: &slog::Record,
|
||||
@@ -375,7 +406,7 @@ impl<T: EthSpec> slog::KV for &mut BatchInfo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: EthSpec> slog::KV for BatchInfo<T> {
|
||||
impl<T: EthSpec, B: BatchConfig> slog::KV for BatchInfo<T, B> {
|
||||
fn serialize(
|
||||
&self,
|
||||
record: &slog::Record,
|
||||
|
||||
@@ -181,7 +181,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// fail the batches
|
||||
for id in batch_ids {
|
||||
if let Some(batch) = self.batches.get_mut(&id) {
|
||||
if batch.download_failed()? {
|
||||
if batch.download_failed(true)? {
|
||||
return Err(RemoveChain::ChainFailed(id));
|
||||
}
|
||||
self.retry_batch_download(network, id)?;
|
||||
@@ -273,7 +273,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sends to process the batch with the given id.
|
||||
/// Processes the batch with the given id.
|
||||
/// The batch must exist and be ready for processing
|
||||
fn process_batch(
|
||||
&mut self,
|
||||
@@ -794,7 +794,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
if let Some(active_requests) = self.peers.get_mut(peer_id) {
|
||||
active_requests.remove(&batch_id);
|
||||
}
|
||||
if batch.download_failed()? {
|
||||
if batch.download_failed(true)? {
|
||||
return Err(RemoveChain::ChainFailed(batch_id));
|
||||
}
|
||||
self.retry_batch_download(network, batch_id)
|
||||
@@ -837,7 +837,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Requests the batch asigned to the given id from a given peer.
|
||||
/// Requests the batch assigned to the given id from a given peer.
|
||||
pub fn send_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T::EthSpec>,
|
||||
@@ -883,7 +883,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.peers
|
||||
.get_mut(&peer)
|
||||
.map(|request| request.remove(&batch_id));
|
||||
if batch.download_failed()? {
|
||||
if batch.download_failed(true)? {
|
||||
return Err(RemoveChain::ChainFailed(batch_id));
|
||||
} else {
|
||||
return self.retry_batch_download(network, batch_id);
|
||||
@@ -990,7 +990,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// this batch could have been included already being an optimistic batch
|
||||
match self.batches.entry(batch_id) {
|
||||
Entry::Occupied(_) => {
|
||||
// this batch doesn't need downlading, let this same function decide the next batch
|
||||
// this batch doesn't need downloading, let this same function decide the next batch
|
||||
self.to_be_downloaded += EPOCHS_PER_BATCH;
|
||||
self.include_next_batch()
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ mod chain_collection;
|
||||
mod range;
|
||||
mod sync_type;
|
||||
|
||||
pub use batch::BatchInfo;
|
||||
pub use batch::{BatchConfig, BatchInfo, BatchState};
|
||||
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
|
||||
pub use range::RangeSync;
|
||||
pub use sync_type::RangeSyncType;
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
|
||||
//! and further batches are requested as current blocks are being processed.
|
||||
|
||||
use super::chain::{ChainId, RemoveChain, SyncingChain};
|
||||
use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
|
||||
use super::chain_collection::ChainCollection;
|
||||
use super::sync_type::RangeSyncType;
|
||||
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
|
||||
@@ -194,34 +194,29 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T::EthSpec>,
|
||||
peer_id: PeerId,
|
||||
chain_id: ChainId,
|
||||
batch_id: BatchId,
|
||||
request_id: RequestId,
|
||||
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
|
||||
) {
|
||||
// get the chain and batch for which this response belongs
|
||||
if let Some((chain_id, batch_id)) =
|
||||
network.blocks_by_range_response(request_id, beacon_block.is_none())
|
||||
{
|
||||
// check if this chunk removes the chain
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
"block response",
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
// check if this chunk removes the chain
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
"block response",
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -298,31 +293,28 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T::EthSpec>,
|
||||
peer_id: PeerId,
|
||||
batch_id: BatchId,
|
||||
chain_id: ChainId,
|
||||
request_id: RequestId,
|
||||
) {
|
||||
// get the chain and batch for which this response belongs
|
||||
if let Some((chain_id, batch_id)) = network.blocks_by_range_response(request_id, true) {
|
||||
// check that this request is pending
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.inject_error(network, batch_id, &peer_id, request_id)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
"RPC error",
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
// check that this request is pending
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.inject_error(network, batch_id, &peer_id, request_id)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
"RPC error",
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user