Implement checkpoint sync (#2244)

## Issue Addressed

Closes #1891
Closes #1784

## Proposed Changes

Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint.

## Additional Info

- [x] Return unavailable status for out-of-range blocks requested by peers (#2561)
- [x] Implement sync daemon for fetching historical blocks (#2561)
- [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module)
- [x] Consistency check for initial block + state
- [x] Fetch the initial state and block from a beacon node HTTP endpoint
- [x] Don't crash fetching beacon states by slot from the API
- [x] Background service for state reconstruction, triggered by CLI flag or API call.

Considered out of scope for this PR:

- Drop the requirement to provide the `--checkpoint-block` flag (this would require some pretty heavy refactoring of block verification)


Co-authored-by: Diva M <divma@protonmail.com>
This commit is contained in:
Michael Sproul
2021-09-22 00:37:28 +00:00
parent 280e4fe23d
commit 9667dc2f03
71 changed files with 4012 additions and 459 deletions

View File

@@ -2,7 +2,7 @@ use crate::beacon_processor::worker::FUTURE_SLOT_TOLERANCE;
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;
use crate::sync::SyncMessage;
use beacon_chain::{BeaconChainError, BeaconChainTypes, WhenSlotSkipped};
use beacon_chain::{BeaconChainError, BeaconChainTypes, HistoricalBlockError, WhenSlotSkipped};
use eth2_libp2p::rpc::StatusMessage;
use eth2_libp2p::rpc::*;
use eth2_libp2p::{PeerId, PeerRequestId, ReportSource, Response, SyncInfo};
@@ -38,6 +38,21 @@ impl<T: BeaconChainTypes> Worker<T> {
})
}
/// Build a `NetworkMessage::SendErrorResponse` for the request identified by
/// `id` and hand it to the network service for delivery to `peer_id`.
///
/// `error` is the RPC error code reported to the peer and `reason` a short
/// human-readable explanation.
pub fn send_error_response(
    &self,
    peer_id: PeerId,
    error: RPCResponseErrorCode,
    reason: String,
    id: PeerRequestId,
) {
    let message = NetworkMessage::SendErrorResponse {
        peer_id,
        error,
        reason,
        id,
    };
    self.send_network_message(message)
}
/* Processing functions */
/// Process a `Status` message to determine if a peer is relevant to us. If the peer is
@@ -163,6 +178,20 @@ impl<T: BeaconChainTypes> Worker<T> {
.forwards_iter_block_roots(Slot::from(req.start_slot))
{
Ok(iter) => iter,
Err(BeaconChainError::HistoricalBlockError(
HistoricalBlockError::BlockOutOfRange {
slot,
oldest_block_slot,
},
)) => {
debug!(self.log, "Range request failed during backfill"; "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot);
return self.send_error_response(
peer_id,
RPCResponseErrorCode::ResourceUnavailable,
"Backfilling".into(),
request_id,
);
}
Err(e) => return error!(self.log, "Unable to obtain root iter"; "error" => ?e),
};

View File

@@ -2,9 +2,11 @@ use super::{super::work_reprocessing_queue::ReprocessQueueMessage, Worker};
use crate::beacon_processor::worker::FUTURE_SLOT_TOLERANCE;
use crate::beacon_processor::BlockResultSender;
use crate::metrics;
use crate::sync::manager::SyncMessage;
use crate::sync::manager::{SyncMessage, SyncRequestType};
use crate::sync::{BatchProcessResult, ChainId};
use beacon_chain::{BeaconChainTypes, BlockError, ChainSegmentResult};
use beacon_chain::{
BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError,
};
use eth2_libp2p::PeerId;
use slog::{crit, debug, error, info, trace, warn};
use tokio::sync::mpsc;
@@ -15,6 +17,8 @@ use types::{Epoch, Hash256, SignedBeaconBlock};
/// Identifies the origin of a batch of blocks handed to the processor, so the
/// result can be routed back to the matching sync component.
pub enum ProcessId {
    /// Processing Id of a range syncing batch.
    RangeBatchId(ChainId, Epoch),
    /// Processing ID for a backfill syncing batch.
    BackSyncBatchId(Epoch),
    /// Processing Id of the parent lookup of a block.
    ParentLookup(PeerId, Hash256),
}
@@ -99,11 +103,40 @@ impl<T: BeaconChainTypes> Worker<T> {
}
};
self.send_sync_message(SyncMessage::BatchProcessed {
chain_id,
epoch,
result,
});
let sync_type = SyncRequestType::RangeSync(epoch, chain_id);
self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
}
// this is a request from the Backfill sync
ProcessId::BackSyncBatchId(epoch) => {
let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64());
let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64());
let sent_blocks = downloaded_blocks.len();
let result = match self.process_backfill_blocks(&downloaded_blocks) {
(_, Ok(_)) => {
debug!(self.log, "Backfill batch processed";
"batch_epoch" => epoch,
"first_block_slot" => start_slot,
"last_block_slot" => end_slot,
"processed_blocks" => sent_blocks,
"service"=> "sync");
BatchProcessResult::Success(sent_blocks > 0)
}
(_, Err(e)) => {
debug!(self.log, "Backfill batch processing failed";
"batch_epoch" => epoch,
"first_block_slot" => start_slot,
"last_block_slot" => end_slot,
"error" => e,
"service" => "sync");
BatchProcessResult::Failed(false)
}
};
let sync_type = SyncRequestType::BackFillSync(epoch);
self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
}
// this is a parent lookup request from the sync manager
ProcessId::ParentLookup(peer_id, chain_head) => {
@@ -160,6 +193,80 @@ impl<T: BeaconChainTypes> Worker<T> {
}
}
/// Helper function to process backfill block batches which only consumes the chain and blocks to process.
fn process_backfill_blocks(
    &self,
    blocks: &[SignedBeaconBlock<T::EthSpec>],
) -> (usize, Result<(), String>) {
    match self.chain.import_historical_block_batch(blocks) {
        Ok(imported_blocks) => {
            // Record the successful import before reporting the count back.
            metrics::inc_counter(
                &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_SUCCESS_TOTAL,
            );
            (imported_blocks, Ok(()))
        }
        Err(import_error) => {
            metrics::inc_counter(
                &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL,
            );
            // Translate the failure into a short reason string for the sync
            // layer, logging each historical-block variant at a level matching
            // its severity.
            let failure_reason = match import_error {
                // Handle the historical block errors specifically
                BeaconChainError::HistoricalBlockError(hist_err) => match hist_err {
                    HistoricalBlockError::MismatchedBlockRoot {
                        block_root,
                        expected_block_root,
                    } => {
                        // A peer served a block whose root does not chain up
                        // to the one we expected.
                        debug!(
                            self.log,
                            "Backfill batch processing error";
                            "error" => "mismatched_block_root",
                            "block_root" => ?block_root,
                            "expected_root" => ?expected_block_root
                        );
                        String::from("mismatched_block_root")
                    }
                    HistoricalBlockError::InvalidSignature
                    | HistoricalBlockError::SignatureSet(_) => {
                        warn!(
                            self.log,
                            "Backfill batch processing error";
                            "error" => ?hist_err
                        );
                        String::from("invalid_signature")
                    }
                    HistoricalBlockError::ValidatorPubkeyCacheTimeout => {
                        warn!(
                            self.log,
                            "Backfill batch processing error";
                            "error" => "pubkey_cache_timeout"
                        );
                        String::from("pubkey_cache_timeout")
                    }
                    HistoricalBlockError::NoAnchorInfo => {
                        // Nothing to backfill; not a processing fault.
                        warn!(self.log, "Backfill not required");
                        String::from("no_anchor_info")
                    }
                    HistoricalBlockError::IndexOutOfBounds
                    | HistoricalBlockError::BlockOutOfRange { .. } => {
                        // These variants should not be reachable from this
                        // code path, so surface them loudly.
                        error!(
                            self.log,
                            "Backfill batch processing error";
                            "error" => ?hist_err,
                        );
                        String::from("logic_error")
                    }
                },
                other => {
                    warn!(self.log, "Backfill batch processing error"; "error" => ?other);
                    format!("{:?}", other)
                }
            };
            (0, Err(failure_reason))
        }
    }
}
/// Runs fork-choice on a given chain. This is used during block processing after one successful
/// block import.
fn run_fork_choice(&self) {