mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-16 11:22:56 +00:00
Implement checkpoint sync (#2244)
## Issue Addressed Closes #1891 Closes #1784 ## Proposed Changes Implement checkpoint sync for Lighthouse, enabling it to start from a weak subjectivity checkpoint. ## Additional Info - [x] Return unavailable status for out-of-range blocks requested by peers (#2561) - [x] Implement sync daemon for fetching historical blocks (#2561) - [x] Verify chain hashes (either in `historical_blocks.rs` or the calling module) - [x] Consistency check for initial block + state - [x] Fetch the initial state and block from a beacon node HTTP endpoint - [x] Don't crash fetching beacon states by slot from the API - [x] Background service for state reconstruction, triggered by CLI flag or API call. Considered out of scope for this PR: - Drop the requirement to provide the `--checkpoint-block` (this would require some pretty heavy refactoring of block verification) Co-authored-by: Diva M <divma@protonmail.com>
This commit is contained in:
@@ -522,7 +522,7 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
}
|
||||
|
||||
/// Inform the peer that their request produced an error.
|
||||
pub fn _send_error_reponse(
|
||||
pub fn send_error_reponse(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
id: PeerRequestId,
|
||||
|
||||
@@ -578,8 +578,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
RPCResponseErrorCode::Unknown => PeerAction::HighToleranceError,
|
||||
RPCResponseErrorCode::ResourceUnavailable => {
|
||||
// NOTE: This error only makes sense for the `BlocksByRange` and `BlocksByRoot`
|
||||
// protocols. For the time being, there is no reason why a peer should send
|
||||
// this error.
|
||||
// protocols.
|
||||
//
|
||||
// If we are syncing, there is no point keeping these peers around and
|
||||
// continually failing to request blocks. We instantly ban them and hope that
|
||||
// by the time the ban lifts, the peers will have completed their backfill
|
||||
// sync.
|
||||
//
|
||||
// TODO: Potentially a more graceful way of handling such peers, would be to
|
||||
// implement a new sync type which tracks these peers and prevents the sync
|
||||
// algorithms from requesting blocks from them (at least for a set period of
|
||||
// time, multiple failures would then lead to a ban).
|
||||
PeerAction::Fatal
|
||||
}
|
||||
RPCResponseErrorCode::ServerError => PeerAction::MidToleranceError,
|
||||
|
||||
@@ -211,16 +211,13 @@ mod tests {
|
||||
let _ = snappy_buf.split_to(1);
|
||||
|
||||
// decode message just as snappy message
|
||||
let snappy_decoded_message = snappy_outbound_codec.decode(&mut snappy_buf).unwrap();
|
||||
let _snappy_decoded_message = snappy_outbound_codec.decode(&mut snappy_buf).unwrap();
|
||||
|
||||
// build codecs for entire chunk
|
||||
let mut snappy_base_outbound_codec = BaseOutboundCodec::new(snappy_outbound_codec);
|
||||
|
||||
// decode message as ssz snappy chunk
|
||||
let snappy_decoded_chunk = snappy_base_outbound_codec.decode(&mut buf).unwrap();
|
||||
|
||||
dbg!(snappy_decoded_message);
|
||||
dbg!(snappy_decoded_chunk);
|
||||
let _snappy_decoded_chunk = snappy_base_outbound_codec.decode(&mut buf).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -275,7 +275,7 @@ impl<TSpec: EthSpec> Service<TSpec> {
|
||||
) {
|
||||
self.swarm
|
||||
.behaviour_mut()
|
||||
._send_error_reponse(peer_id, id, error, reason);
|
||||
.send_error_reponse(peer_id, id, error, reason);
|
||||
}
|
||||
|
||||
/// Report a peer's action.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! A collection of variables that are accessible outside of the network thread itself.
|
||||
use crate::peer_manager::PeerDB;
|
||||
use crate::rpc::MetaData;
|
||||
use crate::types::SyncState;
|
||||
use crate::types::{BackFillState, SyncState};
|
||||
use crate::Client;
|
||||
use crate::EnrExt;
|
||||
use crate::{Enr, GossipTopic, Multiaddr, PeerId};
|
||||
@@ -29,6 +29,8 @@ pub struct NetworkGlobals<TSpec: EthSpec> {
|
||||
pub gossipsub_subscriptions: RwLock<HashSet<GossipTopic>>,
|
||||
/// The current sync status of the node.
|
||||
pub sync_state: RwLock<SyncState>,
|
||||
/// The current state of the backfill sync.
|
||||
pub backfill_state: RwLock<BackFillState>,
|
||||
}
|
||||
|
||||
impl<TSpec: EthSpec> NetworkGlobals<TSpec> {
|
||||
@@ -50,6 +52,7 @@ impl<TSpec: EthSpec> NetworkGlobals<TSpec> {
|
||||
peers: RwLock::new(PeerDB::new(trusted_peers, log)),
|
||||
gossipsub_subscriptions: RwLock::new(HashSet::new()),
|
||||
sync_state: RwLock::new(SyncState::Stalled),
|
||||
backfill_state: RwLock::new(BackFillState::NotRequired),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +107,11 @@ impl<TSpec: EthSpec> NetworkGlobals<TSpec> {
|
||||
self.sync_state.read().clone()
|
||||
}
|
||||
|
||||
/// Returns the current backfill state.
|
||||
pub fn backfill_state(&self) -> BackFillState {
|
||||
self.backfill_state.read().clone()
|
||||
}
|
||||
|
||||
/// Returns a `Client` type if one is known for the `PeerId`.
|
||||
pub fn client(&self, peer_id: &PeerId) -> Client {
|
||||
self.peers
|
||||
|
||||
@@ -15,5 +15,5 @@ pub type Enr = discv5::enr::Enr<discv5::enr::CombinedKey>;
|
||||
pub use globals::NetworkGlobals;
|
||||
pub use pubsub::{PubsubMessage, SnappyTransform};
|
||||
pub use subnet::{Subnet, SubnetDiscovery};
|
||||
pub use sync_state::SyncState;
|
||||
pub use sync_state::{BackFillState, SyncState};
|
||||
pub use topics::{subnet_from_topic_hash, GossipEncoding, GossipKind, GossipTopic, CORE_TOPICS};
|
||||
|
||||
@@ -10,8 +10,13 @@ pub enum SyncState {
|
||||
/// The node is performing a long-range (batch) sync over one or many head chains.
|
||||
/// In this state parent lookups are disabled.
|
||||
SyncingHead { start_slot: Slot, target_slot: Slot },
|
||||
/// The node has identified the need for is sync operations and is transitioning to a syncing
|
||||
/// state.
|
||||
/// The node is undertaking a backfill sync. This occurs when a user has specified a trusted
|
||||
/// state. The node first syncs "forward" by downloading blocks up to the current head as
|
||||
/// specified by its peers. Once completed, the node enters this sync state and attempts to
|
||||
/// download all required historical blocks to complete its chain.
|
||||
BackFillSyncing { completed: usize, remaining: usize },
|
||||
/// The node has completed syncing a finalized chain and is in the process of re-evaluating
|
||||
/// which sync state to progress to.
|
||||
SyncTransition,
|
||||
/// The node is up to date with all known peers and is connected to at least one
|
||||
/// fully synced peer. In this state, parent lookups are enabled.
|
||||
@@ -21,6 +26,21 @@ pub enum SyncState {
|
||||
Stalled,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
|
||||
/// The state of the backfill sync.
|
||||
pub enum BackFillState {
|
||||
/// The sync is partially completed and currently paused.
|
||||
Paused,
|
||||
/// We are currently backfilling.
|
||||
Syncing,
|
||||
/// A backfill sync has completed.
|
||||
Completed,
|
||||
/// A backfill sync is not required.
|
||||
NotRequired,
|
||||
/// Too many failed attempts at backfilling. Consider it failed.
|
||||
Failed,
|
||||
}
|
||||
|
||||
impl PartialEq for SyncState {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
matches!(
|
||||
@@ -32,6 +52,10 @@ impl PartialEq for SyncState {
|
||||
| (SyncState::Synced, SyncState::Synced)
|
||||
| (SyncState::Stalled, SyncState::Stalled)
|
||||
| (SyncState::SyncTransition, SyncState::SyncTransition)
|
||||
| (
|
||||
SyncState::BackFillSyncing { .. },
|
||||
SyncState::BackFillSyncing { .. }
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -43,14 +67,18 @@ impl SyncState {
|
||||
SyncState::SyncingFinalized { .. } => true,
|
||||
SyncState::SyncingHead { .. } => true,
|
||||
SyncState::SyncTransition => true,
|
||||
// Backfill doesn't effect any logic, we consider this state, not syncing.
|
||||
SyncState::BackFillSyncing { .. } => false,
|
||||
SyncState::Synced => false,
|
||||
SyncState::Stalled => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the node is synced.
|
||||
///
|
||||
/// NOTE: We consider the node synced if it is fetching old historical blocks.
|
||||
pub fn is_synced(&self) -> bool {
|
||||
matches!(self, SyncState::Synced)
|
||||
matches!(self, SyncState::Synced | SyncState::BackFillSyncing { .. })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,7 +89,8 @@ impl std::fmt::Display for SyncState {
|
||||
SyncState::SyncingHead { .. } => write!(f, "Syncing Head Chain"),
|
||||
SyncState::Synced { .. } => write!(f, "Synced"),
|
||||
SyncState::Stalled { .. } => write!(f, "Stalled"),
|
||||
SyncState::SyncTransition => write!(f, "Searching syncing peers"),
|
||||
SyncState::SyncTransition => write!(f, "Evaluating known peers"),
|
||||
SyncState::BackFillSyncing { .. } => write!(f, "Syncing Historical Blocks"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user