Global Sync access (#994)

* Connect sync logic to network globals

* Add further sync info to sync status

* Build new syncing HTTP API methods

* Fix bug in updating sync state

* Highest slot is current slot

* Update book for syncing API
This commit is contained in:
Age Manning
2020-04-14 18:17:35 +10:00
committed by GitHub
parent db7847c34a
commit e5874f4565
22 changed files with 818 additions and 399 deletions

View File

@@ -8,7 +8,7 @@ use eth2_libp2p::PeerId;
use rand::prelude::*;
use slog::{crit, debug, warn};
use std::collections::HashSet;
use std::sync::Weak;
use std::sync::Arc;
use tokio::sync::mpsc;
use types::{Hash256, SignedBeaconBlock, Slot};
@@ -82,7 +82,8 @@ pub struct SyncingChain<T: BeaconChainTypes> {
/// back once batch processing has completed.
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
chain: Weak<BeaconChain<T>>,
/// A reference to the underlying beacon chain.
chain: Arc<BeaconChain<T>>,
/// A reference to the sync logger.
log: slog::Logger,
@@ -103,7 +104,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
target_head_root: Hash256,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
chain: Weak<BeaconChain<T>>,
chain: Arc<BeaconChain<T>>,
log: slog::Logger,
) -> Self {
let mut peer_pool = HashSet::new();
@@ -244,7 +245,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
let batch_id = ProcessId::RangeBatchId(batch.id.clone());
self.current_processing_batch = Some(batch);
spawn_block_processor(
self.chain.clone(),
Arc::downgrade(&self.chain.clone()),
batch_id,
downloaded_blocks,
self.sync_send.clone(),

View File

@@ -8,48 +8,138 @@ use crate::router::processor::PeerSyncInfo;
use crate::sync::manager::SyncMessage;
use crate::sync::network_context::SyncNetworkContext;
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::PeerId;
use slog::{debug, error, warn};
use std::sync::Weak;
use eth2_libp2p::{types::SyncState, NetworkGlobals, PeerId};
use slog::{debug, error, info};
use std::sync::Arc;
use tokio::sync::mpsc;
use types::EthSpec;
use types::{Hash256, Slot};
/// The state of the long range/batch sync.
pub enum SyncState {
#[derive(Clone)]
pub enum RangeSyncState {
/// A finalized chain is being synced.
Finalized,
Finalized {
/// The start of the finalized chain.
start_slot: Slot,
/// The target head slot of the finalized chain.
head_slot: Slot,
/// The target head root of the finalized chain.
head_root: Hash256,
},
/// There are no finalized chains and we are syncing one more head chains.
Head,
Head {
/// The last finalized checkpoint for all head chains.
start_slot: Slot,
/// The largest known slot to sync to.
head_slot: Slot,
},
/// There are no head or finalized chains and no long range sync is in progress.
Idle,
}
impl PartialEq for RangeSyncState {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(RangeSyncState::Finalized { .. }, RangeSyncState::Finalized { .. }) => true,
(RangeSyncState::Head { .. }, RangeSyncState::Head { .. }) => true,
(RangeSyncState::Idle, RangeSyncState::Idle) => true,
_ => false,
}
}
}
impl Into<SyncState> for RangeSyncState {
fn into(self) -> SyncState {
match self {
RangeSyncState::Finalized {
start_slot,
head_slot,
head_root,
} => SyncState::SyncingFinalized {
start_slot,
head_slot,
head_root,
},
RangeSyncState::Head {
start_slot,
head_slot,
} => SyncState::SyncingHead {
start_slot,
head_slot,
},
RangeSyncState::Idle => SyncState::Stalled, // this should never really be used
}
}
}
/// A collection of finalized and head chains currently being processed.
pub struct ChainCollection<T: BeaconChainTypes> {
/// The beacon chain for processing.
beacon_chain: Weak<BeaconChain<T>>,
beacon_chain: Arc<BeaconChain<T>>,
/// A reference to the global network parameters.
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
/// The set of finalized chains being synced.
finalized_chains: Vec<SyncingChain<T>>,
/// The set of head chains being synced.
head_chains: Vec<SyncingChain<T>>,
/// The current sync state of the process.
sync_state: SyncState,
state: RangeSyncState,
/// Logger for the collection.
log: slog::Logger,
}
impl<T: BeaconChainTypes> ChainCollection<T> {
pub fn new(beacon_chain: Weak<BeaconChain<T>>) -> Self {
pub fn new(
beacon_chain: Arc<BeaconChain<T>>,
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
log: slog::Logger,
) -> Self {
ChainCollection {
sync_state: SyncState::Idle,
beacon_chain,
network_globals,
finalized_chains: Vec::new(),
head_chains: Vec::new(),
beacon_chain,
state: RangeSyncState::Idle,
log,
}
}
/// The current syncing state.
pub fn sync_state(&self) -> &SyncState {
&self.sync_state
pub fn state(&self) -> &RangeSyncState {
&self.state
}
/// Updates the global sync state and logs any changes.
fn update_sync_state(&mut self, state: RangeSyncState) {
// if there is no range sync occurring, the state is either synced or not based on
// connected peers.
self.state = state;
if self.state == RangeSyncState::Idle {
// there is no range sync, let the state of peers determine the global node sync state
let new_state = self
.network_globals
.peers
.read()
.synced_peers()
.next()
.map(|_| SyncState::Synced)
.unwrap_or_else(|| SyncState::Stalled);
let mut peer_state = self.network_globals.sync_state.write();
if new_state != *peer_state {
info!(self.log, "Sync state updated"; "old_state" => format!("{}",peer_state), "new_state" => format!("{}",new_state));
}
*peer_state = new_state;
} else {
// The state is based on a range sync state, update it
let mut node_sync_state = self.network_globals.sync_state.write();
let new_state: SyncState = self.state.clone().into();
if *node_sync_state != new_state {
// we are updating the state, inform the user
info!(self.log, "Sync state updated"; "old_state" => format!("{}",node_sync_state), "new_state" => format!("{}",new_state));
}
*node_sync_state = new_state;
}
}
/// A fully synced peer has joined.
@@ -57,9 +147,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
/// We could be awaiting a head sync. If we are in the head syncing state, without any head
/// chains, then update the state to idle.
pub fn fully_synced_peer_found(&mut self) {
if let SyncState::Head = self.sync_state {
if let RangeSyncState::Head { .. } = self.state {
if self.head_chains.is_empty() {
self.sync_state = SyncState::Idle;
// Update the global network state to either synced or stalled.
self.update_sync_state(RangeSyncState::Idle);
}
}
}
@@ -68,8 +159,19 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
/// `SyncState::Head` indicating we are awaiting new peers to connect before we can consider
/// the state as idle.
pub fn set_head_sync(&mut self) {
if let SyncState::Idle = self.sync_state {
self.sync_state = SyncState::Head;
if let RangeSyncState::Idle = self.state {
let current_slot = self
.beacon_chain
.head_info()
.map(|info| info.slot)
.unwrap_or_else(|_| Slot::from(0u64));
// NOTE: This will modify the /node/syncing API to show current slot for all fields
// while we update peers to look for new potentially HEAD chains.
let temp_head_state = RangeSyncState::Head {
start_slot: current_slot,
head_slot: current_slot,
};
self.update_sync_state(temp_head_state);
}
}
@@ -103,36 +205,26 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
///
/// This removes any out-dated chains, swaps to any higher priority finalized chains and
/// updates the state of the collection.
pub fn update_finalized(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
log: &slog::Logger,
) {
let local_slot = match self.beacon_chain.upgrade() {
Some(chain) => {
let local = match PeerSyncInfo::from_chain(&chain) {
Some(local) => local,
None => {
return error!(
log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
};
pub fn update_finalized(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
let local_slot = {
let local = match PeerSyncInfo::from_chain(&self.beacon_chain) {
Some(local) => local,
None => {
return error!(
self.log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
};
local
.finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch())
}
None => {
warn!(log, "Beacon chain dropped. Chains not updated");
return;
}
local
.finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch())
};
// Remove any outdated finalized chains
self.purge_outdated_chains(network, log);
self.purge_outdated_chains(network);
// Check if any chains become the new syncing chain
if let Some(index) = self.finalized_syncing_index() {
@@ -149,13 +241,20 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
})
{
// A chain has more peers. Swap the syncing chain
debug!(log, "Switching finalized chains to sync"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
debug!(self.log, "Switching finalized chains to sync"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
// update the state to a new finalized state
let state = RangeSyncState::Finalized {
start_slot: chain.start_slot,
head_slot: chain.target_head_slot,
head_root: chain.target_head_root,
};
self.update_sync_state(state);
// Stop the current chain from syncing
self.finalized_chains[index].stop_syncing();
// Start the new chain
self.finalized_chains[new_index].start_syncing(network, local_slot);
self.sync_state = SyncState::Finalized;
}
} else if let Some(chain) = self
.finalized_chains
@@ -163,15 +262,36 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.max_by_key(|chain| chain.peer_pool.len())
{
// There is no currently syncing finalization chain, starting the one with the most peers
debug!(log, "New finalized chain started syncing"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
debug!(self.log, "New finalized chain started syncing"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
chain.start_syncing(network, local_slot);
self.sync_state = SyncState::Finalized;
let state = RangeSyncState::Finalized {
start_slot: chain.start_slot,
head_slot: chain.target_head_slot,
head_root: chain.target_head_root,
};
self.update_sync_state(state);
} else {
// There are no finalized chains, update the state.
if self.head_chains.is_empty() {
self.sync_state = SyncState::Idle;
self.update_sync_state(RangeSyncState::Idle);
} else {
self.sync_state = SyncState::Head;
// for the syncing API, we find the minimal start_slot and the maximum
// target_slot of all head chains to report back.
let (min_slot, max_slot) = self.head_chains.iter().fold(
(Slot::from(0u64), Slot::from(0u64)),
|(min, max), chain| {
(
std::cmp::min(min, chain.start_slot),
std::cmp::max(max, chain.target_head_slot),
)
},
);
let head_state = RangeSyncState::Head {
start_slot: min_slot,
head_slot: max_slot,
};
self.update_sync_state(head_state);
}
}
}
@@ -184,7 +304,6 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_slot: Slot,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
log: &slog::Logger,
) {
self.finalized_chains.push(SyncingChain::new(
local_finalized_slot,
@@ -193,7 +312,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
peer_id,
sync_send,
self.beacon_chain.clone(),
log.clone(),
self.log.clone(),
));
}
@@ -207,7 +326,6 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
target_slot: Slot,
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
log: &slog::Logger,
) {
// remove the peer from any other head chains
@@ -223,7 +341,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
peer_id,
sync_send,
self.beacon_chain.clone(),
log.clone(),
self.log.clone(),
);
// All head chains can sync simultaneously
new_head_chain.start_syncing(network, remote_finalized_slot);
@@ -281,29 +399,20 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
///
/// This removes chains with no peers, or chains whose start block slot is less than our current
/// finalized block slot.
pub fn purge_outdated_chains(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
log: &slog::Logger,
) {
pub fn purge_outdated_chains(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
// Remove any chains that have no peers
self.finalized_chains
.retain(|chain| !chain.peer_pool.is_empty());
self.head_chains.retain(|chain| !chain.peer_pool.is_empty());
let (beacon_chain, local_info) = match self.beacon_chain.upgrade() {
Some(chain) => match PeerSyncInfo::from_chain(&chain) {
Some(local) => (chain, local),
None => {
return error!(
log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
},
let local_info = match PeerSyncInfo::from_chain(&self.beacon_chain) {
Some(local) => local,
None => {
return;
return error!(
self.log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
};
@@ -311,6 +420,8 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch());
let beacon_chain = &self.beacon_chain;
let log_ref = &self.log;
// Remove chains that are out-dated and re-status their peers
self.finalized_chains.retain(|chain| {
if chain.target_head_slot <= local_finalized_slot
@@ -318,7 +429,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.fork_choice
.contains_block(&chain.target_head_root)
{
debug!(log, "Purging out of finalized chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
debug!(log_ref, "Purging out of finalized chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
chain.status_peers(network);
false
} else {
@@ -331,7 +442,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.fork_choice
.contains_block(&chain.target_head_root)
{
debug!(log, "Purging out of date head chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
debug!(log_ref, "Purging out of date head chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
chain.status_peers(network);
false
} else {
@@ -355,12 +466,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
/// finalized chain length, indicates a head chain.
///
/// This will re-status the chains peers on removal. The index must exist.
pub fn remove_chain(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
index: usize,
log: &slog::Logger,
) {
pub fn remove_chain(&mut self, network: &mut SyncNetworkContext<T::EthSpec>, index: usize) {
let chain = if index >= self.finalized_chains.len() {
let index = index - self.finalized_chains.len();
let chain = self.head_chains.swap_remove(index);
@@ -372,10 +478,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
chain
};
debug!(log, "Chain was removed"; "start_slot" => chain.start_slot.as_u64(), "end_slot" => chain.target_head_slot.as_u64());
debug!(self.log, "Chain was removed"; "start_slot" => chain.start_slot.as_u64(), "end_slot" => chain.target_head_slot.as_u64());
// update the state
self.update_finalized(network, log);
self.update_finalized(network);
}
/// Returns the index of finalized chain that is currently syncing. Returns `None` if no

View File

@@ -40,7 +40,7 @@
//! and further batches are requested as current blocks are being processed.
use super::chain::ProcessingResult;
use super::chain_collection::{ChainCollection, SyncState};
use super::chain_collection::{ChainCollection, RangeSyncState};
use super::BatchId;
use crate::router::processor::PeerSyncInfo;
use crate::sync::block_processor::BatchProcessResult;
@@ -48,10 +48,10 @@ use crate::sync::manager::SyncMessage;
use crate::sync::network_context::SyncNetworkContext;
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::rpc::RequestId;
use eth2_libp2p::PeerId;
use slog::{debug, error, trace, warn};
use eth2_libp2p::{NetworkGlobals, PeerId};
use slog::{debug, error, trace};
use std::collections::HashSet;
use std::sync::Weak;
use std::sync::Arc;
use tokio::sync::mpsc;
use types::{EthSpec, SignedBeaconBlock};
@@ -60,7 +60,7 @@ use types::{EthSpec, SignedBeaconBlock};
/// holds the current state of the long range sync.
pub struct RangeSync<T: BeaconChainTypes> {
/// The beacon chain for processing.
beacon_chain: Weak<BeaconChain<T>>,
beacon_chain: Arc<BeaconChain<T>>,
/// A collection of chains that need to be downloaded. This stores any head or finalized chains
/// that need to be downloaded.
chains: ChainCollection<T>,
@@ -77,13 +77,14 @@ pub struct RangeSync<T: BeaconChainTypes> {
impl<T: BeaconChainTypes> RangeSync<T> {
pub fn new(
beacon_chain: Weak<BeaconChain<T>>,
beacon_chain: Arc<BeaconChain<T>>,
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
log: slog::Logger,
) -> Self {
RangeSync {
beacon_chain: beacon_chain.clone(),
chains: ChainCollection::new(beacon_chain),
chains: ChainCollection::new(beacon_chain, network_globals, log.clone()),
awaiting_head_peers: HashSet::new(),
sync_send,
log,
@@ -118,21 +119,14 @@ impl<T: BeaconChainTypes> RangeSync<T> {
// determine if we need to run a sync to the nearest finalized state or simply sync to
// its current head
let (chain, local_info) = match self.beacon_chain.upgrade() {
Some(chain) => match PeerSyncInfo::from_chain(&chain) {
Some(local) => (chain, local),
None => {
return error!(
self.log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
},
let local_info = match PeerSyncInfo::from_chain(&self.beacon_chain) {
Some(local) => local,
None => {
return warn!(self.log,
"Beacon chain dropped. Peer not considered for sync";
"peer_id" => format!("{:?}", peer_id));
return error!(
self.log,
"Failed to get peer sync info";
"msg" => "likely due to head lock contention"
)
}
};
@@ -148,10 +142,13 @@ impl<T: BeaconChainTypes> RangeSync<T> {
self.remove_peer(network, &peer_id);
// remove any out-of-date chains
self.chains.purge_outdated_chains(network, &self.log);
self.chains.purge_outdated_chains(network);
if remote_finalized_slot > local_info.head_slot
&& !chain.fork_choice.contains_block(&remote.finalized_root)
&& !self
.beacon_chain
.fork_choice
.contains_block(&remote.finalized_root)
{
debug!(self.log, "Finalization sync peer joined"; "peer_id" => format!("{:?}", peer_id));
// Finalized chain search
@@ -171,7 +168,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
chain.add_peer(network, peer_id);
// check if the new peer's addition will favour a new syncing chain.
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
} else {
// there is no finalized chain that matches this peer's last finalized target
// create a new finalized chain
@@ -183,9 +180,8 @@ impl<T: BeaconChainTypes> RangeSync<T> {
remote_finalized_slot,
peer_id,
self.sync_send.clone(),
&self.log,
);
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
}
} else {
if self.chains.is_finalizing_sync() {
@@ -216,10 +212,9 @@ impl<T: BeaconChainTypes> RangeSync<T> {
remote.head_slot,
peer_id,
self.sync_send.clone(),
&self.log,
);
}
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
}
}
@@ -274,7 +269,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
chain.status_peers(network);
// update the state of the collection
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
// set the state to a head sync, to inform the manager that we are awaiting a
// head chain.
@@ -282,13 +277,13 @@ impl<T: BeaconChainTypes> RangeSync<T> {
// if there are no more finalized chains, re-status all known peers awaiting a head
// sync
match self.chains.sync_state() {
SyncState::Idle | SyncState::Head => {
match self.chains.state() {
RangeSyncState::Idle | RangeSyncState::Head { .. } => {
for peer_id in self.awaiting_head_peers.drain() {
network.status_peer(self.beacon_chain.clone(), peer_id);
}
}
SyncState::Finalized => {} // Have more finalized chains to complete
RangeSyncState::Finalized { .. } => {} // Have more finalized chains to complete
}
}
Some((_, ProcessingResult::KeepChain)) => {}
@@ -308,7 +303,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
chain.status_peers(network);
// update the state of the collection
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
}
Some((_, ProcessingResult::KeepChain)) => {}
None => {
@@ -321,15 +316,6 @@ impl<T: BeaconChainTypes> RangeSync<T> {
}
}
/// Public method to indicate the current state of the long range sync.
pub fn is_syncing(&self) -> bool {
match self.chains.sync_state() {
SyncState::Finalized => true,
SyncState::Head => true,
SyncState::Idle => false,
}
}
/// A peer has disconnected. This removes the peer from any ongoing chains and mappings. A
/// disconnected peer could remove a chain
pub fn peer_disconnect(
@@ -344,7 +330,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
self.remove_peer(network, peer_id);
// update the state of the collection
self.chains.update_finalized(network, &self.log);
self.chains.update_finalized(network);
}
/// When a peer gets removed, both the head and finalized chains need to be searched to check which pool the peer is in. The chain may also have a batch or batches awaiting
@@ -370,7 +356,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
{
// the chain needed to be removed
debug!(self.log, "Chain being removed due to failed batch");
self.chains.remove_chain(network, index, &self.log);
self.chains.remove_chain(network, index);
}
}
@@ -392,7 +378,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
Some((_, ProcessingResult::KeepChain)) => {} // error handled chain persists
Some((index, ProcessingResult::RemoveChain)) => {
debug!(self.log, "Chain being removed due to RPC error");
self.chains.remove_chain(network, index, &self.log)
self.chains.remove_chain(network, index)
}
None => {} // request wasn't in the finalized chains, check the head chains
}