mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-11 18:04:18 +00:00
Tweak head syncing (#1845)
## Issue Addressed Fixes head syncing ## Proposed Changes - Get back to statusing peers after removing chain segments and making the peer manager deal with status according to the Sync status, preventing an old known deadlock - Also a bug where a chain would get removed if the optimistic batch succeeds being empty ## Additional Info Tested on Medalla and looking good
This commit is contained in:
@@ -19,7 +19,7 @@
|
||||
//! need to be downloaded.
|
||||
//!
|
||||
//! A few interesting notes about finalized chain syncing:
|
||||
//! - Only one finalized chain can sync at a time.
|
||||
//! - Only one finalized chain can sync at a time
|
||||
//! - The finalized chain with the largest peer pool takes priority.
|
||||
//! - As one finalized chain completes, others are checked to see if we they can be continued,
|
||||
//! otherwise they are removed.
|
||||
@@ -39,7 +39,7 @@
|
||||
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
|
||||
//! and further batches are requested as current blocks are being processed.
|
||||
|
||||
use super::chain::ChainId;
|
||||
use super::chain::{ChainId, RemoveChain, SyncingChain};
|
||||
use super::chain_collection::ChainCollection;
|
||||
use super::sync_type::RangeSyncType;
|
||||
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
|
||||
@@ -49,7 +49,7 @@ use crate::sync::PeerSyncInfo;
|
||||
use crate::sync::RequestId;
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use eth2_libp2p::PeerId;
|
||||
use slog::{debug, error, trace};
|
||||
use slog::{crit, debug, error, trace};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::mpsc;
|
||||
@@ -121,7 +121,8 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
.finalized_epoch
|
||||
.start_slot(T::EthSpec::slots_per_epoch());
|
||||
|
||||
// NOTE: A peer that has been re-status'd may now exist in multiple finalized chains.
|
||||
// NOTE: A peer that has been re-status'd may now exist in multiple finalized chains. This
|
||||
// is OK since we since only one finalized chain at a time.
|
||||
|
||||
// determine which kind of sync to perform and set up the chains
|
||||
match RangeSyncType::new(&self.beacon_chain, &local_info, &remote_info) {
|
||||
@@ -208,22 +209,22 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some(_removed_chain) = removed_chain {
|
||||
debug!(self.log, "Chain removed after block response"; "sync_type" => ?sync_type, "chain_id" => chain_id);
|
||||
// update the state of the collection
|
||||
self.chains.update(
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
&mut self.awaiting_head_peers,
|
||||
&self.beacon_processor_send,
|
||||
"block response",
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
debug!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,17 +242,18 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
Ok((None, _sync_type)) => {
|
||||
// Chain was found and not removed
|
||||
}
|
||||
Ok((Some(_removed_chain), sync_type)) => {
|
||||
debug!(self.log, "Chain removed after processing result"; "chain" => chain_id, "sync_type" => ?sync_type);
|
||||
self.chains.update(
|
||||
Ok((Some((removed_chain, remove_reason)), sync_type)) => {
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
&mut self.awaiting_head_peers,
|
||||
&self.beacon_processor_send,
|
||||
"batch processing result",
|
||||
);
|
||||
}
|
||||
|
||||
Err(_) => {
|
||||
debug!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -279,14 +281,16 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
.chains
|
||||
.call_all(|chain| chain.remove_peer(peer_id, network))
|
||||
{
|
||||
debug!(self.log, "Chain removed after removing peer"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id(), "reason" => ?remove_reason);
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
"peer removed",
|
||||
);
|
||||
|
||||
// update the state of the collection
|
||||
}
|
||||
self.chains.update(
|
||||
network,
|
||||
&mut self.awaiting_head_peers,
|
||||
&self.beacon_processor_send,
|
||||
);
|
||||
}
|
||||
|
||||
/// An RPC error has occurred.
|
||||
@@ -307,21 +311,45 @@ impl<T: BeaconChainTypes> RangeSync<T> {
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
debug!(self.log, "Chain removed on rpc error"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id(), "reason" => ?remove_reason);
|
||||
// update the state of the collection
|
||||
self.chains.update(
|
||||
self.on_chain_removed(
|
||||
removed_chain,
|
||||
sync_type,
|
||||
remove_reason,
|
||||
network,
|
||||
&mut self.awaiting_head_peers,
|
||||
&self.beacon_processor_send,
|
||||
"RPC error",
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
debug!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
|
||||
}
|
||||
}
|
||||
|
||||
fn on_chain_removed(
|
||||
&mut self,
|
||||
chain: SyncingChain<T>,
|
||||
sync_type: RangeSyncType,
|
||||
remove_reason: RemoveChain,
|
||||
network: &mut SyncNetworkContext<T::EthSpec>,
|
||||
op: &'static str,
|
||||
) {
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
} else {
|
||||
debug!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
}
|
||||
|
||||
network.status_peers(self.beacon_chain.clone(), chain.peers());
|
||||
|
||||
// update the state of the collection
|
||||
self.chains.update(
|
||||
network,
|
||||
&mut self.awaiting_head_peers,
|
||||
&self.beacon_processor_send,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user