mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-15 19:02:42 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -3,6 +3,7 @@ use lighthouse_network::rpc::methods::BlocksByRangeRequest;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::PeerId;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ops::Sub;
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -61,6 +62,7 @@ pub trait BatchConfig {
|
||||
fn batch_attempt_hash<E: EthSpec>(blocks: &[RpcBlock<E>]) -> u64;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RangeSyncBatchConfig {}
|
||||
|
||||
impl BatchConfig for RangeSyncBatchConfig {
|
||||
@@ -93,6 +95,7 @@ pub enum BatchProcessingResult {
|
||||
NonFaultyFailure,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// A segment of a chain.
|
||||
pub struct BatchInfo<E: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
|
||||
/// Start slot of the batch.
|
||||
@@ -113,6 +116,17 @@ pub struct BatchInfo<E: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
|
||||
marker: std::marker::PhantomData<B>,
|
||||
}
|
||||
|
||||
/// One-line, human-readable summary of a batch.
///
/// The output has the form `Start Slot: <start>, End Slot: <end>, State: <state>`, where each
/// value is rendered through its own `Display` implementation.
impl<E: EthSpec, B: BatchConfig> fmt::Display for BatchInfo<E, B> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only the slot range and the current state take part in the summary.
        let Self {
            start_slot,
            end_slot,
            state,
            ..
        } = self;

        f.write_fmt(format_args!(
            "Start Slot: {}, End Slot: {}, State: {}",
            start_slot, end_slot, state
        ))
    }
}
|
||||
|
||||
#[derive(Display)]
|
||||
/// Current state of a batch
|
||||
pub enum BatchState<E: EthSpec> {
|
||||
/// The batch has failed either downloading or processing, but can be requested again.
|
||||
@@ -190,15 +204,6 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
peers
|
||||
}
|
||||
|
||||
/// Return the number of times this batch has failed downloading and failed processing, in this
|
||||
/// order.
|
||||
pub fn failed_attempts(&self) -> (usize, usize) {
|
||||
(
|
||||
self.failed_download_attempts.len(),
|
||||
self.failed_processing_attempts.len(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Verifies if an incoming block belongs to this batch.
|
||||
pub fn is_expecting_block(&self, request_id: &Id) -> bool {
|
||||
if let BatchState::Downloading(_, expected_id) = &self.state {
|
||||
@@ -456,39 +461,6 @@ impl Attempt {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a `&mut BatchInfo` be passed directly as a `slog` key-value group.
///
/// All the work is forwarded to the `slog::KV` implementation of `BatchInfo` itself.
impl<E: EthSpec, B: BatchConfig> slog::KV for &mut BatchInfo<E, B> {
    fn serialize(
        &self,
        record: &slog::Record,
        serializer: &mut dyn slog::Serializer,
    ) -> slog::Result {
        // `self` is `&&mut BatchInfo`; it auto-derefs to the `&BatchInfo` the inner impl expects.
        <BatchInfo<E, B> as slog::KV>::serialize(self, record, serializer)
    }
}
|
||||
|
||||
/// Structured-logging (`slog`) representation of a batch.
///
/// Emits, in order, the keys `start_slot`, `end_slot`, `downloaded`, `processed`,
/// `processed_no_penalty`, `state` and `batch_ty`.
impl<E: EthSpec, B: BatchConfig> slog::KV for BatchInfo<E, B> {
    fn serialize(
        &self,
        record: &slog::Record,
        serializer: &mut dyn slog::Serializer,
    ) -> slog::Result {
        slog::Value::serialize(&self.start_slot, record, "start_slot", serializer)?;

        // NOTE: The -1 shows inclusive blocks
        let last_slot = self.end_slot - 1;
        slog::Value::serialize(&last_slot, record, "end_slot", serializer)?;

        // The "downloaded" / "processed" keys carry the number of *failed* attempts of each kind.
        let failed_downloads = self.failed_download_attempts.len();
        serializer.emit_usize("downloaded", failed_downloads)?;
        let failed_processings = self.failed_processing_attempts.len();
        serializer.emit_usize("processed", failed_processings)?;
        serializer.emit_u8("processed_no_penalty", self.non_faulty_processing_attempts)?;

        // State is logged through `Debug`, the batch type through `Display`.
        serializer.emit_arguments("state", &format_args!("{:?}", self.state))?;
        serializer.emit_arguments("batch_ty", &format_args!("{}", self.batch_type))?;

        Ok(())
    }
}
|
||||
|
||||
impl<E: EthSpec> std::fmt::Debug for BatchState<E> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
|
||||
@@ -9,11 +9,13 @@ use beacon_chain::BeaconChainTypes;
|
||||
use fnv::FnvHashMap;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use slog::{crit, debug, o, warn};
|
||||
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
|
||||
use std::fmt;
|
||||
use strum::IntoStaticStr;
|
||||
use tracing::{debug, instrument, warn};
|
||||
use types::{Epoch, EthSpec, Hash256, Slot};
|
||||
|
||||
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
|
||||
@@ -37,6 +39,7 @@ pub type ProcessingResult = Result<KeepChain, RemoveChain>;
|
||||
|
||||
/// Reasons for removing a chain
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub enum RemoveChain {
|
||||
EmptyPeerPool,
|
||||
ChainCompleted,
|
||||
@@ -66,6 +69,7 @@ pub enum SyncingChainType {
|
||||
/// A chain of blocks that need to be downloaded. Peers who claim to contain the target head
|
||||
/// root are grouped into the peer pool and queried for batches when downloading the
|
||||
/// chain.
|
||||
#[derive(Debug)]
|
||||
pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
/// A random id used to identify this chain.
|
||||
id: ChainId,
|
||||
@@ -110,9 +114,16 @@ pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
|
||||
/// The current processing batch, if any.
|
||||
current_processing_batch: Option<BatchId>,
|
||||
}
|
||||
|
||||
/// The chain's log.
|
||||
log: slog::Logger,
|
||||
impl<T: BeaconChainTypes> fmt::Display for SyncingChain<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.chain_type {
|
||||
SyncingChainType::Head => write!(f, "Head"),
|
||||
SyncingChainType::Finalized => write!(f, "Finalized"),
|
||||
SyncingChainType::Backfill => write!(f, "Backfill"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
@@ -132,7 +143,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
target_head_root: Hash256,
|
||||
peer_id: PeerId,
|
||||
chain_type: SyncingChainType,
|
||||
log: &slog::Logger,
|
||||
) -> Self {
|
||||
let mut peers = FnvHashMap::default();
|
||||
peers.insert(peer_id, Default::default());
|
||||
@@ -151,7 +161,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
attempted_optimistic_starts: HashSet::default(),
|
||||
state: ChainSyncingState::Stopped,
|
||||
current_processing_batch: None,
|
||||
log: log.new(o!("chain" => id)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,21 +170,25 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Check if the chain has peers from which to process batches.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn available_peers(&self) -> usize {
|
||||
self.peers.len()
|
||||
}
|
||||
|
||||
/// Get the chain's id.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn get_id(&self) -> ChainId {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Peers currently syncing this chain.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||
self.peers.keys().cloned()
|
||||
}
|
||||
|
||||
/// Progress in epochs made by the chain
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn processed_epochs(&self) -> u64 {
|
||||
self.processing_target
|
||||
.saturating_sub(self.start_epoch)
|
||||
@@ -183,6 +196,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns the total count of pending blocks in all the batches of this chain
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn pending_blocks(&self) -> usize {
|
||||
self.batches
|
||||
.values()
|
||||
@@ -192,6 +206,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Removes a peer from the chain.
|
||||
/// If the peer has active batches, those are considered failed and re-requested.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn remove_peer(
|
||||
&mut self,
|
||||
peer_id: &PeerId,
|
||||
@@ -211,8 +226,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
self.retry_batch_download(network, id)?;
|
||||
} else {
|
||||
debug!(self.log, "Batch not found while removing peer";
|
||||
"peer" => %peer_id, "batch" => id)
|
||||
debug!(%peer_id, batch = ?id, "Batch not found while removing peer")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -225,6 +239,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns the latest slot number that has been processed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn current_processed_slot(&self) -> Slot {
|
||||
// the last slot we processed was included in the previous batch, and corresponds to the
|
||||
// first slot of the current target epoch
|
||||
@@ -234,6 +249,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// A block has been received for a batch on this chain.
|
||||
/// If the block correctly completes the batch it will be processed if possible.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn on_block_response(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -245,7 +261,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// check if we have this batch
|
||||
let batch = match self.batches.get_mut(&batch_id) {
|
||||
None => {
|
||||
debug!(self.log, "Received a block for unknown batch"; "epoch" => batch_id);
|
||||
debug!(epoch = %batch_id, "Received a block for unknown batch");
|
||||
// A batch might get removed when the chain advances, so this is non fatal.
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
@@ -273,7 +289,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
let awaiting_batches = batch_id
|
||||
.saturating_sub(self.optimistic_start.unwrap_or(self.processing_target))
|
||||
/ EPOCHS_PER_BATCH;
|
||||
debug!(self.log, "Batch downloaded"; "epoch" => batch_id, "blocks" => received, "batch_state" => self.visualize_batch_state(), "awaiting_batches" => awaiting_batches);
|
||||
debug!(epoch = %batch_id, blocks = received, batch_state = self.visualize_batch_state(), %awaiting_batches,"Batch downloaded");
|
||||
|
||||
// pre-emptively request more blocks from peers whilst we process current blocks,
|
||||
self.request_batches(network)?;
|
||||
@@ -282,6 +298,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Processes the batch with the given id.
|
||||
/// The batch must exist and be ready for processing
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn process_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -317,8 +334,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.current_processing_batch = Some(batch_id);
|
||||
|
||||
if let Err(e) = beacon_processor.send_chain_segment(process_id, blocks) {
|
||||
crit!(self.log, "Failed to send chain segment to processor."; "msg" => "process_batch",
|
||||
"error" => %e, "batch" => self.processing_target);
|
||||
crit!(msg = "process_batch",error = %e, batch = ?self.processing_target, "Failed to send chain segment to processor.");
|
||||
// This is unlikely to happen but it would stall syncing since the batch now has no
|
||||
// blocks to continue, and the chain is expecting a processing result that won't
|
||||
// arrive. To mitigate this, (fake) fail this processing so that the batch is
|
||||
@@ -330,6 +346,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Processes the next ready batch, prioritizing optimistic batches over the processing target.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn process_completed_batches(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -349,7 +366,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
match state {
|
||||
BatchState::AwaitingProcessing(..) => {
|
||||
// this batch is ready
|
||||
debug!(self.log, "Processing optimistic start"; "epoch" => epoch);
|
||||
debug!(%epoch, "Processing optimistic start");
|
||||
return self.process_batch(network, epoch);
|
||||
}
|
||||
BatchState::Downloading(..) => {
|
||||
@@ -377,7 +394,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// batch has been requested and processed we can land here. We drop the
|
||||
// optimistic candidate since we can't conclude whether the batch included
|
||||
// blocks or not at this point
|
||||
debug!(self.log, "Dropping optimistic candidate"; "batch" => epoch);
|
||||
debug!(batch = %epoch, "Dropping optimistic candidate");
|
||||
self.optimistic_start = None;
|
||||
}
|
||||
}
|
||||
@@ -411,7 +428,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// inside the download buffer (between `self.processing_target` and
|
||||
// `self.to_be_downloaded`). In this case, eventually the chain advances to the
|
||||
// batch (`self.processing_target` reaches this point).
|
||||
debug!(self.log, "Chain encountered a robust batch awaiting validation"; "batch" => self.processing_target);
|
||||
debug!(
|
||||
batch = %self.processing_target,
|
||||
"Chain encountered a robust batch awaiting validation"
|
||||
);
|
||||
|
||||
self.processing_target += EPOCHS_PER_BATCH;
|
||||
if self.to_be_downloaded <= self.processing_target {
|
||||
@@ -436,6 +456,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// The block processor has completed processing a batch. This function handles the result
|
||||
/// of the batch processor.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn on_batch_process_result(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -447,13 +468,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
let batch_state = self.visualize_batch_state();
|
||||
let batch = match &self.current_processing_batch {
|
||||
Some(processing_id) if *processing_id != batch_id => {
|
||||
debug!(self.log, "Unexpected batch result";
|
||||
"batch_epoch" => batch_id, "expected_batch_epoch" => processing_id);
|
||||
debug!(batch_epoch = %batch_id, expected_batch_epoch = %processing_id,"Unexpected batch result");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
None => {
|
||||
debug!(self.log, "Chain was not expecting a batch result";
|
||||
"batch_epoch" => batch_id);
|
||||
debug!(batch_epoch = %batch_id,"Chain was not expecting a batch result");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
_ => {
|
||||
@@ -476,8 +495,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
})?;
|
||||
|
||||
// Log the process result and the batch for debugging purposes.
|
||||
debug!(self.log, "Batch processing result"; "result" => ?result, &batch,
|
||||
"batch_epoch" => batch_id, "client" => %network.client_type(&peer), "batch_state" => batch_state);
|
||||
debug!(
|
||||
result = ?result,
|
||||
batch_epoch = %batch_id,
|
||||
client = %network.client_type(&peer),
|
||||
batch_state = ?batch_state,
|
||||
?batch,
|
||||
"Batch processing result"
|
||||
);
|
||||
|
||||
// We consider three cases. Batch was successfully processed, Batch failed processing due
|
||||
// to a faulty peer, or batch failed processing but the peer can't be deemed faulty.
|
||||
@@ -563,10 +588,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// There are some edge cases with forks that could land us in this situation.
|
||||
// This should be unlikely, so we tolerate these errors, but not often.
|
||||
warn!(
|
||||
self.log,
|
||||
"Batch failed to download. Dropping chain scoring peers";
|
||||
"score_adjustment" => %penalty,
|
||||
"batch_epoch"=> batch_id,
|
||||
score_adjustment = %penalty,
|
||||
batch_epoch = %batch_id,
|
||||
"Batch failed to download. Dropping chain scoring peers"
|
||||
);
|
||||
|
||||
for (peer, _) in self.peers.drain() {
|
||||
@@ -587,6 +611,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn reject_optimistic_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -599,13 +624,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// it. NOTE: this is done to prevent non-sequential batches coming from optimistic
|
||||
// starts from filling up the buffer size
|
||||
if epoch < self.to_be_downloaded {
|
||||
debug!(self.log, "Rejected optimistic batch left for future use"; "epoch" => %epoch, "reason" => reason);
|
||||
debug!(%epoch, reason, "Rejected optimistic batch left for future use");
|
||||
// this batch is now treated as any other batch, and re-requested for future use
|
||||
if redownload {
|
||||
return self.retry_batch_download(network, epoch);
|
||||
}
|
||||
} else {
|
||||
debug!(self.log, "Rejected optimistic batch"; "epoch" => %epoch, "reason" => reason);
|
||||
debug!(%epoch, reason, "Rejected optimistic batch");
|
||||
self.batches.remove(&epoch);
|
||||
}
|
||||
}
|
||||
@@ -621,6 +646,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// If a previous batch has been validated and it had been re-processed, penalize the original
|
||||
/// peer.
|
||||
#[allow(clippy::modulo_one)]
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn advance_chain(&mut self, network: &mut SyncNetworkContext<T>, validating_epoch: Epoch) {
|
||||
// make sure this epoch produces an advancement
|
||||
if validating_epoch <= self.start_epoch {
|
||||
@@ -629,7 +655,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
// safety check for batch boundaries
|
||||
if validating_epoch % EPOCHS_PER_BATCH != self.start_epoch % EPOCHS_PER_BATCH {
|
||||
crit!(self.log, "Validating Epoch is not aligned");
|
||||
crit!("Validating Epoch is not aligned");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -651,9 +677,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// A different peer sent the correct batch, the previous peer did not
|
||||
// We negatively score the original peer.
|
||||
let action = PeerAction::LowToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated. Scoring original peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = %id, score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id, new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated. Scoring original peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -664,9 +691,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// The same peer corrected it's previous mistake. There was an error, so we
|
||||
// negative score the original peer.
|
||||
let action = PeerAction::MidToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated by the same peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = %id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated by the same peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -683,13 +713,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
active_batches.remove(&id);
|
||||
}
|
||||
}
|
||||
BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => crit!(
|
||||
self.log,
|
||||
"batch indicates inconsistent chain state while advancing chain"
|
||||
),
|
||||
BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => {
|
||||
crit!("batch indicates inconsistent chain state while advancing chain")
|
||||
}
|
||||
BatchState::AwaitingProcessing(..) => {}
|
||||
BatchState::Processing(_) => {
|
||||
debug!(self.log, "Advancing chain while processing a batch"; "batch" => id, batch);
|
||||
debug!(batch = %id, %batch, "Advancing chain while processing a batch");
|
||||
if let Some(processing_id) = self.current_processing_batch {
|
||||
if id <= processing_id {
|
||||
self.current_processing_batch = None;
|
||||
@@ -713,8 +742,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.optimistic_start = None;
|
||||
}
|
||||
}
|
||||
debug!(self.log, "Chain advanced"; "previous_start" => old_start,
|
||||
"new_start" => self.start_epoch, "processing_target" => self.processing_target);
|
||||
debug!(
|
||||
previous_start = %old_start,
|
||||
new_start = %self.start_epoch,
|
||||
processing_target = %self.processing_target,
|
||||
"Chain advanced"
|
||||
);
|
||||
}
|
||||
|
||||
/// An invalid batch has been received that could not be processed, but that can be retried.
|
||||
@@ -722,6 +755,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// These events occur when a peer has successfully responded with blocks, but the blocks we
|
||||
/// have received are incorrect or invalid. This indicates the peer has not performed as
|
||||
/// intended and can result in downvoting a peer.
|
||||
#[instrument(parent = None,level = "info", fields(service = self.id, network), skip_all)]
|
||||
fn handle_invalid_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -781,6 +815,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// This chain has been requested to start syncing.
|
||||
///
|
||||
/// This could be new chain, or an old chain that is being resumed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn start_syncing(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -819,6 +854,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// Add a peer to the chain.
|
||||
///
|
||||
/// If the chain is active, this starts requesting batches from this peer.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn add_peer(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -836,6 +872,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// An RPC error has occurred.
|
||||
///
|
||||
/// If the batch exists it is re-requested.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -852,24 +889,21 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// columns.
|
||||
if !batch.is_expecting_block(&request_id) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not expecting block";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
%peer_id,
|
||||
%request_id,
|
||||
?batch_state,
|
||||
"Batch not expecting block"
|
||||
);
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch failed. RPC Error";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
%peer_id,
|
||||
%request_id,
|
||||
"Batch failed. RPC Error"
|
||||
);
|
||||
if let Some(active_requests) = self.peers.get_mut(peer_id) {
|
||||
active_requests.remove(&batch_id);
|
||||
@@ -883,12 +917,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.retry_batch_download(network, batch_id)
|
||||
} else {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not found";
|
||||
"batch_epoch" => batch_id,
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
%peer_id,
|
||||
%request_id,
|
||||
batch_state,
|
||||
"Batch not found"
|
||||
);
|
||||
// this could be an error for an old batch, removed when the chain advances
|
||||
Ok(KeepChain)
|
||||
@@ -896,6 +929,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Sends and registers the request of a batch awaiting download.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn retry_batch_download(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -932,6 +966,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Requests the batch assigned to the given id from a given peer.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn send_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -958,9 +993,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
.map(|epoch| epoch == batch_id)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
debug!(self.log, "Requesting optimistic batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state);
|
||||
debug!(epoch = %batch_id, %batch, %batch_state, "Requesting optimistic batch");
|
||||
} else {
|
||||
debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state);
|
||||
debug!(epoch = %batch_id, %batch, %batch_state, "Requesting batch");
|
||||
}
|
||||
// register the batch for this peer
|
||||
return self
|
||||
@@ -979,8 +1014,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
Err(e) => {
|
||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||
warn!(self.log, "Could not send batch request";
|
||||
"batch_id" => batch_id, "error" => ?e, &batch);
|
||||
warn!(%batch_id, error = %e, %batch, "Could not send batch request");
|
||||
// register the failed download and check if the batch can be retried
|
||||
batch.start_downloading_from_peer(peer, 1)?; // fake request_id is not relevant
|
||||
self.peers
|
||||
@@ -1005,6 +1039,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns true if this chain is currently syncing.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn is_syncing(&self) -> bool {
|
||||
match self.state {
|
||||
ChainSyncingState::Syncing => true,
|
||||
@@ -1014,6 +1049,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Kickstarts the chain by sending for processing batches that are ready and requesting more
|
||||
/// batches if needed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn resume(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -1026,6 +1062,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Attempts to request the next required batches from the peer pool if the chain is syncing. It will exhaust the peer
|
||||
/// pool and left over batches until the batch buffer is reached or all peers are exhausted.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn request_batches(&mut self, network: &mut SyncNetworkContext<T>) -> ProcessingResult {
|
||||
if !matches!(self.state, ChainSyncingState::Syncing) {
|
||||
return Ok(KeepChain);
|
||||
@@ -1052,10 +1089,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// We wait for this batch before requesting any other batches.
|
||||
if let Some(epoch) = self.optimistic_start {
|
||||
if !self.good_peers_on_sampling_subnets(epoch, network) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Waiting for peers to be available on sampling column subnets"
|
||||
);
|
||||
debug!("Waiting for peers to be available on sampling column subnets");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
|
||||
@@ -1114,6 +1148,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Creates the next required batch from the chain. If there are no more batches required,
|
||||
/// `false` is returned.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn include_next_batch(&mut self, network: &mut SyncNetworkContext<T>) -> Option<BatchId> {
|
||||
// don't request batches beyond the target head slot
|
||||
if self
|
||||
@@ -1147,10 +1182,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// block and data column requests are currently coupled. This can be removed once we find a
|
||||
// way to decouple the requests and do retries individually, see issue #6258.
|
||||
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Waiting for peers to be available on custody column subnets"
|
||||
);
|
||||
debug!("Waiting for peers to be available on custody column subnets");
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -1177,6 +1209,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// This produces a string of the form: [D,E,E,E,E]
|
||||
/// to indicate the current buffer state of the chain. The symbols are defined on each of the
|
||||
/// batch states. See [BatchState::visualize] for symbol definitions.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn visualize_batch_state(&self) -> String {
|
||||
let mut visualization_string = String::with_capacity((BATCH_BUFFER_SIZE * 3) as usize);
|
||||
|
||||
@@ -1212,45 +1245,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a `&mut SyncingChain` be passed directly as a `slog` key-value group.
///
/// All the work is forwarded to the `slog::KV` implementation of `SyncingChain` itself.
impl<T: BeaconChainTypes> slog::KV for &mut SyncingChain<T> {
    fn serialize(
        &self,
        record: &slog::Record,
        serializer: &mut dyn slog::Serializer,
    ) -> slog::Result {
        // `self` is `&&mut SyncingChain`; it auto-derefs to the `&SyncingChain` the inner impl expects.
        <SyncingChain<T> as slog::KV>::serialize(self, record, serializer)
    }
}
|
||||
|
||||
/// Structured-logging (`slog`) representation of a syncing chain.
///
/// Emits, in order, the keys `id`, `from`, `to`, `end_root`, `current_target`, `batches`,
/// `peers` and `state`.
impl<T: BeaconChainTypes> slog::KV for SyncingChain<T> {
    fn serialize(
        &self,
        record: &slog::Record,
        serializer: &mut dyn slog::Serializer,
    ) -> slog::Result {
        serializer.emit_u32("id", self.id)?;
        slog::Value::serialize(&self.start_epoch, record, "from", serializer)?;

        // "to" is the epoch that contains the target head slot.
        let target_epoch = self
            .target_head_slot
            .epoch(T::EthSpec::slots_per_epoch());
        slog::Value::serialize(&target_epoch, record, "to", serializer)?;

        serializer.emit_arguments("end_root", &format_args!("{}", self.target_head_root))?;
        slog::Value::serialize(&self.processing_target, record, "current_target", serializer)?;

        // Sizes of the batch buffer and of the peer map.
        serializer.emit_usize("batches", self.batches.len())?;
        serializer.emit_usize("peers", self.peers.len())?;

        serializer.emit_arguments("state", &format_args!("{:?}", self.state))?;

        Ok(())
    }
}
|
||||
|
||||
use super::batch::WrongState as WrongBatchState;
|
||||
impl From<WrongBatchState> for RemoveChain {
|
||||
fn from(err: WrongBatchState) -> Self {
|
||||
|
||||
@@ -12,11 +12,12 @@ use fnv::FnvHashMap;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::PeerId;
|
||||
use lighthouse_network::SyncInfo;
|
||||
use slog::{crit, debug, error};
|
||||
use logging::crit;
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, error};
|
||||
use types::EthSpec;
|
||||
use types::{Epoch, Hash256, Slot};
|
||||
|
||||
@@ -50,18 +51,15 @@ pub struct ChainCollection<T: BeaconChainTypes> {
|
||||
head_chains: FnvHashMap<ChainId, SyncingChain<T>>,
|
||||
/// The current sync state of the process.
|
||||
state: RangeSyncState,
|
||||
/// Logger for the collection.
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: slog::Logger) -> Self {
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||
ChainCollection {
|
||||
beacon_chain,
|
||||
finalized_chains: FnvHashMap::default(),
|
||||
head_chains: FnvHashMap::default(),
|
||||
state: RangeSyncState::Idle,
|
||||
log,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,9 +293,8 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
.expect("Chain exists");
|
||||
|
||||
match old_id {
|
||||
Some(Some(old_id)) => debug!(self.log, "Switching finalized chains";
|
||||
"old_id" => old_id, &chain),
|
||||
None => debug!(self.log, "Syncing new finalized chain"; &chain),
|
||||
Some(Some(old_id)) => debug!(old_id, %chain, "Switching finalized chains"),
|
||||
None => debug!(%chain, "Syncing new finalized chain"),
|
||||
Some(None) => {
|
||||
// this is the same chain. We try to advance it.
|
||||
}
|
||||
@@ -309,10 +306,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
if let Err(remove_reason) = chain.start_syncing(network, local_epoch, local_head_epoch)
|
||||
{
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed while switching chains"; "chain" => new_id, "reason" => ?remove_reason);
|
||||
crit!(chain = new_id, reason = ?remove_reason, "Chain removed while switching chains");
|
||||
} else {
|
||||
// this happens only if sending a batch over the `network` fails a lot
|
||||
error!(self.log, "Chain removed while switching chains"; "chain" => new_id, "reason" => ?remove_reason);
|
||||
error!(chain = new_id, reason = ?remove_reason, "Chain removed while switching chains");
|
||||
}
|
||||
self.finalized_chains.remove(&new_id);
|
||||
self.on_chain_removed(&new_id, true, RangeSyncType::Finalized);
|
||||
@@ -330,7 +327,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
) {
|
||||
// Include the awaiting head peers
|
||||
for (peer_id, peer_sync_info) in awaiting_head_peers.drain() {
|
||||
debug!(self.log, "including head peer");
|
||||
debug!("including head peer");
|
||||
self.add_peer_or_create_chain(
|
||||
local_epoch,
|
||||
peer_sync_info.head_root,
|
||||
@@ -362,16 +359,16 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
if syncing_chains.len() < PARALLEL_HEAD_CHAINS {
|
||||
// start this chain if it's not already syncing
|
||||
if !chain.is_syncing() {
|
||||
debug!(self.log, "New head chain started syncing"; &chain);
|
||||
debug!(%chain, "New head chain started syncing");
|
||||
}
|
||||
if let Err(remove_reason) =
|
||||
chain.start_syncing(network, local_epoch, local_head_epoch)
|
||||
{
|
||||
self.head_chains.remove(&id);
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed while switching head chains"; "chain" => id, "reason" => ?remove_reason);
|
||||
crit!(chain = id, reason = ?remove_reason, "Chain removed while switching head chains");
|
||||
} else {
|
||||
error!(self.log, "Chain removed while switching head chains"; "chain" => id, "reason" => ?remove_reason);
|
||||
error!(chain = id, reason = ?remove_reason, "Chain removed while switching head chains");
|
||||
}
|
||||
} else {
|
||||
syncing_chains.push(id);
|
||||
@@ -407,7 +404,6 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
.start_slot(T::EthSpec::slots_per_epoch());
|
||||
|
||||
let beacon_chain = &self.beacon_chain;
|
||||
let log_ref = &self.log;
|
||||
|
||||
let is_outdated = |target_slot: &Slot, target_root: &Hash256| {
|
||||
target_slot <= &local_finalized_slot
|
||||
@@ -425,7 +421,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
if is_outdated(&chain.target_head_slot, &chain.target_head_root)
|
||||
|| chain.available_peers() == 0
|
||||
{
|
||||
debug!(log_ref, "Purging out of finalized chain"; &chain);
|
||||
debug!(%chain, "Purging out of finalized chain");
|
||||
Some((*id, chain.is_syncing(), RangeSyncType::Finalized))
|
||||
} else {
|
||||
None
|
||||
@@ -436,7 +432,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
if is_outdated(&chain.target_head_slot, &chain.target_head_root)
|
||||
|| chain.available_peers() == 0
|
||||
{
|
||||
debug!(log_ref, "Purging out of date head chain"; &chain);
|
||||
debug!(%chain, "Purging out of date head chain");
|
||||
Some((*id, chain.is_syncing(), RangeSyncType::Head))
|
||||
} else {
|
||||
None
|
||||
@@ -477,14 +473,14 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
.find(|(_, chain)| chain.has_same_target(target_head_slot, target_head_root))
|
||||
{
|
||||
Some((&id, chain)) => {
|
||||
debug!(self.log, "Adding peer to known chain"; "peer_id" => %peer, "sync_type" => ?sync_type, "id" => id);
|
||||
debug!(peer_id = %peer, ?sync_type, id, "Adding peer to known chain");
|
||||
debug_assert_eq!(chain.target_head_root, target_head_root);
|
||||
debug_assert_eq!(chain.target_head_slot, target_head_slot);
|
||||
if let Err(remove_reason) = chain.add_peer(network, peer) {
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed after adding peer"; "chain" => id, "reason" => ?remove_reason);
|
||||
crit!(chain = %id, reason = ?remove_reason, "Chain removed after adding peer");
|
||||
} else {
|
||||
error!(self.log, "Chain removed after adding peer"; "chain" => id, "reason" => ?remove_reason);
|
||||
error!(chain = %id, reason = ?remove_reason, "Chain removed after adding peer");
|
||||
}
|
||||
let is_syncing = chain.is_syncing();
|
||||
collection.remove(&id);
|
||||
@@ -501,9 +497,9 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
target_head_root,
|
||||
peer,
|
||||
sync_type.into(),
|
||||
&self.log,
|
||||
);
|
||||
debug!(self.log, "New chain added to sync"; "peer_id" => peer_rpr, "sync_type" => ?sync_type, &new_chain);
|
||||
|
||||
debug!(peer_id = peer_rpr, ?sync_type, %new_chain, "New chain added to sync");
|
||||
collection.insert(id, new_chain);
|
||||
metrics::inc_counter_vec(&metrics::SYNCING_CHAINS_ADDED, &[sync_type.as_str()]);
|
||||
self.update_metrics();
|
||||
|
||||
@@ -51,10 +51,11 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use lighthouse_network::rpc::GoodbyeReason;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::{PeerId, SyncInfo};
|
||||
use logging::crit;
|
||||
use lru_cache::LRUTimeCache;
|
||||
use slog::{crit, debug, trace, warn};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, instrument, trace, warn};
|
||||
use types::{Epoch, EthSpec, Hash256};
|
||||
|
||||
/// For how long we store failed finalized chains to prevent retries.
|
||||
@@ -74,23 +75,26 @@ pub struct RangeSync<T: BeaconChainTypes> {
|
||||
chains: ChainCollection<T>,
|
||||
/// Chains that have failed and are stored to prevent being retried.
|
||||
failed_chains: LRUTimeCache<Hash256>,
|
||||
/// The syncing logger.
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> RangeSync<T>
|
||||
where
|
||||
T: BeaconChainTypes,
|
||||
{
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: slog::Logger) -> Self {
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||
RangeSync {
|
||||
beacon_chain: beacon_chain.clone(),
|
||||
chains: ChainCollection::new(beacon_chain, log.clone()),
|
||||
chains: ChainCollection::new(beacon_chain),
|
||||
failed_chains: LRUTimeCache::new(std::time::Duration::from_secs(
|
||||
FAILED_CHAINS_EXPIRY_SECONDS,
|
||||
)),
|
||||
awaiting_head_peers: HashMap::new(),
|
||||
log,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +103,12 @@ where
|
||||
self.failed_chains.keys().copied().collect()
|
||||
}
|
||||
|
||||
/// Returns the sync status reported by the `chains` field (`self.chains.state()`).
// The `instrument` attribute wraps each call in a tracing span named "range_sync" at
// info level with the field `component = "range_sync"`; `parent = None` makes it a root
// span and `skip_all` keeps the function arguments out of the span's fields.
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn state(&self) -> SyncChainStatus {
|
||||
self.chains.state()
|
||||
}
|
||||
@@ -108,6 +118,12 @@ where
|
||||
/// may need to be synced as a result. A new peer may increase the peer pool of a finalized
|
||||
/// chain, which may result in a different finalized chain being synced, as finalized chains are
|
||||
/// prioritised by peer-pool size.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn add_peer(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -133,14 +149,13 @@ where
|
||||
RangeSyncType::Finalized => {
|
||||
// Make sure we have not recently tried this chain
|
||||
if self.failed_chains.contains(&remote_info.finalized_root) {
|
||||
debug!(self.log, "Disconnecting peer that belongs to previously failed chain";
|
||||
"failed_root" => %remote_info.finalized_root, "peer_id" => %peer_id);
|
||||
debug!(failed_root = ?remote_info.finalized_root, %peer_id,"Disconnecting peer that belongs to previously failed chain");
|
||||
network.goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork);
|
||||
return;
|
||||
}
|
||||
|
||||
// Finalized chain search
|
||||
debug!(self.log, "Finalization sync peer joined"; "peer_id" => %peer_id);
|
||||
debug!(%peer_id, "Finalization sync peer joined");
|
||||
self.awaiting_head_peers.remove(&peer_id);
|
||||
|
||||
// Because of our change in finalized sync batch size from 2 to 1 and our transition
|
||||
@@ -171,8 +186,7 @@ where
|
||||
if self.chains.is_finalizing_sync() {
|
||||
// If there are finalized chains to sync, finish these first, before syncing head
|
||||
// chains.
|
||||
trace!(self.log, "Waiting for finalized sync to complete";
|
||||
"peer_id" => %peer_id, "awaiting_head_peers" => &self.awaiting_head_peers.len());
|
||||
trace!(%peer_id, awaiting_head_peers = &self.awaiting_head_peers.len(),"Waiting for finalized sync to complete");
|
||||
self.awaiting_head_peers.insert(peer_id, remote_info);
|
||||
return;
|
||||
}
|
||||
@@ -204,6 +218,12 @@ where
|
||||
///
|
||||
/// This function finds the chain that made this request. Once found, processes the result.
|
||||
/// This request could complete a chain or simply add to its progress.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn blocks_by_range_response(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -229,11 +249,17 @@ where
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn handle_block_process_result(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -259,13 +285,19 @@ where
|
||||
}
|
||||
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A peer has disconnected. This removes the peer from any ongoing chains and mappings. A
|
||||
/// disconnected peer could remove a chain
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn peer_disconnect(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
|
||||
// if the peer is in the awaiting head mapping, remove it
|
||||
self.awaiting_head_peers.remove(peer_id);
|
||||
@@ -278,6 +310,12 @@ where
|
||||
/// which pool the peer is in. The chain may also have a batch or batches awaiting
|
||||
/// for this peer. If so we mark the batch as failed. The batch may then hit its maximum
|
||||
/// retries. In this case, we need to remove the chain.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn remove_peer(&mut self, network: &mut SyncNetworkContext<T>, peer_id: &PeerId) {
|
||||
for (removed_chain, sync_type, remove_reason) in self
|
||||
.chains
|
||||
@@ -297,6 +335,12 @@ where
|
||||
///
|
||||
/// Check to see if the request corresponds to a pending batch. If so, re-request it if possible, if there have
|
||||
/// been too many failed attempts for the batch, remove the chain.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -321,11 +365,17 @@ where
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
trace!(self.log, "BlocksByRange response for removed chain"; "chain" => chain_id)
|
||||
trace!(%chain_id, "BlocksByRange response for removed chain")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn on_chain_removed(
|
||||
&mut self,
|
||||
chain: SyncingChain<T>,
|
||||
@@ -335,14 +385,18 @@ where
|
||||
op: &'static str,
|
||||
) {
|
||||
if remove_reason.is_critical() {
|
||||
crit!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
crit!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
|
||||
} else {
|
||||
debug!(self.log, "Chain removed"; "sync_type" => ?sync_type, &chain, "reason" => ?remove_reason, "op" => op);
|
||||
debug!(?sync_type, %chain, reason = ?remove_reason,op, "Chain removed");
|
||||
}
|
||||
|
||||
if let RemoveChain::ChainFailed { blacklist, .. } = remove_reason {
|
||||
if RangeSyncType::Finalized == sync_type && blacklist {
|
||||
warn!(self.log, "Chain failed! Syncing to its head won't be retried for at least the next {} seconds", FAILED_CHAINS_EXPIRY_SECONDS; &chain);
|
||||
warn!(
|
||||
%chain,
|
||||
"Chain failed! Syncing to its head won't be retried for at least the next {} seconds",
|
||||
FAILED_CHAINS_EXPIRY_SECONDS
|
||||
);
|
||||
self.failed_chains.insert(chain.target_head_root);
|
||||
}
|
||||
}
|
||||
@@ -369,6 +423,12 @@ where
|
||||
}
|
||||
|
||||
/// Kickstarts sync.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn resume(&mut self, network: &mut SyncNetworkContext<T>) {
|
||||
for (removed_chain, sync_type, remove_reason) in
|
||||
self.chains.call_all(|chain| chain.resume(network))
|
||||
|
||||
Reference in New Issue
Block a user