mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-06 18:21:45 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -9,11 +9,13 @@ use beacon_chain::BeaconChainTypes;
|
||||
use fnv::FnvHashMap;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use slog::{crit, debug, o, warn};
|
||||
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
|
||||
use std::fmt;
|
||||
use strum::IntoStaticStr;
|
||||
use tracing::{debug, instrument, warn};
|
||||
use types::{Epoch, EthSpec, Hash256, Slot};
|
||||
|
||||
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
|
||||
@@ -37,6 +39,7 @@ pub type ProcessingResult = Result<KeepChain, RemoveChain>;
|
||||
|
||||
/// Reasons for removing a chain
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub enum RemoveChain {
|
||||
EmptyPeerPool,
|
||||
ChainCompleted,
|
||||
@@ -66,6 +69,7 @@ pub enum SyncingChainType {
|
||||
/// A chain of blocks that need to be downloaded. Peers who claim to contain the target head
|
||||
/// root are grouped into the peer pool and queried for batches when downloading the
|
||||
/// chain.
|
||||
#[derive(Debug)]
|
||||
pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
/// A random id used to identify this chain.
|
||||
id: ChainId,
|
||||
@@ -110,9 +114,16 @@ pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
|
||||
/// The current processing batch, if any.
|
||||
current_processing_batch: Option<BatchId>,
|
||||
}
|
||||
|
||||
/// The chain's log.
|
||||
log: slog::Logger,
|
||||
impl<T: BeaconChainTypes> fmt::Display for SyncingChain<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.chain_type {
|
||||
SyncingChainType::Head => write!(f, "Head"),
|
||||
SyncingChainType::Finalized => write!(f, "Finalized"),
|
||||
SyncingChainType::Backfill => write!(f, "Backfill"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
@@ -132,7 +143,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
target_head_root: Hash256,
|
||||
peer_id: PeerId,
|
||||
chain_type: SyncingChainType,
|
||||
log: &slog::Logger,
|
||||
) -> Self {
|
||||
let mut peers = FnvHashMap::default();
|
||||
peers.insert(peer_id, Default::default());
|
||||
@@ -151,7 +161,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
attempted_optimistic_starts: HashSet::default(),
|
||||
state: ChainSyncingState::Stopped,
|
||||
current_processing_batch: None,
|
||||
log: log.new(o!("chain" => id)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,21 +170,25 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Check if the chain has peers from which to process batches.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn available_peers(&self) -> usize {
|
||||
self.peers.len()
|
||||
}
|
||||
|
||||
/// Get the chain's id.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn get_id(&self) -> ChainId {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Peers currently syncing this chain.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||
self.peers.keys().cloned()
|
||||
}
|
||||
|
||||
/// Progress in epochs made by the chain
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn processed_epochs(&self) -> u64 {
|
||||
self.processing_target
|
||||
.saturating_sub(self.start_epoch)
|
||||
@@ -183,6 +196,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns the total count of pending blocks in all the batches of this chain
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn pending_blocks(&self) -> usize {
|
||||
self.batches
|
||||
.values()
|
||||
@@ -192,6 +206,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Removes a peer from the chain.
|
||||
/// If the peer has active batches, those are considered failed and re-requested.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn remove_peer(
|
||||
&mut self,
|
||||
peer_id: &PeerId,
|
||||
@@ -211,8 +226,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
self.retry_batch_download(network, id)?;
|
||||
} else {
|
||||
debug!(self.log, "Batch not found while removing peer";
|
||||
"peer" => %peer_id, "batch" => id)
|
||||
debug!(%peer_id, batch = ?id, "Batch not found while removing peer")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -225,6 +239,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns the latest slot number that has been processed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn current_processed_slot(&self) -> Slot {
|
||||
// the last slot we processed was included in the previous batch, and corresponds to the
|
||||
// first slot of the current target epoch
|
||||
@@ -234,6 +249,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// A block has been received for a batch on this chain.
|
||||
/// If the block correctly completes the batch it will be processed if possible.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn on_block_response(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -245,7 +261,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// check if we have this batch
|
||||
let batch = match self.batches.get_mut(&batch_id) {
|
||||
None => {
|
||||
debug!(self.log, "Received a block for unknown batch"; "epoch" => batch_id);
|
||||
debug!(epoch = %batch_id, "Received a block for unknown batch");
|
||||
// A batch might get removed when the chain advances, so this is non fatal.
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
@@ -273,7 +289,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
let awaiting_batches = batch_id
|
||||
.saturating_sub(self.optimistic_start.unwrap_or(self.processing_target))
|
||||
/ EPOCHS_PER_BATCH;
|
||||
debug!(self.log, "Batch downloaded"; "epoch" => batch_id, "blocks" => received, "batch_state" => self.visualize_batch_state(), "awaiting_batches" => awaiting_batches);
|
||||
debug!(epoch = %batch_id, blocks = received, batch_state = self.visualize_batch_state(), %awaiting_batches,"Batch downloaded");
|
||||
|
||||
// pre-emptively request more blocks from peers whilst we process current blocks,
|
||||
self.request_batches(network)?;
|
||||
@@ -282,6 +298,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Processes the batch with the given id.
|
||||
/// The batch must exist and be ready for processing
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn process_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -317,8 +334,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.current_processing_batch = Some(batch_id);
|
||||
|
||||
if let Err(e) = beacon_processor.send_chain_segment(process_id, blocks) {
|
||||
crit!(self.log, "Failed to send chain segment to processor."; "msg" => "process_batch",
|
||||
"error" => %e, "batch" => self.processing_target);
|
||||
crit!(msg = "process_batch",error = %e, batch = ?self.processing_target, "Failed to send chain segment to processor.");
|
||||
// This is unlikely to happen but it would stall syncing since the batch now has no
|
||||
// blocks to continue, and the chain is expecting a processing result that won't
|
||||
// arrive. To mitigate this, (fake) fail this processing so that the batch is
|
||||
@@ -330,6 +346,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Processes the next ready batch, prioritizing optimistic batches over the processing target.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn process_completed_batches(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -349,7 +366,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
match state {
|
||||
BatchState::AwaitingProcessing(..) => {
|
||||
// this batch is ready
|
||||
debug!(self.log, "Processing optimistic start"; "epoch" => epoch);
|
||||
debug!(%epoch, "Processing optimistic start");
|
||||
return self.process_batch(network, epoch);
|
||||
}
|
||||
BatchState::Downloading(..) => {
|
||||
@@ -377,7 +394,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// batch has been requested and processed we can land here. We drop the
|
||||
// optimistic candidate since we can't conclude whether the batch included
|
||||
// blocks or not at this point
|
||||
debug!(self.log, "Dropping optimistic candidate"; "batch" => epoch);
|
||||
debug!(batch = %epoch, "Dropping optimistic candidate");
|
||||
self.optimistic_start = None;
|
||||
}
|
||||
}
|
||||
@@ -411,7 +428,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// inside the download buffer (between `self.processing_target` and
|
||||
// `self.to_be_downloaded`). In this case, eventually the chain advances to the
|
||||
// batch (`self.processing_target` reaches this point).
|
||||
debug!(self.log, "Chain encountered a robust batch awaiting validation"; "batch" => self.processing_target);
|
||||
debug!(
|
||||
batch = %self.processing_target,
|
||||
"Chain encountered a robust batch awaiting validation"
|
||||
);
|
||||
|
||||
self.processing_target += EPOCHS_PER_BATCH;
|
||||
if self.to_be_downloaded <= self.processing_target {
|
||||
@@ -436,6 +456,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// The block processor has completed processing a batch. This function handles the result
|
||||
/// of the batch processor.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn on_batch_process_result(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -447,13 +468,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
let batch_state = self.visualize_batch_state();
|
||||
let batch = match &self.current_processing_batch {
|
||||
Some(processing_id) if *processing_id != batch_id => {
|
||||
debug!(self.log, "Unexpected batch result";
|
||||
"batch_epoch" => batch_id, "expected_batch_epoch" => processing_id);
|
||||
debug!(batch_epoch = %batch_id, expected_batch_epoch = %processing_id,"Unexpected batch result");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
None => {
|
||||
debug!(self.log, "Chain was not expecting a batch result";
|
||||
"batch_epoch" => batch_id);
|
||||
debug!(batch_epoch = %batch_id,"Chain was not expecting a batch result");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
_ => {
|
||||
@@ -476,8 +495,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
})?;
|
||||
|
||||
// Log the process result and the batch for debugging purposes.
|
||||
debug!(self.log, "Batch processing result"; "result" => ?result, &batch,
|
||||
"batch_epoch" => batch_id, "client" => %network.client_type(&peer), "batch_state" => batch_state);
|
||||
debug!(
|
||||
result = ?result,
|
||||
batch_epoch = %batch_id,
|
||||
client = %network.client_type(&peer),
|
||||
batch_state = ?batch_state,
|
||||
?batch,
|
||||
"Batch processing result"
|
||||
);
|
||||
|
||||
// We consider three cases. Batch was successfully processed, Batch failed processing due
|
||||
// to a faulty peer, or batch failed processing but the peer can't be deemed faulty.
|
||||
@@ -563,10 +588,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// There are some edge cases with forks that could land us in this situation.
|
||||
// This should be unlikely, so we tolerate these errors, but not often.
|
||||
warn!(
|
||||
self.log,
|
||||
"Batch failed to download. Dropping chain scoring peers";
|
||||
"score_adjustment" => %penalty,
|
||||
"batch_epoch"=> batch_id,
|
||||
score_adjustment = %penalty,
|
||||
batch_epoch = %batch_id,
|
||||
"Batch failed to download. Dropping chain scoring peers"
|
||||
);
|
||||
|
||||
for (peer, _) in self.peers.drain() {
|
||||
@@ -587,6 +611,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn reject_optimistic_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -599,13 +624,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// it. NOTE: this is done to prevent non-sequential batches coming from optimistic
|
||||
// starts from filling up the buffer size
|
||||
if epoch < self.to_be_downloaded {
|
||||
debug!(self.log, "Rejected optimistic batch left for future use"; "epoch" => %epoch, "reason" => reason);
|
||||
debug!(%epoch, reason, "Rejected optimistic batch left for future use");
|
||||
// this batch is now treated as any other batch, and re-requested for future use
|
||||
if redownload {
|
||||
return self.retry_batch_download(network, epoch);
|
||||
}
|
||||
} else {
|
||||
debug!(self.log, "Rejected optimistic batch"; "epoch" => %epoch, "reason" => reason);
|
||||
debug!(%epoch, reason, "Rejected optimistic batch");
|
||||
self.batches.remove(&epoch);
|
||||
}
|
||||
}
|
||||
@@ -621,6 +646,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// If a previous batch has been validated and it had been re-processed, penalize the original
|
||||
/// peer.
|
||||
#[allow(clippy::modulo_one)]
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn advance_chain(&mut self, network: &mut SyncNetworkContext<T>, validating_epoch: Epoch) {
|
||||
// make sure this epoch produces an advancement
|
||||
if validating_epoch <= self.start_epoch {
|
||||
@@ -629,7 +655,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
// safety check for batch boundaries
|
||||
if validating_epoch % EPOCHS_PER_BATCH != self.start_epoch % EPOCHS_PER_BATCH {
|
||||
crit!(self.log, "Validating Epoch is not aligned");
|
||||
crit!("Validating Epoch is not aligned");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -651,9 +677,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// A different peer sent the correct batch, the previous peer did not
|
||||
// We negatively score the original peer.
|
||||
let action = PeerAction::LowToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated. Scoring original peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = %id, score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id, new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated. Scoring original peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -664,9 +691,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// The same peer corrected it's previous mistake. There was an error, so we
|
||||
// negative score the original peer.
|
||||
let action = PeerAction::MidToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated by the same peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = %id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated by the same peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -683,13 +713,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
active_batches.remove(&id);
|
||||
}
|
||||
}
|
||||
BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => crit!(
|
||||
self.log,
|
||||
"batch indicates inconsistent chain state while advancing chain"
|
||||
),
|
||||
BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => {
|
||||
crit!("batch indicates inconsistent chain state while advancing chain")
|
||||
}
|
||||
BatchState::AwaitingProcessing(..) => {}
|
||||
BatchState::Processing(_) => {
|
||||
debug!(self.log, "Advancing chain while processing a batch"; "batch" => id, batch);
|
||||
debug!(batch = %id, %batch, "Advancing chain while processing a batch");
|
||||
if let Some(processing_id) = self.current_processing_batch {
|
||||
if id <= processing_id {
|
||||
self.current_processing_batch = None;
|
||||
@@ -713,8 +742,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.optimistic_start = None;
|
||||
}
|
||||
}
|
||||
debug!(self.log, "Chain advanced"; "previous_start" => old_start,
|
||||
"new_start" => self.start_epoch, "processing_target" => self.processing_target);
|
||||
debug!(
|
||||
previous_start = %old_start,
|
||||
new_start = %self.start_epoch,
|
||||
processing_target = %self.processing_target,
|
||||
"Chain advanced"
|
||||
);
|
||||
}
|
||||
|
||||
/// An invalid batch has been received that could not be processed, but that can be retried.
|
||||
@@ -722,6 +755,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// These events occur when a peer has successfully responded with blocks, but the blocks we
|
||||
/// have received are incorrect or invalid. This indicates the peer has not performed as
|
||||
/// intended and can result in downvoting a peer.
|
||||
#[instrument(parent = None,level = "info", fields(service = self.id, network), skip_all)]
|
||||
fn handle_invalid_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -781,6 +815,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// This chain has been requested to start syncing.
|
||||
///
|
||||
/// This could be new chain, or an old chain that is being resumed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn start_syncing(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -819,6 +854,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// Add a peer to the chain.
|
||||
///
|
||||
/// If the chain is active, this starts requesting batches from this peer.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn add_peer(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -836,6 +872,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// An RPC error has occurred.
|
||||
///
|
||||
/// If the batch exists it is re-requested.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -852,24 +889,21 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// columns.
|
||||
if !batch.is_expecting_block(&request_id) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not expecting block";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
%peer_id,
|
||||
%request_id,
|
||||
?batch_state,
|
||||
"Batch not expecting block"
|
||||
);
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch failed. RPC Error";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
%peer_id,
|
||||
%request_id,
|
||||
"Batch failed. RPC Error"
|
||||
);
|
||||
if let Some(active_requests) = self.peers.get_mut(peer_id) {
|
||||
active_requests.remove(&batch_id);
|
||||
@@ -883,12 +917,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.retry_batch_download(network, batch_id)
|
||||
} else {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not found";
|
||||
"batch_epoch" => batch_id,
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id,
|
||||
"batch_state" => batch_state
|
||||
batch_epoch = %batch_id,
|
||||
%peer_id,
|
||||
%request_id,
|
||||
batch_state,
|
||||
"Batch not found"
|
||||
);
|
||||
// this could be an error for an old batch, removed when the chain advances
|
||||
Ok(KeepChain)
|
||||
@@ -896,6 +929,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Sends and registers the request of a batch awaiting download.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn retry_batch_download(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -932,6 +966,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Requests the batch assigned to the given id from a given peer.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn send_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -958,9 +993,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
.map(|epoch| epoch == batch_id)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
debug!(self.log, "Requesting optimistic batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state);
|
||||
debug!(epoch = %batch_id, %batch, %batch_state, "Requesting optimistic batch");
|
||||
} else {
|
||||
debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state);
|
||||
debug!(epoch = %batch_id, %batch, %batch_state, "Requesting batch");
|
||||
}
|
||||
// register the batch for this peer
|
||||
return self
|
||||
@@ -979,8 +1014,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
Err(e) => {
|
||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||
warn!(self.log, "Could not send batch request";
|
||||
"batch_id" => batch_id, "error" => ?e, &batch);
|
||||
warn!(%batch_id, error = %e, %batch, "Could not send batch request");
|
||||
// register the failed download and check if the batch can be retried
|
||||
batch.start_downloading_from_peer(peer, 1)?; // fake request_id is not relevant
|
||||
self.peers
|
||||
@@ -1005,6 +1039,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
|
||||
/// Returns true if this chain is currently syncing.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn is_syncing(&self) -> bool {
|
||||
match self.state {
|
||||
ChainSyncingState::Syncing => true,
|
||||
@@ -1014,6 +1049,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Kickstarts the chain by sending for processing batches that are ready and requesting more
|
||||
/// batches if needed.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn resume(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -1026,6 +1062,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Attempts to request the next required batches from the peer pool if the chain is syncing. It will exhaust the peer
|
||||
/// pool and left over batches until the batch buffer is reached or all peers are exhausted.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn request_batches(&mut self, network: &mut SyncNetworkContext<T>) -> ProcessingResult {
|
||||
if !matches!(self.state, ChainSyncingState::Syncing) {
|
||||
return Ok(KeepChain);
|
||||
@@ -1052,10 +1089,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// We wait for this batch before requesting any other batches.
|
||||
if let Some(epoch) = self.optimistic_start {
|
||||
if !self.good_peers_on_sampling_subnets(epoch, network) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Waiting for peers to be available on sampling column subnets"
|
||||
);
|
||||
debug!("Waiting for peers to be available on sampling column subnets");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
|
||||
@@ -1114,6 +1148,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
|
||||
/// Creates the next required batch from the chain. If there are no more batches required,
|
||||
/// `false` is returned.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn include_next_batch(&mut self, network: &mut SyncNetworkContext<T>) -> Option<BatchId> {
|
||||
// don't request batches beyond the target head slot
|
||||
if self
|
||||
@@ -1147,10 +1182,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// block and data column requests are currently coupled. This can be removed once we find a
|
||||
// way to decouple the requests and do retries individually, see issue #6258.
|
||||
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Waiting for peers to be available on custody column subnets"
|
||||
);
|
||||
debug!("Waiting for peers to be available on custody column subnets");
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -1177,6 +1209,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// This produces a string of the form: [D,E,E,E,E]
|
||||
/// to indicate the current buffer state of the chain. The symbols are defined on each of the
|
||||
/// batch states. See [BatchState::visualize] for symbol definitions.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn visualize_batch_state(&self) -> String {
|
||||
let mut visualization_string = String::with_capacity((BATCH_BUFFER_SIZE * 3) as usize);
|
||||
|
||||
@@ -1212,45 +1245,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> slog::KV for &mut SyncingChain<T> {
|
||||
fn serialize(
|
||||
&self,
|
||||
record: &slog::Record,
|
||||
serializer: &mut dyn slog::Serializer,
|
||||
) -> slog::Result {
|
||||
slog::KV::serialize(*self, record, serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> slog::KV for SyncingChain<T> {
|
||||
fn serialize(
|
||||
&self,
|
||||
record: &slog::Record,
|
||||
serializer: &mut dyn slog::Serializer,
|
||||
) -> slog::Result {
|
||||
use slog::Value;
|
||||
serializer.emit_u32("id", self.id)?;
|
||||
Value::serialize(&self.start_epoch, record, "from", serializer)?;
|
||||
Value::serialize(
|
||||
&self.target_head_slot.epoch(T::EthSpec::slots_per_epoch()),
|
||||
record,
|
||||
"to",
|
||||
serializer,
|
||||
)?;
|
||||
serializer.emit_arguments("end_root", &format_args!("{}", self.target_head_root))?;
|
||||
Value::serialize(
|
||||
&self.processing_target,
|
||||
record,
|
||||
"current_target",
|
||||
serializer,
|
||||
)?;
|
||||
serializer.emit_usize("batches", self.batches.len())?;
|
||||
serializer.emit_usize("peers", self.peers.len())?;
|
||||
serializer.emit_arguments("state", &format_args!("{:?}", self.state))?;
|
||||
slog::Result::Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
use super::batch::WrongState as WrongBatchState;
|
||||
impl From<WrongBatchState> for RemoveChain {
|
||||
fn from(err: WrongBatchState) -> Self {
|
||||
|
||||
Reference in New Issue
Block a user