mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-10 04:01:51 +00:00
Integrate tracing (#6339)
Tracing Integration
- [reference](5bbf1859e9/projects/project-ideas.md (L297))
- [x] replace slog & log with tracing throughout the codebase
- [x] implement custom crit log
- [x] make relevant changes in the formatter
- [x] replace sloggers
- [x] re-write SSE logging components
cc: @macladson @eserilev
This commit is contained in:
@@ -20,13 +20,14 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::types::{BackFillState, NetworkGlobals};
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use rand::seq::SliceRandom;
|
||||
use slog::{crit, debug, error, info, warn};
|
||||
use std::collections::{
|
||||
btree_map::{BTreeMap, Entry},
|
||||
HashMap, HashSet,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, error, info, instrument, warn};
|
||||
use types::{Epoch, EthSpec};
|
||||
|
||||
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
|
||||
@@ -146,16 +147,17 @@ pub struct BackFillSync<T: BeaconChainTypes> {
|
||||
/// Reference to the network globals in order to obtain valid peers to backfill blocks from
|
||||
/// (i.e synced peers).
|
||||
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
|
||||
|
||||
/// A logger for backfill sync.
|
||||
log: slog::Logger,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn new(
|
||||
beacon_chain: Arc<BeaconChain<T>>,
|
||||
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
|
||||
log: slog::Logger,
|
||||
) -> Self {
|
||||
// Determine if backfill is enabled or not.
|
||||
// If, for some reason a backfill has already been completed (or we've used a trusted
|
||||
@@ -186,7 +188,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
participating_peers: HashSet::new(),
|
||||
restart_failed_sync: false,
|
||||
beacon_chain,
|
||||
log,
|
||||
};
|
||||
|
||||
// Update the global network state with the current backfill state.
|
||||
@@ -195,9 +196,15 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
|
||||
/// Pauses the backfill sync if it's currently syncing.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn pause(&mut self) {
|
||||
if let BackFillState::Syncing = self.state() {
|
||||
debug!(self.log, "Backfill sync paused"; "processed_epochs" => self.validated_batches, "to_be_processed" => self.current_start);
|
||||
debug!(processed_epochs = %self.validated_batches, to_be_processed = %self.current_start,"Backfill sync paused");
|
||||
self.set_state(BackFillState::Paused);
|
||||
}
|
||||
}
|
||||
@@ -206,6 +213,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
///
|
||||
/// If resuming is successful, reports back the current syncing metrics.
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn start(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -222,7 +235,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
.is_some()
|
||||
{
|
||||
// If there are peers to resume with, begin the resume.
|
||||
debug!(self.log, "Resuming backfill sync"; "start_epoch" => self.current_start, "awaiting_batches" => self.batches.len(), "processing_target" => self.processing_target);
|
||||
debug!(start_epoch = ?self.current_start, awaiting_batches = self.batches.len(), processing_target = ?self.processing_target, "Resuming backfill sync");
|
||||
self.set_state(BackFillState::Syncing);
|
||||
// Resume any previously failed batches.
|
||||
self.resume_batches(network)?;
|
||||
@@ -251,14 +264,14 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// This infallible match exists to force us to update this code if a future
|
||||
// refactor of `ResetEpochError` adds a variant.
|
||||
let ResetEpochError::SyncCompleted = e;
|
||||
error!(self.log, "Backfill sync completed whilst in failed status");
|
||||
error!("Backfill sync completed whilst in failed status");
|
||||
self.set_state(BackFillState::Completed);
|
||||
return Err(BackFillError::InvalidSyncState(String::from(
|
||||
"chain completed",
|
||||
)));
|
||||
}
|
||||
|
||||
debug!(self.log, "Resuming a failed backfill sync"; "start_epoch" => self.current_start);
|
||||
debug!(start_epoch = %self.current_start, "Resuming a failed backfill sync");
|
||||
|
||||
// begin requesting blocks from the peer pool, until all peers are exhausted.
|
||||
self.request_batches(network)?;
|
||||
@@ -281,6 +294,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// A fully synced peer has joined us.
|
||||
/// If we are in a failed state, update a local variable to indicate we are able to restart
|
||||
/// the failed sync on the next attempt.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn fully_synced_peer_joined(&mut self) {
|
||||
if matches!(self.state(), BackFillState::Failed) {
|
||||
self.restart_failed_sync = true;
|
||||
@@ -289,6 +308,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
|
||||
/// A peer has disconnected.
|
||||
/// If the peer has active batches, those are considered failed and re-requested.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
pub fn peer_disconnected(
|
||||
&mut self,
|
||||
@@ -318,15 +343,13 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// short circuit early.
|
||||
if self.retry_batch_download(network, id).is_err() {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch could not be retried";
|
||||
"batch_id" => id,
|
||||
"error" => "no synced peers"
|
||||
batch_id = %id,
|
||||
error = "no synced peers",
|
||||
"Batch could not be retried"
|
||||
);
|
||||
}
|
||||
} else {
|
||||
debug!(self.log, "Batch not found while removing peer";
|
||||
"peer" => %peer_id, "batch" => id)
|
||||
debug!(peer = %peer_id, batch = %id, "Batch not found while removing peer");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -339,6 +362,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// An RPC error has occurred.
|
||||
///
|
||||
/// If the batch exists it is re-requested.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
@@ -356,7 +385,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
if !batch.is_expecting_block(&request_id) {
|
||||
return Ok(());
|
||||
}
|
||||
debug!(self.log, "Batch failed"; "batch_epoch" => batch_id, "error" => "rpc_error");
|
||||
debug!(batch_epoch = %batch_id, error = "rpc_error", "Batch failed");
|
||||
if let Some(active_requests) = self.active_requests.get_mut(peer_id) {
|
||||
active_requests.remove(&batch_id);
|
||||
}
|
||||
@@ -378,6 +407,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// If this returns an error, the backfill sync has failed and will be restarted once new peers
|
||||
/// join the system.
|
||||
/// The sync manager should update the global sync state on failure.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
pub fn on_block_response(
|
||||
&mut self,
|
||||
@@ -391,7 +426,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
let Some(batch) = self.batches.get_mut(&batch_id) else {
|
||||
if !matches!(self.state(), BackFillState::Failed) {
|
||||
// A batch might get removed when the chain advances, so this is non fatal.
|
||||
debug!(self.log, "Received a block for unknown batch"; "epoch" => batch_id);
|
||||
debug!(epoch = %batch_id, "Received a block for unknown batch");
|
||||
}
|
||||
return Ok(ProcessResult::Successful);
|
||||
};
|
||||
@@ -416,7 +451,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
Ok(received) => {
|
||||
let awaiting_batches =
|
||||
self.processing_target.saturating_sub(batch_id) / BACKFILL_EPOCHS_PER_BATCH;
|
||||
debug!(self.log, "Completed batch received"; "epoch" => batch_id, "blocks" => received, "awaiting_batches" => awaiting_batches);
|
||||
debug!(
|
||||
epoch = %batch_id,
|
||||
blocks = received,
|
||||
%awaiting_batches,
|
||||
"Completed batch received"
|
||||
);
|
||||
|
||||
// pre-emptively request more blocks from peers whilst we process current blocks,
|
||||
self.request_batches(network)?;
|
||||
@@ -432,6 +472,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// The syncing process has failed.
|
||||
///
|
||||
/// This resets past variables, to allow for a fresh start when resuming.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn fail_sync(&mut self, error: BackFillError) -> Result<(), BackFillError> {
|
||||
// Some errors shouldn't fail the chain.
|
||||
if matches!(error, BackFillError::Paused) {
|
||||
@@ -455,7 +501,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// NOTE: Lets keep validated_batches for posterity
|
||||
|
||||
// Emit the log here
|
||||
error!(self.log, "Backfill sync failed"; "error" => ?error);
|
||||
error!(?error, "Backfill sync failed");
|
||||
|
||||
// Return the error, kinda weird pattern, but I want to use
|
||||
// `self.fail_chain(_)?` in other parts of the code.
|
||||
@@ -464,6 +510,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
|
||||
/// Processes the batch with the given id.
|
||||
/// The batch must exist and be ready for processing
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn process_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -503,8 +555,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
.beacon_processor()
|
||||
.send_chain_segment(process_id, blocks)
|
||||
{
|
||||
crit!(self.log, "Failed to send backfill segment to processor."; "msg" => "process_batch",
|
||||
"error" => %e, "batch" => self.processing_target);
|
||||
crit!(
|
||||
msg = "process_batch",
|
||||
error = %e,
|
||||
batch = ?self.processing_target,
|
||||
"Failed to send backfill segment to processor."
|
||||
);
|
||||
// This is unlikely to happen but it would stall syncing since the batch now has no
|
||||
// blocks to continue, and the chain is expecting a processing result that won't
|
||||
// arrive. To mitigate this, (fake) fail this processing so that the batch is
|
||||
@@ -518,6 +574,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// The block processor has completed processing a batch. This function handles the result
|
||||
/// of the batch processor.
|
||||
/// If an error is returned the BackFill sync has failed.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
pub fn on_batch_process_result(
|
||||
&mut self,
|
||||
@@ -530,13 +592,15 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// result
|
||||
let batch = match &self.current_processing_batch {
|
||||
Some(processing_id) if *processing_id != batch_id => {
|
||||
debug!(self.log, "Unexpected batch result";
|
||||
"batch_epoch" => batch_id, "expected_batch_epoch" => processing_id);
|
||||
debug!(
|
||||
batch_epoch = %batch_id.as_u64(),
|
||||
expected_batch_epoch = processing_id.as_u64(),
|
||||
"Unexpected batch result"
|
||||
);
|
||||
return Ok(ProcessResult::Successful);
|
||||
}
|
||||
None => {
|
||||
debug!(self.log, "Chain was not expecting a batch result";
|
||||
"batch_epoch" => batch_id);
|
||||
debug!(%batch_id, "Chain was not expecting a batch result");
|
||||
return Ok(ProcessResult::Successful);
|
||||
}
|
||||
_ => {
|
||||
@@ -566,8 +630,14 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
return Ok(ProcessResult::Successful);
|
||||
};
|
||||
|
||||
debug!(self.log, "Backfill batch processed"; "result" => ?result, &batch,
|
||||
"batch_epoch" => batch_id, "peer" => %peer, "client" => %network.client_type(peer));
|
||||
debug!(
|
||||
?result,
|
||||
%batch,
|
||||
batch_epoch = %batch_id,
|
||||
%peer,
|
||||
client = %network.client_type(peer),
|
||||
"Backfill batch processed"
|
||||
);
|
||||
|
||||
match result {
|
||||
BatchProcessResult::Success {
|
||||
@@ -591,7 +661,10 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// check if the chain has completed syncing
|
||||
if self.check_completed() {
|
||||
// chain is completed
|
||||
info!(self.log, "Backfill sync completed"; "blocks_processed" => self.validated_batches * T::EthSpec::slots_per_epoch());
|
||||
info!(
|
||||
blocks_processed = self.validated_batches * T::EthSpec::slots_per_epoch(),
|
||||
"Backfill sync completed"
|
||||
);
|
||||
self.set_state(BackFillState::Completed);
|
||||
Ok(ProcessResult::SyncCompleted)
|
||||
} else {
|
||||
@@ -619,10 +692,9 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// repeatedly and are either malicious or faulty. We stop the backfill sync and
|
||||
// report all synced peers that have participated.
|
||||
warn!(
|
||||
self.log,
|
||||
"Backfill batch failed to download. Penalizing peers";
|
||||
"score_adjustment" => %penalty,
|
||||
"batch_epoch"=> batch_id
|
||||
score_adjustment = %penalty,
|
||||
batch_epoch = %batch_id,
|
||||
"Backfill batch failed to download. Penalizing peers"
|
||||
);
|
||||
|
||||
for peer in self.participating_peers.drain() {
|
||||
@@ -658,6 +730,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
|
||||
/// Processes the next ready batch.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn process_completed_batches(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -692,7 +770,10 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
BatchState::AwaitingValidation(_) => {
|
||||
// TODO: I don't think this state is possible, log a CRIT just in case.
|
||||
// If this is not observed, add it to the failed state branch above.
|
||||
crit!(self.log, "Chain encountered a robust batch awaiting validation"; "batch" => self.processing_target);
|
||||
crit!(
|
||||
batch = ?self.processing_target,
|
||||
"Chain encountered a robust batch awaiting validation"
|
||||
);
|
||||
|
||||
self.processing_target -= BACKFILL_EPOCHS_PER_BATCH;
|
||||
if self.to_be_downloaded >= self.processing_target {
|
||||
@@ -718,6 +799,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
///
|
||||
/// If a previous batch has been validated and it had been re-processed, penalize the original
|
||||
/// peer.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn advance_chain(&mut self, network: &mut SyncNetworkContext<T>, validating_epoch: Epoch) {
|
||||
// make sure this epoch produces an advancement
|
||||
if validating_epoch >= self.current_start {
|
||||
@@ -745,9 +832,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// A different peer sent the correct batch, the previous peer did not
|
||||
// We negatively score the original peer.
|
||||
let action = PeerAction::LowToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated. Scoring original peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = ?id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated. Scoring original peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -758,9 +848,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// The same peer corrected it's previous mistake. There was an error, so we
|
||||
// negative score the original peer.
|
||||
let action = PeerAction::MidToleranceError;
|
||||
debug!(self.log, "Re-processed batch validated by the same peer";
|
||||
"batch_epoch" => id, "score_adjustment" => %action,
|
||||
"original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id
|
||||
debug!(
|
||||
batch_epoch = ?id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
"Re-processed batch validated by the same peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
@@ -778,14 +871,11 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
}
|
||||
BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => {
|
||||
crit!(
|
||||
self.log,
|
||||
"batch indicates inconsistent chain state while advancing chain"
|
||||
)
|
||||
crit!("batch indicates inconsistent chain state while advancing chain")
|
||||
}
|
||||
BatchState::AwaitingProcessing(..) => {}
|
||||
BatchState::Processing(_) => {
|
||||
debug!(self.log, "Advancing chain while processing a batch"; "batch" => id, batch);
|
||||
debug!(batch = %id, %batch, "Advancing chain while processing a batch");
|
||||
if let Some(processing_id) = self.current_processing_batch {
|
||||
if id >= processing_id {
|
||||
self.current_processing_batch = None;
|
||||
@@ -803,7 +893,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// won't have this batch, so we need to request it.
|
||||
self.to_be_downloaded -= BACKFILL_EPOCHS_PER_BATCH;
|
||||
}
|
||||
debug!(self.log, "Backfill advanced"; "validated_epoch" => validating_epoch, "processing_target" => self.processing_target);
|
||||
debug!(?validating_epoch, processing_target = ?self.processing_target, "Backfill advanced");
|
||||
}
|
||||
|
||||
/// An invalid batch has been received that could not be processed, but that can be retried.
|
||||
@@ -811,6 +901,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
/// These events occur when a peer has successfully responded with blocks, but the blocks we
|
||||
/// have received are incorrect or invalid. This indicates the peer has not performed as
|
||||
/// intended and can result in downvoting a peer.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn handle_invalid_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -862,6 +958,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
|
||||
/// Sends and registers the request of a batch awaiting download.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn retry_batch_download(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -896,13 +998,19 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
self.send_batch(network, batch_id, peer)
|
||||
} else {
|
||||
// If we are here the chain has no more synced peers
|
||||
info!(self.log, "Backfill sync paused"; "reason" => "insufficient_synced_peers");
|
||||
info!(reason = "insufficient_synced_peers", "Backfill sync paused");
|
||||
self.set_state(BackFillState::Paused);
|
||||
Err(BackFillError::Paused)
|
||||
}
|
||||
}
|
||||
|
||||
/// Requests the batch assigned to the given id from a given peer.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn send_batch(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -922,7 +1030,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
if let Err(e) = batch.start_downloading_from_peer(peer, request_id) {
|
||||
return self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0));
|
||||
}
|
||||
debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch);
|
||||
debug!(epoch = %batch_id, %batch, "Requesting batch");
|
||||
|
||||
// register the batch for this peer
|
||||
self.active_requests
|
||||
@@ -933,8 +1041,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
Err(e) => {
|
||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||
warn!(self.log, "Could not send batch request";
|
||||
"batch_id" => batch_id, "error" => ?e, &batch);
|
||||
warn!(%batch_id, error = ?e, %batch,"Could not send batch request");
|
||||
// register the failed download and check if the batch can be retried
|
||||
if let Err(e) = batch.start_downloading_from_peer(peer, 1) {
|
||||
return self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0));
|
||||
@@ -963,6 +1070,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
|
||||
/// When resuming a chain, this function searches for batches that need to be re-downloaded and
|
||||
/// transitions their state to redownload the batch.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn resume_batches(&mut self, network: &mut SyncNetworkContext<T>) -> Result<(), BackFillError> {
|
||||
let batch_ids_to_retry = self
|
||||
.batches
|
||||
@@ -987,6 +1100,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
|
||||
/// Attempts to request the next required batches from the peer pool if the chain is syncing. It will exhaust the peer
|
||||
/// pool and left over batches until the batch buffer is reached or all peers are exhausted.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn request_batches(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
@@ -1029,6 +1148,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
|
||||
/// Creates the next required batch from the chain. If there are no more batches required,
|
||||
/// `false` is returned.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn include_next_batch(&mut self, network: &mut SyncNetworkContext<T>) -> Option<BatchId> {
|
||||
// don't request batches beyond genesis;
|
||||
if self.last_batch_downloaded {
|
||||
@@ -1090,6 +1215,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
///
|
||||
/// This errors if the beacon chain indicates that backfill sync has already completed or is
|
||||
/// not required.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn reset_start_epoch(&mut self) -> Result<(), ResetEpochError> {
|
||||
let anchor_info = self.beacon_chain.store.get_anchor_info();
|
||||
if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) {
|
||||
@@ -1103,6 +1234,12 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
|
||||
/// Checks with the beacon chain if backfill sync has completed.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn check_completed(&mut self) -> bool {
|
||||
if self.would_complete(self.current_start) {
|
||||
// Check that the beacon chain agrees
|
||||
@@ -1111,13 +1248,19 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) {
|
||||
return true;
|
||||
} else {
|
||||
error!(self.log, "Backfill out of sync with beacon chain");
|
||||
error!("Backfill out of sync with beacon chain");
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Checks if backfill would complete by syncing to `start_epoch`.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn would_complete(&self, start_epoch: Epoch) -> bool {
|
||||
start_epoch
|
||||
<= self
|
||||
@@ -1127,10 +1270,22 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
|
||||
/// Updates the global network state indicating the current state of a backfill sync.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn set_state(&self, state: BackFillState) {
|
||||
*self.network_globals.backfill_state.write() = state;
|
||||
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
fn state(&self) -> BackFillState {
|
||||
self.network_globals.backfill_state.read().clone()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user