mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-20 22:38:34 +00:00
Complete backfill restart
This commit is contained in:
@@ -36,6 +36,8 @@ pub enum HistoricalBlockError {
|
|||||||
IndexOutOfBounds,
|
IndexOutOfBounds,
|
||||||
/// Internal store error
|
/// Internal store error
|
||||||
StoreError(StoreError),
|
StoreError(StoreError),
|
||||||
|
/// Internal error
|
||||||
|
InternalError(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<StoreError> for HistoricalBlockError {
|
impl From<StoreError> for HistoricalBlockError {
|
||||||
@@ -45,6 +47,37 @@ impl From<StoreError> for HistoricalBlockError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: BeaconChainTypes> BeaconChain<T> {
|
impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||||
|
pub fn reset_anchor_oldest_block(
|
||||||
|
&self,
|
||||||
|
new_oldest_block_slot: Slot,
|
||||||
|
) -> Result<(), HistoricalBlockError> {
|
||||||
|
let prev_anchor = self.store.get_anchor_info();
|
||||||
|
|
||||||
|
if new_oldest_block_slot > prev_anchor.oldest_block_slot {
|
||||||
|
let new_oldest_parent_root = self
|
||||||
|
.block_root_at_slot(new_oldest_block_slot, crate::WhenSlotSkipped::Prev)
|
||||||
|
.map_err(|e| {
|
||||||
|
HistoricalBlockError::InternalError(format!(
|
||||||
|
"Error reading block root at slot: {e:?}"
|
||||||
|
))
|
||||||
|
})?
|
||||||
|
// The block at `new_oldest_block_slot` must already be imported since it's gte
|
||||||
|
// current `oldest_block_slot`.
|
||||||
|
.ok_or(HistoricalBlockError::InternalError(format!(
|
||||||
|
"Missing historical block root at slot {new_oldest_block_slot}"
|
||||||
|
)))?;
|
||||||
|
let new_anchor = prev_anchor
|
||||||
|
.as_increased_oldest_block(new_oldest_block_slot, new_oldest_parent_root);
|
||||||
|
self.store
|
||||||
|
.compare_and_set_anchor_info_with_write(prev_anchor, new_anchor)?;
|
||||||
|
debug!(%new_oldest_block_slot, ?new_oldest_parent_root, "Mutated anchor info to advance oldest block");
|
||||||
|
} else {
|
||||||
|
// This batch can be imported, no need to update anchor
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Store a batch of historical blocks in the database.
|
/// Store a batch of historical blocks in the database.
|
||||||
///
|
///
|
||||||
/// The `blocks` should be given in slot-ascending order. One of the blocks should have a block
|
/// The `blocks` should be given in slot-ascending order. One of the blocks should have a block
|
||||||
|
|||||||
@@ -592,6 +592,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
process_id: ChainSegmentProcessId,
|
process_id: ChainSegmentProcessId,
|
||||||
blocks: Vec<RpcBlock<T::EthSpec>>,
|
blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||||
|
reset_anchor_new_oldest_block_slot: Option<Slot>,
|
||||||
) -> Result<(), Error<T::EthSpec>> {
|
) -> Result<(), Error<T::EthSpec>> {
|
||||||
let is_backfill = matches!(&process_id, ChainSegmentProcessId::BackSyncBatchId { .. });
|
let is_backfill = matches!(&process_id, ChainSegmentProcessId::BackSyncBatchId { .. });
|
||||||
debug!(blocks = blocks.len(), id = ?process_id, "Batch sending for process");
|
debug!(blocks = blocks.len(), id = ?process_id, "Batch sending for process");
|
||||||
@@ -609,7 +610,12 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
NotifyExecutionLayer::Yes
|
NotifyExecutionLayer::Yes
|
||||||
};
|
};
|
||||||
processor
|
processor
|
||||||
.process_chain_segment(process_id, blocks, notify_execution_layer)
|
.process_chain_segment(
|
||||||
|
process_id,
|
||||||
|
blocks,
|
||||||
|
notify_execution_layer,
|
||||||
|
reset_anchor_new_oldest_block_slot,
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
};
|
};
|
||||||
let process_fn = Box::pin(process_fn);
|
let process_fn = Box::pin(process_fn);
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ use tokio::sync::mpsc;
|
|||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
use types::beacon_block_body::format_kzg_commitments;
|
use types::beacon_block_body::format_kzg_commitments;
|
||||||
use types::blob_sidecar::FixedBlobSidecarList;
|
use types::blob_sidecar::FixedBlobSidecarList;
|
||||||
use types::{BlockImportSource, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256};
|
use types::{BlockImportSource, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256, Slot};
|
||||||
|
|
||||||
/// Id associated to a batch processing request, either a sync batch or a parent lookup.
|
/// Id associated to a batch processing request, either a sync batch or a parent lookup.
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
@@ -438,6 +438,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
sync_type: ChainSegmentProcessId,
|
sync_type: ChainSegmentProcessId,
|
||||||
downloaded_blocks: Vec<RpcBlock<T::EthSpec>>,
|
downloaded_blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||||
notify_execution_layer: NotifyExecutionLayer,
|
notify_execution_layer: NotifyExecutionLayer,
|
||||||
|
reset_anchor_new_oldest_block_slot: Option<Slot>,
|
||||||
) {
|
) {
|
||||||
let result = match sync_type {
|
let result = match sync_type {
|
||||||
// this is a request from the range sync
|
// this is a request from the range sync
|
||||||
@@ -498,7 +499,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
.map(|wrapped| wrapped.n_data_columns())
|
.map(|wrapped| wrapped.n_data_columns())
|
||||||
.sum::<usize>();
|
.sum::<usize>();
|
||||||
|
|
||||||
match self.process_backfill_blocks(downloaded_blocks) {
|
match self
|
||||||
|
.process_backfill_blocks(downloaded_blocks, reset_anchor_new_oldest_block_slot)
|
||||||
|
{
|
||||||
(imported_blocks, Ok(_)) => {
|
(imported_blocks, Ok(_)) => {
|
||||||
debug!(
|
debug!(
|
||||||
batch_epoch = %epoch,
|
batch_epoch = %epoch,
|
||||||
@@ -586,6 +589,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
fn process_backfill_blocks(
|
fn process_backfill_blocks(
|
||||||
&self,
|
&self,
|
||||||
downloaded_blocks: Vec<RpcBlock<T::EthSpec>>,
|
downloaded_blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||||
|
reset_anchor_new_oldest_block_slot: Option<Slot>,
|
||||||
) -> (usize, Result<(), ChainSegmentFailed>) {
|
) -> (usize, Result<(), ChainSegmentFailed>) {
|
||||||
let total_blocks = downloaded_blocks.len();
|
let total_blocks = downloaded_blocks.len();
|
||||||
let available_blocks = match self
|
let available_blocks = match self
|
||||||
@@ -636,6 +640,23 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(das): If `reset_anchor_new_oldest_block_slot` does not get set for some reason,
|
||||||
|
// backfill sync will continue as usual but importing blocks from the previous start,
|
||||||
|
// leaving a CGC gap in the DB. I would like to have stronger assurances that this is
|
||||||
|
// working as expected. The issue is the `blocks_to_import` filtered vec in
|
||||||
|
// `import_historical_block_batch`.
|
||||||
|
if let Some(new_oldest_block_slot) = reset_anchor_new_oldest_block_slot {
|
||||||
|
if let Err(e) = self.chain.reset_anchor_oldest_block(new_oldest_block_slot) {
|
||||||
|
return (
|
||||||
|
0,
|
||||||
|
Err(ChainSegmentFailed {
|
||||||
|
peer_action: None,
|
||||||
|
message: format!("Failed to reset anchor oldest block: {e:?}"),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match self.chain.import_historical_block_batch(available_blocks) {
|
match self.chain.import_historical_block_batch(available_blocks) {
|
||||||
Ok(imported_blocks) => {
|
Ok(imported_blocks) => {
|
||||||
metrics::inc_counter(
|
metrics::inc_counter(
|
||||||
@@ -690,6 +711,11 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
|||||||
warn!(error = ?e, "Backfill batch processing error");
|
warn!(error = ?e, "Backfill batch processing error");
|
||||||
// This is an internal error, don't penalize the peer.
|
// This is an internal error, don't penalize the peer.
|
||||||
None
|
None
|
||||||
|
}
|
||||||
|
HistoricalBlockError::InternalError(e) => {
|
||||||
|
warn!(error = e, "Backfill batch processing error");
|
||||||
|
// This is an internal error, don't penalize the peer.
|
||||||
|
None
|
||||||
} //
|
} //
|
||||||
// Do not use a fallback match, handle all errors explicitly
|
// Do not use a fallback match, handle all errors explicitly
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||||
use tracing::{debug, error, info_span, trace, warn, Instrument};
|
use tracing::{debug, error, info_span, trace, warn, Instrument};
|
||||||
use types::{BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, SignedBeaconBlock, Slot};
|
use types::{BlobSidecar, DataColumnSidecar, Epoch, EthSpec, ForkContext, SignedBeaconBlock};
|
||||||
|
|
||||||
/// Handles messages from the network and routes them to the appropriate service to be handled.
|
/// Handles messages from the network and routes them to the appropriate service to be handled.
|
||||||
pub struct Router<T: BeaconChainTypes> {
|
pub struct Router<T: BeaconChainTypes> {
|
||||||
@@ -76,7 +76,7 @@ pub enum RouterMessage<E: EthSpec> {
|
|||||||
/// The peer manager has requested we re-status a peer.
|
/// The peer manager has requested we re-status a peer.
|
||||||
StatusPeer(PeerId),
|
StatusPeer(PeerId),
|
||||||
/// Trigger backfill sync restart
|
/// Trigger backfill sync restart
|
||||||
BackfillSyncRestart(Slot),
|
BackfillSyncRestart(Epoch),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: BeaconChainTypes> Router<T> {
|
impl<T: BeaconChainTypes> Router<T> {
|
||||||
@@ -183,8 +183,8 @@ impl<T: BeaconChainTypes> Router<T> {
|
|||||||
RouterMessage::PubsubMessage(id, peer_id, gossip, should_process) => {
|
RouterMessage::PubsubMessage(id, peer_id, gossip, should_process) => {
|
||||||
self.handle_gossip(id, peer_id, gossip, should_process);
|
self.handle_gossip(id, peer_id, gossip, should_process);
|
||||||
}
|
}
|
||||||
RouterMessage::BackfillSyncRestart(slot) => {
|
RouterMessage::BackfillSyncRestart(epoch) => {
|
||||||
self.send_to_sync(SyncMessage::BackfillSyncRestart(slot));
|
self.send_to_sync(SyncMessage::BackfillSyncRestart(epoch));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1098,7 +1098,9 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
|||||||
// `finalized_slot`.
|
// `finalized_slot`.
|
||||||
self.network_globals
|
self.network_globals
|
||||||
.prune_cgc_updates_older_than(finalized_slot);
|
.prune_cgc_updates_older_than(finalized_slot);
|
||||||
self.send_to_router(RouterMessage::BackfillSyncRestart(finalized_slot));
|
self.send_to_router(RouterMessage::BackfillSyncRestart(
|
||||||
|
finalized_slot.epoch(T::EthSpec::slots_per_epoch()),
|
||||||
|
));
|
||||||
|
|
||||||
info!(slot = %finalized_slot, "Restarting backfill sync to fetch custody columns");
|
info!(slot = %finalized_slot, "Restarting backfill sync to fetch custody columns");
|
||||||
metrics::inc_counter(&metrics::BACKFILL_RESTARTED_FOR_CGC);
|
metrics::inc_counter(&metrics::BACKFILL_RESTARTED_FOR_CGC);
|
||||||
|
|||||||
@@ -108,6 +108,10 @@ pub struct BackFillSync<T: BeaconChainTypes> {
|
|||||||
/// This only gets refreshed from the beacon chain if we enter a failed state.
|
/// This only gets refreshed from the beacon chain if we enter a failed state.
|
||||||
current_start: BatchId,
|
current_start: BatchId,
|
||||||
|
|
||||||
|
/// If Some it will reset the anchor oldest block pointer to this epoch. Used in PeerDAS to
|
||||||
|
/// restart backfill over a segment of blocks already imported.
|
||||||
|
restart_epoch: Option<Epoch>,
|
||||||
|
|
||||||
/// Starting epoch of the batch that needs to be processed next.
|
/// Starting epoch of the batch that needs to be processed next.
|
||||||
/// This is incremented as the chain advances.
|
/// This is incremented as the chain advances.
|
||||||
processing_target: BatchId,
|
processing_target: BatchId,
|
||||||
@@ -179,6 +183,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
active_requests: HashMap::new(),
|
active_requests: HashMap::new(),
|
||||||
processing_target: current_start,
|
processing_target: current_start,
|
||||||
current_start,
|
current_start,
|
||||||
|
restart_epoch: None,
|
||||||
last_batch_downloaded: false,
|
last_batch_downloaded: false,
|
||||||
to_be_downloaded: current_start,
|
to_be_downloaded: current_start,
|
||||||
network_globals,
|
network_globals,
|
||||||
@@ -219,7 +224,13 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
pub fn restart(
|
pub fn restart(
|
||||||
&mut self,
|
&mut self,
|
||||||
network: &mut SyncNetworkContext<T>,
|
network: &mut SyncNetworkContext<T>,
|
||||||
|
new_start: Epoch,
|
||||||
) -> Result<SyncStart, BackFillError> {
|
) -> Result<SyncStart, BackFillError> {
|
||||||
|
self.current_start = new_start;
|
||||||
|
self.processing_target = new_start;
|
||||||
|
self.to_be_downloaded = new_start;
|
||||||
|
self.restart_epoch = Some(new_start);
|
||||||
|
|
||||||
match self.state() {
|
match self.state() {
|
||||||
// Reset and start again
|
// Reset and start again
|
||||||
BackFillState::Syncing => {
|
BackFillState::Syncing => {
|
||||||
@@ -593,10 +604,23 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
let process_id = ChainSegmentProcessId::BackSyncBatchId(batch_id);
|
let process_id = ChainSegmentProcessId::BackSyncBatchId(batch_id);
|
||||||
self.current_processing_batch = Some(batch_id);
|
self.current_processing_batch = Some(batch_id);
|
||||||
|
|
||||||
if let Err(e) = network
|
// TODO(das): This mechanism can fail silently. But at the same time we don't want to keep
|
||||||
.beacon_processor()
|
// re-writing the anchor every time. It must happen once.
|
||||||
.send_chain_segment(process_id, blocks)
|
let reset_anchor_new_oldest_block_slot = if let Some(restart_epoch) = self.restart_epoch {
|
||||||
{
|
if restart_epoch == batch_id {
|
||||||
|
Some(restart_epoch.start_slot(T::EthSpec::slots_per_epoch()))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Err(e) = network.beacon_processor().send_chain_segment(
|
||||||
|
process_id,
|
||||||
|
blocks,
|
||||||
|
reset_anchor_new_oldest_block_slot,
|
||||||
|
) {
|
||||||
crit!(
|
crit!(
|
||||||
msg = "process_batch",
|
msg = "process_batch",
|
||||||
error = %e,
|
error = %e,
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ use std::time::Duration;
|
|||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
|
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
|
||||||
use types::{
|
use types::{
|
||||||
BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot,
|
BlobSidecar, DataColumnSidecar, Epoch, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -178,8 +178,9 @@ pub enum SyncMessage<E: EthSpec> {
|
|||||||
/// A block from gossip has completed processing,
|
/// A block from gossip has completed processing,
|
||||||
GossipBlockProcessResult { block_root: Hash256, imported: bool },
|
GossipBlockProcessResult { block_root: Hash256, imported: bool },
|
||||||
|
|
||||||
/// Network service asks backfill sync to restart after increasing the oldest_block_slot
|
/// Network service asks backfill sync to restart after increasing the oldest_block_slot. Must
|
||||||
BackfillSyncRestart(Slot),
|
/// start fetching batches from `epoch`.
|
||||||
|
BackfillSyncRestart(Epoch),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The type of processing specified for a received block.
|
/// The type of processing specified for a received block.
|
||||||
@@ -899,11 +900,11 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
self.on_sampling_result(requester, result)
|
self.on_sampling_result(requester, result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SyncMessage::BackfillSyncRestart(slot) => {
|
SyncMessage::BackfillSyncRestart(start_epoch) => {
|
||||||
if let Err(e) = self.backfill_sync.restart(&mut self.network) {
|
if let Err(e) = self.backfill_sync.restart(&mut self.network, start_epoch) {
|
||||||
error!(error = ?e, "Error on backfill sync restart");
|
error!(error = ?e, "Error on backfill sync restart");
|
||||||
} else {
|
} else {
|
||||||
debug!(%slot, "Received backfill sync restart event");
|
debug!(%start_epoch, "Received backfill sync restart event");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -333,7 +333,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
let process_id = ChainSegmentProcessId::RangeBatchId(self.id, batch_id);
|
let process_id = ChainSegmentProcessId::RangeBatchId(self.id, batch_id);
|
||||||
self.current_processing_batch = Some(batch_id);
|
self.current_processing_batch = Some(batch_id);
|
||||||
|
|
||||||
if let Err(e) = beacon_processor.send_chain_segment(process_id, blocks) {
|
if let Err(e) = beacon_processor.send_chain_segment(process_id, blocks, None) {
|
||||||
crit!(msg = "process_batch",error = %e, batch = ?self.processing_target, "Failed to send chain segment to processor.");
|
crit!(msg = "process_batch",error = %e, batch = ?self.processing_target, "Failed to send chain segment to processor.");
|
||||||
// This is unlikely to happen but it would stall syncing since the batch now has no
|
// This is unlikely to happen but it would stall syncing since the batch now has no
|
||||||
// blocks to continue, and the chain is expecting a processing result that won't
|
// blocks to continue, and the chain is expecting a processing result that won't
|
||||||
|
|||||||
@@ -177,6 +177,20 @@ impl AnchorInfo {
|
|||||||
pub fn full_state_pruning_enabled(&self) -> bool {
|
pub fn full_state_pruning_enabled(&self) -> bool {
|
||||||
self.state_lower_limit == 0 && self.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN
|
self.state_lower_limit == 0 && self.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn as_increased_oldest_block(
|
||||||
|
&self,
|
||||||
|
oldest_block_slot: Slot,
|
||||||
|
oldest_block_parent: Hash256,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
anchor_slot: self.anchor_slot,
|
||||||
|
oldest_block_slot,
|
||||||
|
oldest_block_parent,
|
||||||
|
state_upper_limit: self.state_upper_limit,
|
||||||
|
state_lower_limit: self.state_lower_limit,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StoreItem for AnchorInfo {
|
impl StoreItem for AnchorInfo {
|
||||||
|
|||||||
Reference in New Issue
Block a user