Ensure custody backfill sync couples all responses before importing (#8339)

Custody backfill sync has a bug when we request columns from more than one peer per batch. The fix here ensures we wait for all requests to be completed before performing verification and importing the responses.

I've also added an endpoint `lighthouse/custody/backfill` that resets a node's earliest available data column to the current epoch so that custody backfill can be triggered. This endpoint is needed to rescue any nodes that may have missing columns due to the custody backfill sync bug, without requiring a full re-sync.


  


Co-Authored-By: Eitan Seri-Levi <eserilev@gmail.com>

Co-Authored-By: Eitan Seri-Levi <eserilev@ucsc.edu>

Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>

Co-Authored-By: Michael Sproul <michaelsproul@users.noreply.github.com>
This commit is contained in:
Eitan Seri-Levi
2025-11-03 00:06:06 -08:00
committed by GitHub
parent 4908687e7d
commit 5d0f8a083a
11 changed files with 230 additions and 24 deletions

View File

@@ -382,11 +382,9 @@ impl<T: BeaconChainTypes> CustodyBackFillSync<T> {
return None;
};
let mut missing_columns = HashSet::new();
// Skip all batches (Epochs) that don't have missing columns.
for epoch in Epoch::range_inclusive_rev(self.to_be_downloaded, column_da_boundary) {
missing_columns = self.beacon_chain.get_missing_columns_for_epoch(epoch);
let missing_columns = self.beacon_chain.get_missing_columns_for_epoch(epoch);
if !missing_columns.is_empty() {
self.to_be_downloaded = epoch;
@@ -445,6 +443,7 @@ impl<T: BeaconChainTypes> CustodyBackFillSync<T> {
self.include_next_batch()
}
Entry::Vacant(entry) => {
let missing_columns = self.beacon_chain.get_missing_columns_for_epoch(batch_id);
entry.insert(BatchInfo::new(
&batch_id,
CUSTODY_BACKFILL_EPOCHS_PER_BATCH,

View File

@@ -70,16 +70,17 @@ impl<T: BeaconChainTypes> RangeDataColumnBatchRequest<T> {
HashMap::new();
let mut column_to_peer_id: HashMap<u64, PeerId> = HashMap::new();
for column in self
.requests
.values()
.filter_map(|req| req.to_finished())
.flatten()
{
received_columns_for_slot
.entry(column.slot())
.or_default()
.push(column.clone());
for req in self.requests.values() {
let Some(columns) = req.to_finished() else {
return None;
};
for column in columns {
received_columns_for_slot
.entry(column.slot())
.or_default()
.push(column.clone());
}
}
// Note: this assumes that only 1 peer is responsible for a column