Remove all batches related to a peer on disconnect (#5969)

* Remove all batches related to a peer on disconnect

* Cleanup map entries after disconnect

* Allow lookups to continue in case of disconnections

* Pretty response types

* fmt

* Fix lints

* Remove lookup if it cannot progress

* Fix tests

* Remove poll_close on rpc behaviour

* Remove redundant test

* Fix issue raised by lion

* Revert pretty response types

* Cleanup

* Fix test

* Merge remote-tracking branch 'origin/release-v5.2.1' into rpc-error-on-disconnect-revert

* Apply suggestions from joao

Co-authored-by: João Oliveira <hello@jxs.pt>

* Fix log

* update request status on no peers found

* Do not remove lookup after peer disconnection

* Add comments about expected event api

* Update single_block_lookup.rs

* Update mod.rs

* Merge branch 'rpc-error-on-disconnect-revert' into 5969-review

* Merge pull request #10 from dapplion/5969-review

Add comments about expected event api
This commit is contained in:
Pawan Dhananjay
2024-06-26 16:53:53 -07:00
committed by GitHub
parent 758b58c9e9
commit bf4cbd3b0a
12 changed files with 270 additions and 182 deletions

View File

@@ -307,7 +307,11 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
/// A peer has disconnected.
/// If the peer has active batches, those are considered failed and re-requested.
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), BackFillError> {
pub fn peer_disconnected(
&mut self,
peer_id: &PeerId,
network: &mut SyncNetworkContext<T>,
) -> Result<(), BackFillError> {
if matches!(
self.state(),
BackFillState::Failed | BackFillState::NotRequired
@@ -315,7 +319,37 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
return Ok(());
}
self.active_requests.remove(peer_id);
if let Some(batch_ids) = self.active_requests.remove(peer_id) {
// fail the batches.
for id in batch_ids {
if let Some(batch) = self.batches.get_mut(&id) {
match batch.download_failed(false) {
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
self.fail_sync(BackFillError::BatchDownloadFailed(id))?;
}
Ok(BatchOperationOutcome::Continue) => {}
Err(e) => {
self.fail_sync(BackFillError::BatchInvalidState(id, e.0))?;
}
}
// If we have run out of peers in which to retry this batch, the backfill state
// transitions to a paused state.
// We still need to reset the state for all the affected batches, so we should not
// short circuit early.
if self.retry_batch_download(network, id).is_err() {
debug!(
self.log,
"Batch could not be retried";
"batch_id" => id,
"error" => "no synced peers"
);
}
} else {
debug!(self.log, "Batch not found while removing peer";
"peer" => %peer_id, "batch" => id)
}
}
}
// Remove the peer from the participation list
self.participating_peers.remove(peer_id);