mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-03 00:31:50 +00:00
Sync peer attribution (#7733)
Which issue # does this PR address? Closes #7604 Improvements to range sync including: 1. Contain column requests only to peers that are part of the SyncingChain 2. Attribute the fault to the correct peer and downscore them if they don't return the data columns for the request 3. Improve sync performance by retrying only the failed columns from other peers instead of failing the entire batch 4. Use the earliest_available_slot to make requests to peers that claim to have the epoch. Note: if no earliest_available_slot info is available, fall back to the previous logic, i.e. assume the peer has everything backfilled up to the WS checkpoint/DA boundary. Tested this on fusaka-devnet-2 with a full node and a supernode, and the recovery logic seems to work well. Also tested this a little on mainnet. Need to do more testing and possibly add some unit tests.
This commit is contained in:
@@ -248,6 +248,34 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Returns all the synced peers from the list of allowed peers that claim to have the block
|
||||
/// components for the given epoch based on `status.earliest_available_slot`.
|
||||
///
|
||||
/// If `earliest_available_slot` info is not available, then return peer anyway assuming it has the
|
||||
/// required data.
|
||||
pub fn synced_peers_for_epoch<'a>(
|
||||
&'a self,
|
||||
epoch: Epoch,
|
||||
allowed_peers: &'a HashSet<PeerId>,
|
||||
) -> impl Iterator<Item = &'a PeerId> {
|
||||
self.peers
|
||||
.iter()
|
||||
.filter(move |(peer_id, info)| {
|
||||
allowed_peers.contains(peer_id)
|
||||
&& info.is_connected()
|
||||
&& match info.sync_status() {
|
||||
SyncStatus::Synced { info } => {
|
||||
info.has_slot(epoch.end_slot(E::slots_per_epoch()))
|
||||
}
|
||||
SyncStatus::Advanced { info } => {
|
||||
info.has_slot(epoch.end_slot(E::slots_per_epoch()))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
})
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Gives the `peer_id` of all known connected and advanced peers.
|
||||
pub fn advanced_peers(&self) -> impl Iterator<Item = &PeerId> {
|
||||
self.peers
|
||||
@@ -291,6 +319,23 @@ impl<E: EthSpec> PeerDB<E> {
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Returns an iterator of all peers that are supposed to be custodying
|
||||
/// the given subnet id that also belong to `allowed_peers`.
|
||||
pub fn good_range_sync_custody_subnet_peer<'a>(
|
||||
&'a self,
|
||||
subnet: DataColumnSubnetId,
|
||||
allowed_peers: &'a HashSet<PeerId>,
|
||||
) -> impl Iterator<Item = &'a PeerId> {
|
||||
self.peers
|
||||
.iter()
|
||||
.filter(move |(peer_id, info)| {
|
||||
// The custody_subnets hashset can be populated via enr or metadata
|
||||
let is_custody_subnet_peer = info.is_assigned_to_custody_subnet(&subnet);
|
||||
allowed_peers.contains(peer_id) && info.is_connected() && is_custody_subnet_peer
|
||||
})
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Gives the ids of all known disconnected peers.
|
||||
pub fn disconnected_peers(&self) -> impl Iterator<Item = &PeerId> {
|
||||
self.peers
|
||||
|
||||
@@ -28,6 +28,19 @@ pub struct SyncInfo {
|
||||
pub earliest_available_slot: Option<Slot>,
|
||||
}
|
||||
|
||||
impl SyncInfo {
|
||||
/// Returns true if the provided slot is greater than or equal to the peer's `earliest_available_slot`.
|
||||
///
|
||||
/// If `earliest_available_slot` is None, then we just assume that the peer has the slot.
|
||||
pub fn has_slot(&self, slot: Slot) -> bool {
|
||||
if let Some(earliest_available_slot) = self.earliest_available_slot {
|
||||
slot >= earliest_available_slot
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::cmp::PartialEq for SyncStatus {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
matches!(
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::rpc::methods::{ResponseTermination, RpcResponse, RpcSuccessResponse, StatusMessage};
|
||||
use libp2p::PeerId;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::sync::Arc;
|
||||
use types::{
|
||||
@@ -61,6 +62,11 @@ pub struct DataColumnsByRangeRequestId {
|
||||
pub id: Id,
|
||||
/// The Id of the overall By Range request for block components.
|
||||
pub parent_request_id: ComponentsByRangeRequestId,
|
||||
/// The peer id associated with the request.
|
||||
///
|
||||
/// This is useful to penalize the peer at a later point if it returned data columns that
|
||||
/// did not match with the verified block.
|
||||
pub peer: PeerId,
|
||||
}
|
||||
|
||||
/// Block components by range request for range sync. Includes an ID for downstream consumers to
|
||||
@@ -306,6 +312,7 @@ mod tests {
|
||||
batch_id: Epoch::new(0),
|
||||
},
|
||||
},
|
||||
peer: PeerId::random(),
|
||||
};
|
||||
assert_eq!(format!("{id}"), "123/122/RangeSync/0/54");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user