mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Improve get_custody_columns validation, caching and error handling (#6308)
* Improve `get_custody_columns` validation, caching and error handling. * Merge branch 'unstable' into get-custody-columns-error-handing * Fix failing test and add more test. * Fix failing test and add more test. * Merge branch 'unstable' into get-custody-columns-error-handing # Conflicts: # beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs # beacon_node/lighthouse_network/src/peer_manager/peerdb.rs # beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs # beacon_node/lighthouse_network/src/types/globals.rs # beacon_node/network/src/service.rs # consensus/types/src/data_column_subnet_id.rs * Add unit test to make sure the default specs won't panic on the `compute_custody_requirement_subnets` function. * Add condition when calling `compute_custody_subnets_from_metadata` and update logs. * Validate `csc` when returning from enr. Remove `csc` computation on connection since we get them on metadata anyway. * Add `peers_per_custody_subnet_count` to track peer csc and supernodes. * Disconnect peers with invalid metadata and find other peers instead. * Fix sampling tests. * Merge branch 'unstable' into get-custody-columns-error-handing * Merge branch 'unstable' into get-custody-columns-error-handing
This commit is contained in:
@@ -19,7 +19,7 @@ use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use types::{EthSpec, SyncSubnetId};
|
||||
use types::{DataColumnSubnetId, EthSpec, SyncSubnetId};
|
||||
|
||||
pub use libp2p::core::Multiaddr;
|
||||
pub use libp2p::identity::Keypair;
|
||||
@@ -33,7 +33,7 @@ pub use peerdb::peer_info::{
|
||||
};
|
||||
use peerdb::score::{PeerAction, ReportSource};
|
||||
pub use peerdb::sync_status::{SyncInfo, SyncStatus};
|
||||
use std::collections::{hash_map::Entry, HashMap};
|
||||
use std::collections::{hash_map::Entry, HashMap, HashSet};
|
||||
use std::net::IpAddr;
|
||||
use strum::IntoEnumIterator;
|
||||
|
||||
@@ -701,6 +701,8 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
|
||||
/// Received a metadata response from a peer.
|
||||
pub fn meta_data_response(&mut self, peer_id: &PeerId, meta_data: MetaData<E>) {
|
||||
let mut invalid_meta_data = false;
|
||||
|
||||
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
if let Some(known_meta_data) = &peer_info.meta_data() {
|
||||
if *known_meta_data.seq_number() < *meta_data.seq_number() {
|
||||
@@ -717,12 +719,39 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
debug!(self.log, "Obtained peer's metadata";
|
||||
"peer_id" => %peer_id, "new_seq_no" => meta_data.seq_number());
|
||||
}
|
||||
let node_id_opt = peer_id_to_node_id(peer_id).ok();
|
||||
peer_info.set_meta_data(meta_data, node_id_opt, &self.network_globals.spec);
|
||||
|
||||
let custody_subnet_count_opt = meta_data.custody_subnet_count().copied().ok();
|
||||
peer_info.set_meta_data(meta_data);
|
||||
|
||||
if self.network_globals.spec.is_peer_das_scheduled() {
|
||||
// Gracefully ignore metadata/v2 peers. Potentially downscore after PeerDAS to
|
||||
// prioritize PeerDAS peers.
|
||||
if let Some(custody_subnet_count) = custody_subnet_count_opt {
|
||||
match self.compute_peer_custody_subnets(peer_id, custody_subnet_count) {
|
||||
Ok(custody_subnets) => {
|
||||
peer_info.set_custody_subnets(custody_subnets);
|
||||
}
|
||||
Err(err) => {
|
||||
debug!(self.log, "Unable to compute peer custody subnets from metadata";
|
||||
"info" => "Sending goodbye to peer",
|
||||
"peer_id" => %peer_id,
|
||||
"custody_subnet_count" => custody_subnet_count,
|
||||
"error" => ?err,
|
||||
);
|
||||
invalid_meta_data = true;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received METADATA from an unknown peer";
|
||||
"peer_id" => %peer_id);
|
||||
}
|
||||
|
||||
// Disconnect peers with invalid metadata and find other peers instead.
|
||||
if invalid_meta_data {
|
||||
self.goodbye_peer(peer_id, GoodbyeReason::Fault, ReportSource::PeerManager)
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the gossipsub scores for all known peers in gossipsub.
|
||||
@@ -1290,6 +1319,7 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
let mut peers_connected = 0;
|
||||
let mut clients_per_peer = HashMap::new();
|
||||
let mut peers_connected_mutli: HashMap<(&str, &str), i32> = HashMap::new();
|
||||
let mut peers_per_custody_subnet_count: HashMap<u64, i64> = HashMap::new();
|
||||
|
||||
for (_, peer_info) in self.network_globals.peers.read().connected_peers() {
|
||||
peers_connected += 1;
|
||||
@@ -1320,11 +1350,26 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
*peers_connected_mutli
|
||||
.entry((direction, transport))
|
||||
.or_default() += 1;
|
||||
|
||||
if let Some(MetaData::V3(meta_data)) = peer_info.meta_data() {
|
||||
*peers_per_custody_subnet_count
|
||||
.entry(meta_data.custody_subnet_count)
|
||||
.or_default() += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// PEERS_CONNECTED
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED, peers_connected);
|
||||
|
||||
// CUSTODY_SUBNET_COUNT
|
||||
for (custody_subnet_count, peer_count) in peers_per_custody_subnet_count.into_iter() {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEERS_PER_CUSTODY_SUBNET_COUNT,
|
||||
&[&custody_subnet_count.to_string()],
|
||||
peer_count,
|
||||
)
|
||||
}
|
||||
|
||||
// PEERS_PER_CLIENT
|
||||
for client_kind in ClientKind::iter() {
|
||||
let value = clients_per_peer.get(&client_kind.to_string()).unwrap_or(&0);
|
||||
@@ -1348,6 +1393,45 @@ impl<E: EthSpec> PeerManager<E> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_peer_custody_subnets(
|
||||
&self,
|
||||
peer_id: &PeerId,
|
||||
custody_subnet_count: u64,
|
||||
) -> Result<HashSet<DataColumnSubnetId>, String> {
|
||||
// If we don't have a node id, we cannot compute the custody duties anyway
|
||||
let node_id = peer_id_to_node_id(peer_id)?;
|
||||
let spec = &self.network_globals.spec;
|
||||
|
||||
if !(spec.custody_requirement..=spec.data_column_sidecar_subnet_count)
|
||||
.contains(&custody_subnet_count)
|
||||
{
|
||||
return Err("Invalid custody subnet count in metadata: out of range".to_string());
|
||||
}
|
||||
|
||||
let custody_subnets = DataColumnSubnetId::compute_custody_subnets::<E>(
|
||||
node_id.raw(),
|
||||
custody_subnet_count,
|
||||
spec,
|
||||
)
|
||||
.map(|subnets| subnets.collect())
|
||||
.unwrap_or_else(|e| {
|
||||
// This is an unreachable scenario unless there's a bug, as we've validated the csc
|
||||
// just above.
|
||||
error!(
|
||||
self.log,
|
||||
"Computing peer custody subnets failed unexpectedly";
|
||||
"info" => "Falling back to default custody requirement subnets",
|
||||
"peer_id" => %peer_id,
|
||||
"custody_subnet_count" => custody_subnet_count,
|
||||
"error" => ?e
|
||||
);
|
||||
DataColumnSubnetId::compute_custody_requirement_subnets::<E>(node_id.raw(), spec)
|
||||
.collect()
|
||||
});
|
||||
|
||||
Ok(custody_subnets)
|
||||
}
|
||||
}
|
||||
|
||||
enum ConnectingType {
|
||||
@@ -1680,11 +1764,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer0)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -1704,11 +1784,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer2)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -1728,11 +1804,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer4)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -1806,11 +1878,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -1934,11 +2002,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
let long_lived_subnets = peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -2047,11 +2111,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
let long_lived_subnets = peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -2217,11 +2277,7 @@ mod tests {
|
||||
.write()
|
||||
.peer_info_mut(&peer)
|
||||
.unwrap()
|
||||
.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
.set_meta_data(MetaData::V2(metadata));
|
||||
let long_lived_subnets = peer_manager
|
||||
.network_globals
|
||||
.peers
|
||||
@@ -2378,11 +2434,7 @@ mod tests {
|
||||
|
||||
let mut peer_db = peer_manager.network_globals.peers.write();
|
||||
let peer_info = peer_db.peer_info_mut(&condition.peer_id).unwrap();
|
||||
peer_info.set_meta_data(
|
||||
MetaData::V2(metadata),
|
||||
None,
|
||||
&peer_manager.network_globals.spec,
|
||||
);
|
||||
peer_info.set_meta_data(MetaData::V2(metadata));
|
||||
peer_info.set_gossipsub_score(condition.gossipsub_score);
|
||||
peer_info.add_to_score(condition.score);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user