mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-23 14:54:45 +00:00
Sync corrections (#1034)
* Correct status re-request logic improve logging * Prevent multiple dials of the same peer * Discovery to obey max peers when connecting to new peers
This commit is contained in:
@@ -454,11 +454,17 @@ where
|
||||
for peer_id in closer_peers {
|
||||
// if we need more peers, attempt a connection
|
||||
|
||||
if self.network_globals.connected_peers() < self.max_peers
|
||||
&& !self.network_globals.peers.read().is_connected(&peer_id)
|
||||
if self.network_globals.connected_or_dialing_peers()
|
||||
< self.max_peers
|
||||
&& !self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.is_connected_or_dialing(&peer_id)
|
||||
&& !self.banned_peers.contains(&peer_id)
|
||||
{
|
||||
debug!(self.log, "Peer discovered"; "peer_id"=> format!("{:?}", peer_id));
|
||||
debug!(self.log, "Connecting to discovered peer"; "peer_id"=> format!("{:?}", peer_id));
|
||||
self.network_globals.peers.write().dialing_peer(&peer_id);
|
||||
self.events
|
||||
.push_back(NetworkBehaviourAction::DialPeer { peer_id });
|
||||
}
|
||||
|
||||
@@ -89,6 +89,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
|
||||
/// A ping request has been received.
|
||||
// NOTE: The behaviour responds with a PONG automatically
|
||||
// TODO: Update last seen
|
||||
pub fn ping_request(&mut self, peer_id: &PeerId, seq: u64) {
|
||||
if let Some(peer_info) = self.network_globals.peers.read().peer_info(peer_id) {
|
||||
// received a ping
|
||||
@@ -114,6 +115,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
}
|
||||
|
||||
/// A PONG has been returned from a peer.
|
||||
// TODO: Update last seen
|
||||
pub fn pong_response(&mut self, peer_id: &PeerId, seq: u64) {
|
||||
if let Some(peer_info) = self.network_globals.peers.read().peer_info(peer_id) {
|
||||
// received a pong
|
||||
@@ -137,6 +139,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
}
|
||||
|
||||
/// Received a metadata response from a peer.
|
||||
// TODO: Update last seen
|
||||
pub fn meta_data_response(&mut self, peer_id: &PeerId, meta_data: MetaData<TSpec>) {
|
||||
if let Some(peer_info) = self.network_globals.peers.write().peer_info_mut(peer_id) {
|
||||
if let Some(known_meta_data) = &peer_info.meta_data {
|
||||
@@ -165,12 +168,15 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
fn gets_banned(&mut self, peer_id: &PeerId) -> bool {
|
||||
// if the peer was already banned don't inform again
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
if peerdb.reputation(peer_id) < MINIMUM_REPUTATION_BEFORE_BAN
|
||||
&& !peerdb.connection_status(peer_id).is_banned()
|
||||
{
|
||||
peerdb.ban(peer_id);
|
||||
self.events.push(PeerManagerEvent::BanPeer(peer_id.clone()));
|
||||
return true;
|
||||
|
||||
if let Some(connection_status) = peerdb.connection_status(peer_id) {
|
||||
if peerdb.reputation(peer_id) < MINIMUM_REPUTATION_BEFORE_BAN
|
||||
&& !connection_status.is_banned()
|
||||
{
|
||||
peerdb.ban(peer_id);
|
||||
self.events.push(PeerManagerEvent::BanPeer(peer_id.clone()));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
@@ -269,7 +275,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
|
||||
{
|
||||
let mut peerdb = self.network_globals.peers.write();
|
||||
if peerdb.connection_status(peer_id).is_banned() {
|
||||
if peerdb.connection_status(peer_id).map(|c| c.is_banned()) == Some(true) {
|
||||
// don't connect if the peer is banned
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -293,6 +300,11 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Notifies the peer manager that this peer is being dialed.
|
||||
pub fn _dialing_peer(&mut self, peer_id: &PeerId) {
|
||||
self.network_globals.peers.write().dialing_peer(peer_id);
|
||||
}
|
||||
}
|
||||
|
||||
impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
@@ -304,12 +316,14 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
|
||||
while let Async::Ready(Some(peer_id)) = self.ping_peers.poll().map_err(|e| {
|
||||
error!(self.log, "Failed to check for peers to ping"; "error" => format!("{}",e));
|
||||
})? {
|
||||
debug!(self.log, "Pinging peer"; "peer_id" => format!("{}", peer_id));
|
||||
self.events.push(PeerManagerEvent::Ping(peer_id));
|
||||
}
|
||||
|
||||
while let Async::Ready(Some(peer_id)) = self.ping_peers.poll().map_err(|e| {
|
||||
while let Async::Ready(Some(peer_id)) = self.status_peers.poll().map_err(|e| {
|
||||
error!(self.log, "Failed to check for peers to status"; "error" => format!("{}",e));
|
||||
})? {
|
||||
debug!(self.log, "Sending Status to peer"; "peer_id" => format!("{}", peer_id));
|
||||
self.events.push(PeerManagerEvent::Status(peer_id));
|
||||
}
|
||||
|
||||
|
||||
@@ -60,11 +60,12 @@ impl<T: EthSpec> PeerInfo<T> {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
/// The current health status of the peer.
|
||||
pub enum PeerStatus {
|
||||
/// The peer is healthy
|
||||
/// The peer is healthy.
|
||||
Healthy,
|
||||
/// The peer is clogged. It has not been responding to requests on time
|
||||
Clogged,
|
||||
/// The peer is clogged. It has not been responding to requests on time.
|
||||
_Clogged,
|
||||
}
|
||||
|
||||
impl Default for PeerStatus {
|
||||
@@ -73,25 +74,29 @@ impl Default for PeerStatus {
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection Status of the peer
|
||||
#[derive(Clone, Debug)]
|
||||
/// Connection Status of the peer.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum PeerConnectionStatus {
|
||||
/// The peer is connected.
|
||||
Connected {
|
||||
/// number of ingoing connections
|
||||
/// number of ingoing connections.
|
||||
n_in: u8,
|
||||
/// number of outgoing connections
|
||||
/// number of outgoing connections.
|
||||
n_out: u8,
|
||||
},
|
||||
/// The peer has disconnected.
|
||||
Disconnected {
|
||||
/// last time the peer was connected or discovered
|
||||
/// last time the peer was connected or discovered.
|
||||
since: Instant,
|
||||
},
|
||||
/// The peer has been banned and is disconnected.
|
||||
Banned {
|
||||
/// moment when the peer was banned
|
||||
/// moment when the peer was banned.
|
||||
since: Instant,
|
||||
},
|
||||
Unknown {
|
||||
/// time since we last saw this peer
|
||||
/// We are currently dialing this peer.
|
||||
Dialing {
|
||||
/// time since we last communicated with the peer.
|
||||
since: Instant,
|
||||
},
|
||||
}
|
||||
@@ -116,8 +121,8 @@ impl Serialize for PeerConnectionStatus {
|
||||
s.serialize_field("since", &since.elapsed().as_secs())?;
|
||||
s.end()
|
||||
}
|
||||
Unknown { since } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 3, "Unknown", 1)?;
|
||||
Dialing { since } => {
|
||||
let mut s = serializer.serialize_struct_variant("", 3, "Dialing", 1)?;
|
||||
s.serialize_field("since", &since.elapsed().as_secs())?;
|
||||
s.end()
|
||||
}
|
||||
@@ -126,6 +131,7 @@ impl Serialize for PeerConnectionStatus {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
/// The current sync status of the peer.
|
||||
pub enum PeerSyncStatus {
|
||||
/// At the current state as our node or ahead of us.
|
||||
Synced {
|
||||
@@ -143,7 +149,7 @@ pub enum PeerSyncStatus {
|
||||
|
||||
impl Default for PeerConnectionStatus {
|
||||
fn default() -> Self {
|
||||
PeerConnectionStatus::Unknown {
|
||||
PeerConnectionStatus::Dialing {
|
||||
since: Instant::now(),
|
||||
}
|
||||
}
|
||||
@@ -158,6 +164,14 @@ impl PeerConnectionStatus {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the status is connected
|
||||
pub fn is_dialing(&self) -> bool {
|
||||
match self {
|
||||
PeerConnectionStatus::Dialing { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the status is banned
|
||||
pub fn is_banned(&self) -> bool {
|
||||
match self {
|
||||
@@ -179,7 +193,7 @@ impl PeerConnectionStatus {
|
||||
pub fn connect_ingoing(&mut self) {
|
||||
match self {
|
||||
Connected { n_in, .. } => *n_in += 1,
|
||||
Disconnected { .. } | Banned { .. } | Unknown { .. } => {
|
||||
Disconnected { .. } | Banned { .. } | Dialing { .. } => {
|
||||
*self = Connected { n_in: 1, n_out: 0 }
|
||||
}
|
||||
}
|
||||
@@ -190,7 +204,7 @@ impl PeerConnectionStatus {
|
||||
pub fn connect_outgoing(&mut self) {
|
||||
match self {
|
||||
Connected { n_out, .. } => *n_out += 1,
|
||||
Disconnected { .. } | Banned { .. } | Unknown { .. } => {
|
||||
Disconnected { .. } | Banned { .. } | Dialing { .. } => {
|
||||
*self = Connected { n_in: 0, n_out: 1 }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::rpc::methods::MetaData;
|
||||
use crate::PeerId;
|
||||
use slog::{crit, warn};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
use types::{EthSpec, SubnetId};
|
||||
|
||||
/// A peer's reputation.
|
||||
@@ -85,6 +86,16 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Connected or dialing peers
|
||||
pub fn connected_or_dialing_peers(&self) -> impl Iterator<Item = &PeerId> {
|
||||
self.peers
|
||||
.iter()
|
||||
.filter(|(_, info)| {
|
||||
info.connection_status.is_connected() || info.connection_status.is_dialing()
|
||||
})
|
||||
.map(|(peer_id, _)| peer_id)
|
||||
}
|
||||
|
||||
/// Gives the `peer_id` of all known connected and synced peers.
|
||||
pub fn synced_peers(&self) -> impl Iterator<Item = &PeerId> {
|
||||
self.peers
|
||||
@@ -153,25 +164,47 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
|
||||
}
|
||||
|
||||
/// Returns the peer's connection status. Returns unknown if the peer is not in the DB.
|
||||
pub fn connection_status(&self, peer_id: &PeerId) -> PeerConnectionStatus {
|
||||
pub fn connection_status(&self, peer_id: &PeerId) -> Option<PeerConnectionStatus> {
|
||||
self.peer_info(peer_id)
|
||||
.map_or(PeerConnectionStatus::default(), |info| {
|
||||
info.connection_status.clone()
|
||||
})
|
||||
.map(|info| info.connection_status.clone())
|
||||
}
|
||||
|
||||
/// Returns if the peer is already connected.
|
||||
pub fn is_connected(&self, peer_id: &PeerId) -> bool {
|
||||
if let PeerConnectionStatus::Connected { .. } = self.connection_status(peer_id) {
|
||||
if let Some(PeerConnectionStatus::Connected { .. }) = self.connection_status(peer_id) {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// If we are connected or currently dialing the peer returns true.
|
||||
pub fn is_connected_or_dialing(&self, peer_id: &PeerId) -> bool {
|
||||
match self.connection_status(peer_id) {
|
||||
Some(PeerConnectionStatus::Connected { .. })
|
||||
| Some(PeerConnectionStatus::Dialing { .. }) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/* Setters */
|
||||
|
||||
/// Sets a peer as connected with an ingoing connection
|
||||
/// A peer is being dialed.
|
||||
pub fn dialing_peer(&mut self, peer_id: &PeerId) {
|
||||
let info = self
|
||||
.peers
|
||||
.entry(peer_id.clone())
|
||||
.or_insert_with(|| Default::default());
|
||||
|
||||
if info.connection_status.is_disconnected() {
|
||||
self.n_dc -= 1;
|
||||
}
|
||||
info.connection_status = PeerConnectionStatus::Dialing {
|
||||
since: Instant::now(),
|
||||
};
|
||||
}
|
||||
|
||||
/// Sets a peer as connected with an ingoing connection.
|
||||
pub fn connect_ingoing(&mut self, peer_id: &PeerId) {
|
||||
let info = self
|
||||
.peers
|
||||
@@ -184,7 +217,7 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
|
||||
info.connection_status.connect_ingoing();
|
||||
}
|
||||
|
||||
/// Sets a peer as connected with an outgoing connection
|
||||
/// Sets a peer as connected with an outgoing connection.
|
||||
pub fn connect_outgoing(&mut self, peer_id: &PeerId) {
|
||||
let info = self
|
||||
.peers
|
||||
@@ -197,7 +230,7 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
|
||||
info.connection_status.connect_outgoing();
|
||||
}
|
||||
|
||||
/// Sets the peer as disconnected
|
||||
/// Sets the peer as disconnected.
|
||||
pub fn disconnect(&mut self, peer_id: &PeerId) {
|
||||
let log_ref = &self.log;
|
||||
let info = self.peers.entry(peer_id.clone()).or_insert_with(|| {
|
||||
|
||||
@@ -138,6 +138,11 @@ impl<TSpec: EthSpec> Service<TSpec> {
|
||||
if let Protocol::Udp(_) = components[1] {
|
||||
continue;
|
||||
}
|
||||
// inform the peer manager that we are currently dialing this peer
|
||||
network_globals
|
||||
.peers
|
||||
.write()
|
||||
.dialing_peer(&bootnode_enr.peer_id());
|
||||
dial_addr(multiaddr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,11 @@ impl<TSpec: EthSpec> NetworkGlobals<TSpec> {
|
||||
self.peers.read().connected_peer_ids().count()
|
||||
}
|
||||
|
||||
/// Returns the number of libp2p peers that are either connected or being dialed.
|
||||
pub fn connected_or_dialing_peers(&self) -> usize {
|
||||
self.peers.read().connected_or_dialing_peers().count()
|
||||
}
|
||||
|
||||
/// Returns in the node is syncing.
|
||||
pub fn is_syncing(&self) -> bool {
|
||||
self.sync_state.read().is_syncing()
|
||||
|
||||
@@ -8,7 +8,7 @@ pub mod processor;
|
||||
|
||||
use crate::error;
|
||||
use crate::service::NetworkMessage;
|
||||
use beacon_chain::{AttestationType, BeaconChain, BeaconChainTypes};
|
||||
use beacon_chain::{AttestationType, BeaconChain, BeaconChainTypes, BlockError};
|
||||
use eth2_libp2p::{
|
||||
rpc::{RPCError, RPCErrorResponse, RPCRequest, RPCResponse, RequestId, ResponseTermination},
|
||||
MessageId, NetworkGlobals, PeerId, PubsubMessage, RPCEvent,
|
||||
@@ -268,7 +268,9 @@ impl<T: BeaconChainTypes> Router<T> {
|
||||
self.propagate_message(id, peer_id.clone());
|
||||
self.processor.on_block_gossip(peer_id, verified_block);
|
||||
}
|
||||
Err(BlockError::ParentUnknown { .. }) => {} // performing a parent lookup
|
||||
Err(e) => {
|
||||
// performing a parent lookup
|
||||
warn!(self.log, "Could not verify block for gossip";
|
||||
"error" => format!("{:?}", e));
|
||||
}
|
||||
|
||||
@@ -495,9 +495,10 @@ impl<T: BeaconChainTypes> Processor<T> {
|
||||
) -> Result<GossipVerifiedBlock<T>, BlockError> {
|
||||
let result = self.chain.verify_block_for_gossip(*block.clone());
|
||||
|
||||
if let Err(BlockError::ParentUnknown(_)) = result {
|
||||
if let Err(BlockError::ParentUnknown(block_hash)) = result {
|
||||
// if we don't know the parent, start a parent lookup
|
||||
// TODO: Modify the return to avoid the block clone.
|
||||
debug!(self.log, "Unknown block received. Starting a parent lookup"; "block_slot" => block.message.slot, "block_hash" => format!("{}", block_hash));
|
||||
self.send_to_sync(SyncMessage::UnknownBlock(peer_id.clone(), block));
|
||||
}
|
||||
result
|
||||
|
||||
@@ -306,7 +306,6 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
.map_err(|_| { debug!(log, "Failed to send peer disconnect to router");})?;
|
||||
}
|
||||
BehaviourEvent::StatusPeer(peer_id) => {
|
||||
debug!(log, "Re-status peer"; "peer_id" => format!("{}", peer_id));
|
||||
service.router_send
|
||||
.try_send(RouterMessage::StatusPeer(peer_id))
|
||||
.map_err(|_| { debug!(log, "Failed to send re-status peer to router");})?;
|
||||
|
||||
@@ -58,7 +58,7 @@ use types::{EthSpec, Hash256, SignedBeaconBlock, Slot};
|
||||
/// fully sync'd peer.
|
||||
const SLOT_IMPORT_TOLERANCE: usize = 20;
|
||||
/// How many attempts we try to find a parent of a block before we give up trying .
|
||||
const PARENT_FAIL_TOLERANCE: usize = 3;
|
||||
const PARENT_FAIL_TOLERANCE: usize = 5;
|
||||
/// The maximum depth we will search for a parent block. In principle we should have sync'd any
|
||||
/// canonical chain to its head once the peer connects. A chain should not appear where it's depth
|
||||
/// is further back than the most recent head slot.
|
||||
@@ -640,9 +640,16 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE
|
||||
|| parent_request.downloaded_blocks.len() >= PARENT_DEPTH_TOLERANCE
|
||||
{
|
||||
let error = if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE {
|
||||
"too many failed attempts"
|
||||
} else {
|
||||
"reached maximum lookup-depth"
|
||||
};
|
||||
|
||||
debug!(self.log, "Parent import failed";
|
||||
"block" => format!("{:?}",parent_request.downloaded_blocks[0].canonical_root()),
|
||||
"ancestors_found" => parent_request.downloaded_blocks.len()
|
||||
"ancestors_found" => parent_request.downloaded_blocks.len(),
|
||||
"reason" => error
|
||||
);
|
||||
return; // drop the request
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user