From 311e69db65f0133fb3f9ce980d3f88202cea042d Mon Sep 17 00:00:00 2001 From: Age Manning Date: Mon, 3 Apr 2023 03:02:57 +0000 Subject: [PATCH] Ban peer race condition (#4140) It is possible that when we go to ban a peer, there is already an unbanned message in the queue. It could lead to the case that we ban and immediately unban a peer leaving us in a state where a should-be banned peer is unbanned. If this banned peer connects to us in this faulty state, we currently do not attempt to re-ban it. This PR does correct this also, so if we do see this error, it will now self-correct (although we shouldn't see the error in the first place). I have also incremented the severity of not supporting protocols as I see peers ultimately get banned in a few steps and it seems to make sense to just ban them outright, rather than have them linger. --- .../src/peer_manager/mod.rs | 23 +++++++++++++------ .../src/peer_manager/network_behaviour.rs | 4 +++- .../lighthouse_network/src/service/mod.rs | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 3d5c862e8b..a461a12e53 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -290,11 +290,20 @@ impl PeerManager { // If a peer is being banned, this trumps any temporary ban the peer might be // under. We no longer track it in the temporary ban list. - self.temporary_banned_peers.raw_remove(peer_id); - - // Inform the Swarm to ban the peer - self.events - .push(PeerManagerEvent::Banned(*peer_id, banned_ips)); + if !self.temporary_banned_peers.raw_remove(peer_id) { + // If the peer is not already banned, inform the Swarm to ban the peer + self.events + .push(PeerManagerEvent::Banned(*peer_id, banned_ips)); + // If the peer was in the process of being un-banned, remove it (a rare race + // condition) + self.events.retain(|event| { + if let PeerManagerEvent::UnBanned(unbanned_peer_id, _) = event { + unbanned_peer_id != peer_id // Remove matching peer ids + } else { + true + } + }); + } } } } @@ -552,8 +561,8 @@ impl PeerManager { Protocol::BlocksByRoot => return, Protocol::Goodbye => return, Protocol::LightClientBootstrap => return, - Protocol::MetaData => PeerAction::LowToleranceError, - Protocol::Status => PeerAction::LowToleranceError, + Protocol::MetaData => PeerAction::Fatal, + Protocol::Status => PeerAction::Fatal, } } RPCError::StreamTimeout => match direction { diff --git a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs index a29f243c9e..24de83a61d 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs @@ -156,8 +156,10 @@ impl PeerManager { BanResult::BadScore => { // This is a faulty state error!(self.log, "Connected to a banned peer. Re-banning"; "peer_id" => %peer_id); - // Reban the peer + // Disconnect the peer. self.goodbye_peer(&peer_id, GoodbyeReason::Banned, ReportSource::PeerManager); + // Re-ban the peer to prevent repeated errors. + self.events.push(PeerManagerEvent::Banned(peer_id, vec![])); return; } BanResult::BannedIp(ip_addr) => { diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index 5cdcdeaf85..dc9b44849f 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -1119,7 +1119,7 @@ impl Network { debug!(self.log, "Peer does not support gossipsub"; "peer_id" => %peer_id); self.peer_manager_mut().report_peer( &peer_id, - PeerAction::LowToleranceError, + PeerAction::Fatal, ReportSource::Gossipsub, Some(GoodbyeReason::Unknown), "does_not_support_gossipsub",