Sync Re-Write (#663)

* Apply clippy lints to beacon node

* Remove unnecessary logging and correct formatting

* Initial bones of load-balanced range-sync

* Port bump meshsup tests

* Further structure and network handling logic added

* Basic structure, ignoring error handling

* Correct max peers delay bug

* Clean up and re-write message processor and sync manager

* Restructure directory, correct type issues

* Fix compiler issues

* Completed first testing of new sync

* Correct merge issues

* Clean up warnings

* Push attestation processed log down to dbg

* Correct math error, downgraded logs

* Add RPC error handling and improved syncing code

* Add libp2p stream error handling and dropping of invalid peers

* Lower logs

* Add discovery tweak

* Correct libp2p service locking

* Handles peer disconnects for sync

* Add logs, downgrade discovery log

* Less fork choice (#679)

* Try merge in change to reduce fork choice calls

* Remove fork choice from process block

* Minor log fix

* Check successes > 0

* Fix failing beacon chain tests

* Fix re-org warnings

* Fix mistake in prev commit

* Range sync refactor

- Introduces `ChainCollection`
- Correct Disconnect node handling
- Removes duplicate code

* Various bug fixes

* Remove unnecessary logs

* Maintain syncing state in the transition from finalized to head

* Improved disconnect handling

* Adds forwards block iterator

* Notifies lighthouse on stream timeouts

* Apply new gossipsub updates

Author: Age Manning
Date: 2019-12-09 18:50:21 +11:00
Committed by: GitHub
Parent: 988059bc9c
Commit: 5853326342
21 changed files with 1805 additions and 838 deletions

View File

@@ -172,7 +172,7 @@ impl<TSubstream: AsyncRead + AsyncWrite> NetworkBehaviourEventProcess<IdentifyEv
);
info.listen_addrs.truncate(MAX_IDENTIFY_ADDRESSES);
}
- debug!(self.log, "Identified Peer"; "Peer" => format!("{}", peer_id),
+ debug!(self.log, "Identified Peer"; "peer" => format!("{}", peer_id),
"protocol_version" => info.protocol_version,
"agent_version" => info.agent_version,
"listening_ addresses" => format!("{:?}", info.listen_addrs),

View File

@@ -21,7 +21,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
use tokio::timer::Delay;
/// Maximum seconds before searching for extra peers.
- const MAX_TIME_BETWEEN_PEER_SEARCHES: u64 = 60;
+ const MAX_TIME_BETWEEN_PEER_SEARCHES: u64 = 120;
/// Initial delay between peer searches.
const INITIAL_SEARCH_DELAY: u64 = 5;
/// Local ENR storage filename.
@@ -172,18 +172,6 @@ impl<TSubstream> Discovery<TSubstream> {
let random_node = NodeId::random();
debug!(self.log, "Searching for peers");
self.discovery.find_node(random_node);
- // update the time until next discovery
- let delay = {
-     if self.past_discovery_delay < MAX_TIME_BETWEEN_PEER_SEARCHES {
-         self.past_discovery_delay *= 2;
-         self.past_discovery_delay
-     } else {
-         MAX_TIME_BETWEEN_PEER_SEARCHES
-     }
- };
- self.peer_discovery_delay
-     .reset(Instant::now() + Duration::from_secs(delay));
}
}
@@ -252,6 +240,10 @@ where
if self.connected_peers.len() < self.max_peers {
self.find_peers();
}
+ // Set to maximum, and update to earlier, once we get our results back.
+ self.peer_discovery_delay.reset(
+     Instant::now() + Duration::from_secs(MAX_TIME_BETWEEN_PEER_SEARCHES),
+ );
}
Ok(Async::NotReady) => break,
Err(e) => {
@@ -283,6 +275,17 @@ where
}
Discv5Event::FindNodeResult { closer_peers, .. } => {
debug!(self.log, "Discovery query completed"; "peers_found" => closer_peers.len());
+ // update the time to the next query
+ if self.past_discovery_delay < MAX_TIME_BETWEEN_PEER_SEARCHES {
+     self.past_discovery_delay *= 2;
+ }
+ let delay = std::cmp::max(
+     self.past_discovery_delay,
+     MAX_TIME_BETWEEN_PEER_SEARCHES,
+ );
+ self.peer_discovery_delay
+     .reset(Instant::now() + Duration::from_secs(delay));
if closer_peers.is_empty() {
debug!(self.log, "Discovery random query found no peers");
}
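
Taken together, the discovery changes move the backoff bookkeeping out of `find_peers`: the delay timer is pessimistically reset to the maximum when a query starts, then shortened once `FindNodeResult` comes back, with `past_discovery_delay` doubling until it reaches `MAX_TIME_BETWEEN_PEER_SEARCHES`. Below is a minimal standalone sketch of that capped doubling, assuming the intent is to cap the next delay at the maximum; `next_discovery_delay` is a hypothetical helper, not part of the crate's `Discovery` API.

// Sketch of a capped exponential backoff between peer searches.
const MAX_TIME_BETWEEN_PEER_SEARCHES: u64 = 120;
const INITIAL_SEARCH_DELAY: u64 = 5;

fn next_discovery_delay(past_discovery_delay: &mut u64) -> u64 {
    // Double the stored delay until it reaches the maximum...
    if *past_discovery_delay < MAX_TIME_BETWEEN_PEER_SEARCHES {
        *past_discovery_delay *= 2;
    }
    // ...and never schedule the next search later than the maximum.
    std::cmp::min(*past_discovery_delay, MAX_TIME_BETWEEN_PEER_SEARCHES)
}

fn main() {
    let mut past = INITIAL_SEARCH_DELAY;
    for _ in 0..6 {
        println!("next search in {}s", next_discovery_delay(&mut past));
    }
    // prints: 10, 20, 40, 80, 120, 120
}

Resetting the timer to the maximum first and only tightening it once results arrive means a query that never completes cannot leave the node searching at the initial five-second cadence.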

View File

@@ -8,11 +8,11 @@ use crate::rpc::protocol::{InboundFramed, OutboundFramed};
use core::marker::PhantomData;
use fnv::FnvHashMap;
use futures::prelude::*;
- use libp2p::core::upgrade::{InboundUpgrade, OutboundUpgrade};
+ use libp2p::core::upgrade::{InboundUpgrade, OutboundUpgrade, UpgradeError};
use libp2p::swarm::protocols_handler::{
KeepAlive, ProtocolsHandler, ProtocolsHandlerEvent, ProtocolsHandlerUpgrErr, SubstreamProtocol,
};
- use slog::{crit, debug, error, trace, warn};
+ use slog::{crit, debug, error, trace};
use smallvec::SmallVec;
use std::collections::hash_map::Entry;
use std::time::{Duration, Instant};
@@ -25,6 +25,9 @@ use tokio::timer::{delay_queue, DelayQueue};
/// The time (in seconds) before a substream that is awaiting a response from the user times out.
pub const RESPONSE_TIMEOUT: u64 = 10;
+ /// The number of times to retry an outbound upgrade in the case of IO errors.
+ const IO_ERROR_RETRIES: u8 = 3;
/// Inbound requests are given a sequential `RequestId` to keep track of.
type InboundRequestId = RequestId;
/// Outbound requests are associated with an id that is given by the application that sent the
@@ -40,7 +43,7 @@ where
listen_protocol: SubstreamProtocol<RPCProtocol>,
/// If `Some`, something bad happened and we should shut down the handler with an error.
- pending_error: Option<ProtocolsHandlerUpgrErr<RPCError>>,
+ pending_error: Option<(RequestId, ProtocolsHandlerUpgrErr<RPCError>)>,
/// Queue of events to produce in `poll()`.
events_out: SmallVec<[RPCEvent; 4]>,
@@ -81,6 +84,10 @@ where
/// After the given duration has elapsed, an inactive connection will shutdown.
inactive_timeout: Duration,
+ /// Try to negotiate the outbound upgrade a few times if there is an IO error before reporting the request as failed.
+ /// This keeps track of the number of attempts.
+ outbound_io_error_retries: u8,
/// Logger for handling RPC streams
log: slog::Logger,
@@ -150,6 +157,7 @@ where
max_dial_negotiated: 8,
keep_alive: KeepAlive::Yes,
inactive_timeout,
+ outbound_io_error_retries: 0,
log: log.clone(),
_phantom: PhantomData,
}
@@ -339,13 +347,29 @@ where
fn inject_dial_upgrade_error(
&mut self,
- _: Self::OutboundOpenInfo,
+ request: Self::OutboundOpenInfo,
error: ProtocolsHandlerUpgrErr<
<Self::OutboundProtocol as OutboundUpgrade<Self::Substream>>::Error,
>,
) {
+ if let ProtocolsHandlerUpgrErr::Upgrade(UpgradeError::Apply(RPCError::IoError(_))) = error {
+     self.outbound_io_error_retries += 1;
+     if self.outbound_io_error_retries < IO_ERROR_RETRIES {
+         self.send_request(request);
+         return;
+     }
+ }
+ self.outbound_io_error_retries = 0;
+ // add the error
+ let request_id = {
+     if let RPCEvent::Request(id, _) = request {
+         id
+     } else {
+         0
+     }
+ };
if self.pending_error.is_none() {
- self.pending_error = Some(error);
+ self.pending_error = Some((request_id, error));
}
}
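
The new `outbound_io_error_retries` counter and the `IO_ERROR_RETRIES` constant implement a small retry policy: an IO error during the outbound upgrade re-sends the same request up to three times before the failure is surfaced. A self-contained sketch of that policy follows, with hypothetical `UpgradeOutcome` and `Retrier` types standing in for the handler's real state.

// Sketch only: `Retrier` and `UpgradeOutcome` are illustrative, not crate types.
const IO_ERROR_RETRIES: u8 = 3;

enum UpgradeOutcome {
    IoError,
    Other(String),
}

struct Retrier {
    outbound_io_error_retries: u8,
}

impl Retrier {
    /// Returns Ok(()) if the request should be re-sent, or Err with the
    /// failure that should be reported to the application.
    fn on_upgrade_error(&mut self, outcome: UpgradeOutcome) -> Result<(), String> {
        match outcome {
            UpgradeOutcome::IoError => {
                self.outbound_io_error_retries += 1;
                if self.outbound_io_error_retries < IO_ERROR_RETRIES {
                    // transient IO failure: retry the same request
                    return Ok(());
                }
                self.outbound_io_error_retries = 0;
                Err("IO error: retries exhausted".into())
            }
            // non-IO errors are reported immediately
            UpgradeOutcome::Other(e) => {
                self.outbound_io_error_retries = 0;
                Err(e)
            }
        }
    }
}

fn main() {
    let mut r = Retrier { outbound_io_error_retries: 0 };
    assert!(r.on_upgrade_error(UpgradeOutcome::IoError).is_ok()); // retry 1
    assert!(r.on_upgrade_error(UpgradeOutcome::IoError).is_ok()); // retry 2
    assert!(r.on_upgrade_error(UpgradeOutcome::IoError).is_err()); // give up
}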
@@ -359,11 +383,43 @@ where
ProtocolsHandlerEvent<Self::OutboundProtocol, Self::OutboundOpenInfo, Self::OutEvent>,
Self::Error,
> {
- if let Some(err) = self.pending_error.take() {
-     // Returning an error here will result in dropping any peer that doesn't support any of
-     // the RPC protocols. For our immediate purposes we permit this and simply log that an
-     // upgrade was not supported.
-     warn!(self.log,"RPC Protocol was not supported"; "Error" => format!("{}", err));
+ if let Some((request_id, err)) = self.pending_error.take() {
+     // Returning an error here will result in dropping the peer.
+     match err {
+         ProtocolsHandlerUpgrErr::Upgrade(UpgradeError::Apply(
+             RPCError::InvalidProtocol(protocol_string),
+         )) => {
+             // Peer does not support the protocol.
+             // TODO: We currently will not drop the peer, for maximal compatibility with
+             // other clients testing their software. In the future, we will need to decide
+             // which protocols are a bare minimum to support before kicking the peer.
+             error!(self.log, "Peer doesn't support the RPC protocol"; "protocol" => protocol_string);
+             return Ok(Async::Ready(ProtocolsHandlerEvent::Custom(
+                 RPCEvent::Error(request_id, RPCError::InvalidProtocol(protocol_string)),
+             )));
+         }
+         ProtocolsHandlerUpgrErr::Timeout | ProtocolsHandlerUpgrErr::Timer => {
+             // negotiation timeout, mark the request as failed
+             return Ok(Async::Ready(ProtocolsHandlerEvent::Custom(
+                 RPCEvent::Error(
+                     request_id,
+                     RPCError::Custom("Protocol negotiation timeout".into()),
+                 ),
+             )));
+         }
+         ProtocolsHandlerUpgrErr::Upgrade(UpgradeError::Apply(err)) => {
+             // IO/Decode/Custom Error, report to the application
+             return Ok(Async::Ready(ProtocolsHandlerEvent::Custom(
+                 RPCEvent::Error(request_id, err),
+             )));
+         }
+         ProtocolsHandlerUpgrErr::Upgrade(UpgradeError::Select(err)) => {
+             // Error during negotiation
+             return Ok(Async::Ready(ProtocolsHandlerEvent::Custom(
+                 RPCEvent::Error(request_id, RPCError::Custom(format!("{}", err))),
+             )));
+         }
+     }
}
// return any events that need to be reported
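
With the error now carried alongside its `RequestId`, `poll()` turns each failed negotiation into an `RPCEvent::Error` for that specific request instead of tearing the handler down, and only an unsupported protocol is treated specially (logged, peer kept). Below is a minimal sketch of that classification step, using hypothetical `UpgradeFailure` and `RpcError` enums rather than the crate's own types.

// Sketch only: these enums stand in for ProtocolsHandlerUpgrErr / RPCError.
type RequestId = usize;

#[derive(Debug)]
enum UpgradeFailure {
    InvalidProtocol(String),
    NegotiationTimeout,
    Apply(String),
    Select(String),
}

#[derive(Debug)]
enum RpcError {
    InvalidProtocol(String),
    Custom(String),
}

// Map a failed upgrade to an error event tagged with the request that caused it.
fn classify(request_id: RequestId, failure: UpgradeFailure) -> (RequestId, RpcError) {
    match failure {
        // The peer does not speak the protocol; report it but keep the peer.
        UpgradeFailure::InvalidProtocol(p) => (request_id, RpcError::InvalidProtocol(p)),
        // Negotiation took too long; only this request is marked as failed.
        UpgradeFailure::NegotiationTimeout => {
            (request_id, RpcError::Custom("Protocol negotiation timeout".into()))
        }
        // Other upgrade errors are forwarded to the application as-is.
        UpgradeFailure::Apply(e) | UpgradeFailure::Select(e) => {
            (request_id, RpcError::Custom(e))
        }
    }
}

fn main() {
    println!("{:?}", classify(7, UpgradeFailure::NegotiationTimeout));
}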
@@ -385,12 +441,19 @@ where
}
// purge expired outbound substreams
- while let Async::Ready(Some(stream_id)) = self
+ if let Async::Ready(Some(stream_id)) = self
.outbound_substreams_delay
.poll()
.map_err(|_| ProtocolsHandlerUpgrErr::Timer)?
{
self.outbound_substreams.remove(stream_id.get_ref());
+ // notify the user
+ return Ok(Async::Ready(ProtocolsHandlerEvent::Custom(
+     RPCEvent::Error(
+         stream_id.get_ref().clone(),
+         RPCError::Custom("Stream timed out".into()),
+     ),
+ )));
}
// drive inbound streams that need to be processed
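
Finally, the substream-timeout change above swaps the `while let` for an `if let`: rather than silently draining every expired outbound substream in one pass, the handler removes one per poll and returns an error event for it, so the caller is told about each stream timeout. A simplified sketch of that pattern, with hypothetical types and a plain queue standing in for the `DelayQueue` of timed-out streams:

// Sketch only: `Handler` and `Event` are illustrative, not the crate's types.
use std::collections::{HashMap, VecDeque};

type RequestId = usize;

#[derive(Debug)]
enum Event {
    Error(RequestId, String),
}

struct Handler {
    outbound_substreams: HashMap<RequestId, ()>,
    expired: VecDeque<RequestId>, // stands in for the DelayQueue of timed-out streams
}

impl Handler {
    fn poll(&mut self) -> Option<Event> {
        // `if let` rather than `while let`: report one timeout per poll.
        if let Some(id) = self.expired.pop_front() {
            self.outbound_substreams.remove(&id);
            return Some(Event::Error(id, "Stream timed out".into()));
        }
        None
    }
}

fn main() {
    let mut h = Handler {
        outbound_substreams: [(1, ()), (2, ())].into_iter().collect(),
        expired: VecDeque::from([1, 2]),
    };
    while let Some(ev) = h.poll() {
        println!("{:?}", ev);
    }
}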