Custom RPC request management for sync (#3029)

## Proposed Changes
Make `lighthouse_network` generic over request ids, now usable by sync
This commit is contained in:
Divma
2022-03-02 22:07:17 +00:00
parent e88b18be09
commit 4bf1af4e85
18 changed files with 570 additions and 521 deletions

View File

@@ -8,12 +8,11 @@
mod processor;
use crate::error;
use crate::service::NetworkMessage;
use crate::service::{NetworkMessage, RequestId};
use beacon_chain::{BeaconChain, BeaconChainTypes};
use futures::prelude::*;
use lighthouse_network::{
rpc::RequestId, MessageId, NetworkGlobals, PeerId, PeerRequestId, PubsubMessage, Request,
Response,
MessageId, NetworkGlobals, PeerId, PeerRequestId, PubsubMessage, Request, Response,
};
use processor::Processor;
use slog::{debug, o, trace};

View File

@@ -1,7 +1,8 @@
use crate::beacon_processor::{
BeaconProcessor, WorkEvent as BeaconWorkEvent, MAX_WORK_EVENT_QUEUE_LEN,
};
use crate::service::NetworkMessage;
use crate::service::{NetworkMessage, RequestId};
use crate::sync::manager::RequestId as SyncId;
use crate::sync::SyncMessage;
use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes};
use lighthouse_network::rpc::*;
@@ -100,8 +101,11 @@ impl<T: BeaconChainTypes> Processor<T> {
/// this function notifies the sync manager of the error.
pub fn on_rpc_error(&mut self, peer_id: PeerId, request_id: RequestId) {
// Check if the failed RPC belongs to sync
if let RequestId::Sync(id) = request_id {
self.send_to_sync(SyncMessage::RPCError(peer_id, id));
if let RequestId::Sync(request_id) = request_id {
self.send_to_sync(SyncMessage::RpcError {
peer_id,
request_id,
});
}
}
@@ -176,24 +180,28 @@ impl<T: BeaconChainTypes> Processor<T> {
request_id: RequestId,
beacon_block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
) {
let request_id = match request_id {
RequestId::Sync(sync_id) => match sync_id {
SyncId::SingleBlock { .. } | SyncId::ParentLookup { .. } => {
unreachable!("Block lookups do not request BBRange requests")
}
id @ (SyncId::BackFillSync { .. } | SyncId::RangeSync { .. }) => id,
},
RequestId::Router => unreachable!("All BBRange requests belong to sync"),
};
trace!(
self.log,
"Received BlocksByRange Response";
"peer" => %peer_id,
);
if let RequestId::Sync(id) = request_id {
self.send_to_sync(SyncMessage::BlocksByRangeResponse {
peer_id,
request_id: id,
beacon_block,
});
} else {
debug!(
self.log,
"All blocks by range responses should belong to sync"
);
}
self.send_to_sync(SyncMessage::RpcBlock {
peer_id,
request_id,
beacon_block,
seen_timestamp: timestamp_now(),
});
}
/// Handle a `BlocksByRoot` response from the peer.
@@ -203,25 +211,27 @@ impl<T: BeaconChainTypes> Processor<T> {
request_id: RequestId,
beacon_block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
) {
let request_id = match request_id {
RequestId::Sync(sync_id) => match sync_id {
id @ (SyncId::SingleBlock { .. } | SyncId::ParentLookup { .. }) => id,
SyncId::BackFillSync { .. } | SyncId::RangeSync { .. } => {
unreachable!("Batch syncing do not request BBRoot requests")
}
},
RequestId::Router => unreachable!("All BBRoot requests belong to sync"),
};
trace!(
self.log,
"Received BlocksByRoot Response";
"peer" => %peer_id,
);
if let RequestId::Sync(id) = request_id {
self.send_to_sync(SyncMessage::BlocksByRootResponse {
peer_id,
request_id: id,
beacon_block,
seen_timestamp: timestamp_now(),
});
} else {
debug!(
self.log,
"All Blocks by Root responses should belong to sync"
)
}
self.send_to_sync(SyncMessage::RpcBlock {
peer_id,
request_id,
beacon_block,
seen_timestamp: timestamp_now(),
});
}
/// Process a gossip message declaring a new block.

View File

@@ -1,3 +1,4 @@
use super::sync::manager::RequestId as SyncId;
use crate::persisted_dht::{clear_dht, load_dht, persist_dht};
use crate::router::{Router, RouterMessage};
use crate::subnet_service::SyncCommitteeService;
@@ -14,7 +15,7 @@ use lighthouse_network::{
prometheus_client::registry::Registry, MessageAcceptance, Service as LibP2PService,
};
use lighthouse_network::{
rpc::{GoodbyeReason, RPCResponseErrorCode, RequestId},
rpc::{GoodbyeReason, RPCResponseErrorCode},
Context, Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, ReportSource, Request,
Response, Subnet,
};
@@ -42,6 +43,13 @@ const SUBSCRIBE_DELAY_SLOTS: u64 = 2;
/// Delay after a fork where we unsubscribe from pre-fork topics.
const UNSUBSCRIBE_DELAY_EPOCHS: u64 = 2;
/// Application level requests sent to the network.
///
/// Tags every outgoing request so that its response or error can be attributed to the
/// part of the application it belongs to.
#[derive(Debug, Clone, Copy)]
pub enum RequestId {
/// Request that belongs to sync; carries the sync-specific request id.
Sync(SyncId),
/// Request that is not tracked by sync.
// NOTE(review): presumably requests owned by the router itself — confirm against callers.
Router,
}
/// Types of messages that the network service can receive.
#[derive(Debug)]
pub enum NetworkMessage<T: EthSpec> {
@@ -112,7 +120,7 @@ pub struct NetworkService<T: BeaconChainTypes> {
/// A reference to the underlying beacon chain.
beacon_chain: Arc<BeaconChain<T>>,
/// The underlying libp2p service that drives all the network interactions.
libp2p: LibP2PService<T::EthSpec>,
libp2p: LibP2PService<RequestId, T::EthSpec>,
/// An attestation and subnet manager service.
attestation_service: AttestationService<T>,
/// A sync committee subnet manager service.
@@ -389,7 +397,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
/// Handle an event received from the network.
async fn on_libp2p_event(
&mut self,
ev: Libp2pEvent<T::EthSpec>,
ev: Libp2pEvent<RequestId, T::EthSpec>,
shutdown_sender: &mut Sender<ShutdownReason>,
) {
match ev {

View File

@@ -8,9 +8,8 @@
//! If a batch fails, the backfill sync cannot progress. In this scenario, we mark the backfill
//! sync as failed, log an error and attempt to retry once a new peer joins the node.
use super::RequestId;
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
use crate::sync::manager::BatchProcessResult;
use crate::sync::manager::{BatchProcessResult, Id};
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::range_sync::{BatchConfig, BatchId, BatchInfo, BatchState};
use beacon_chain::{BeaconChain, BeaconChainTypes};
@@ -357,7 +356,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
network: &mut SyncNetworkContext<T::EthSpec>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: RequestId,
request_id: Id,
) -> Result<(), BackFillError> {
if let Some(batch) = self.batches.get_mut(&batch_id) {
// A batch could be retried without the peer failing the request (disconnecting/
@@ -392,7 +391,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
network: &mut SyncNetworkContext<T::EthSpec>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: RequestId,
request_id: Id,
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
) -> Result<ProcessResult, BackFillError> {
// check if we have this batch

View File

@@ -37,7 +37,6 @@ use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
use super::network_context::SyncNetworkContext;
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
use super::range_sync::{ChainId, RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use super::RequestId;
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;
@@ -52,6 +51,7 @@ use slog::{crit, debug, error, info, trace, warn, Logger};
use smallvec::SmallVec;
use ssz_types::VariableList;
use std::boxed::Box;
use std::collections::hash_map::Entry;
use std::ops::Sub;
use std::sync::Arc;
use std::time::Duration;
@@ -73,23 +73,31 @@ const PARENT_FAIL_TOLERANCE: usize = 5;
/// is further back than the most recent head slot.
const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2;
/// Numeric identifier carried by every variant of the sync [`RequestId`].
pub type Id = u32;
/// Id of rpc requests sent by sync to the network.
///
/// The variant records which sync mechanism issued the request, so that a response or
/// error can be dispatched by matching on it.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub enum RequestId {
/// Request searching for a block given a hash.
SingleBlock { id: Id },
/// Request searching for a block's parent. The id is compared against the `pending`
/// id of the queued parent requests to find the lookup it belongs to.
ParentLookup { id: Id },
/// Request was from the backfill sync algorithm.
BackFillSync { id: Id },
/// The request was from a chain in the range sync algorithm.
RangeSync { id: Id },
}
#[derive(Debug)]
/// A message that can be sent to the sync manager thread.
pub enum SyncMessage<T: EthSpec> {
/// A useful peer has been discovered.
AddPeer(PeerId, SyncInfo),
/// A [`BlocksByRange`] response has been received.
BlocksByRangeResponse {
peer_id: PeerId,
/// A block has been received from the RPC.
RpcBlock {
request_id: RequestId,
beacon_block: Option<Box<SignedBeaconBlock<T>>>,
},
/// A [`BlocksByRoot`] response has been received.
BlocksByRootResponse {
peer_id: PeerId,
request_id: RequestId,
beacon_block: Option<Box<SignedBeaconBlock<T>>>,
seen_timestamp: Duration,
},
@@ -105,7 +113,10 @@ pub enum SyncMessage<T: EthSpec> {
Disconnect(PeerId),
/// An RPC Error has occurred on a request.
RPCError(PeerId, RequestId),
RpcError {
peer_id: PeerId,
request_id: RequestId,
},
/// A batch has been processed by the block processor thread.
BatchProcessed {
@@ -157,7 +168,7 @@ struct ParentRequests<T: EthSpec> {
last_submitted_peer: PeerId,
/// The request ID of the in-progress request for this lookup, if any.
pending: Option<RequestId>,
pending: Option<Id>,
}
/// The primary object for handling and driving all the current syncing logic. It maintains the
@@ -193,7 +204,7 @@ pub struct SyncManager<T: BeaconChainTypes> {
/// received or not.
///
/// The flag allows us to determine if the peer returned data or sent us nothing.
single_block_lookups: FnvHashMap<RequestId, SingleBlockRequest>,
single_block_lookups: FnvHashMap<Id, SingleBlockRequest>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T>>,
@@ -313,46 +324,31 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// There are two reasons we could have received a BlocksByRoot response
/// - We requested a single hash and have received a response for the single_block_lookup
/// - We are looking up parent blocks in parent lookup search
async fn blocks_by_root_response(
async fn parent_lookup_response(
&mut self,
peer_id: PeerId,
request_id: RequestId,
request_id: Id,
block: Option<SignedBeaconBlock<T::EthSpec>>,
seen_timestamp: Duration,
_seen_timestamp: Duration,
) {
let mut parent_request = if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending == Some(request_id))
{
// we remove from the queue and process it. It will get re-added if required
self.parent_queue.remove(pos)
} else {
if block.is_some() {
debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id);
}
return;
};
match block {
Some(block) => {
// data was returned, not just a stream termination
// check if this is a single block lookup - i.e we were searching for a specific hash
let mut single_block_hash = None;
if let Some(block_request) = self.single_block_lookups.get_mut(&request_id) {
// update the state of the lookup indicating a block was received from the peer
block_request.block_returned = true;
single_block_hash = Some(block_request.hash);
}
if let Some(block_hash) = single_block_hash {
self.single_block_lookup_response(peer_id, block, block_hash, seen_timestamp)
.await;
return;
}
// This wasn't a single block lookup request, it must be a response to a parent request search
// find the request
let mut parent_request = match self
.parent_queue
.iter()
.position(|request| request.pending == Some(request_id))
{
// we remove from the queue and process it. It will get re-added if required
Some(pos) => self.parent_queue.remove(pos),
None => {
// No pending request, invalid request_id or coding error
warn!(self.log, "BlocksByRoot response unknown"; "request_id" => request_id);
return;
}
};
// check if the parent of this block isn't in our failed cache. If it is, this
// chain should be dropped and the peer downscored.
if self.failed_chains.contains(&block.message().parent_root()) {
@@ -382,38 +378,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
self.process_parent_request(parent_request).await;
}
None => {
// this is a stream termination
// stream termination for a single block lookup, remove the key
if let Some(single_block_request) = self.single_block_lookups.remove(&request_id) {
// The peer didn't respond with a block that it referenced.
// This can be allowed as some clients may implement pruning. We mildly
// tolerate this behaviour.
if !single_block_request.block_returned {
warn!(self.log, "Peer didn't respond with a block it referenced"; "referenced_block_hash" => %single_block_request.hash, "peer_id" => %peer_id);
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"bbroot_no_block",
);
}
return;
}
// This wasn't a single block lookup request, it must be a response to a parent request search
// find the request and remove it
let mut parent_request = match self
.parent_queue
.iter()
.position(|request| request.pending == Some(request_id))
{
Some(pos) => self.parent_queue.remove(pos),
None => {
// No pending request, the parent request has been processed and this is
// the resulting stream termination.
return;
}
};
// An empty response has been returned to a parent request
// if an empty response is given, the peer didn't have the requested block, try again
parent_request.failed_attempts += 1;
@@ -458,71 +422,95 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// lookup search is started.
async fn single_block_lookup_response(
&mut self,
request_id: Id,
peer_id: PeerId,
block: SignedBeaconBlock<T::EthSpec>,
expected_block_hash: Hash256,
block: Option<SignedBeaconBlock<T::EthSpec>>,
seen_timestamp: Duration,
) {
// verify the hash is correct and try and process the block
if expected_block_hash != block.canonical_root() {
// The peer that sent this, sent us the wrong block.
// We do not tolerate this behaviour. The peer is instantly disconnected and banned.
warn!(self.log, "Peer sent incorrect block for single block lookup"; "peer_id" => %peer_id);
self.network.goodbye_peer(peer_id, GoodbyeReason::Fault);
return;
}
let block_result = match self.process_block_async(block.clone()).await {
Some(block_result) => block_result,
None => return,
};
// we have the correct block, try and process it
match block_result {
Ok(block_root) => {
// Block has been processed, so write the block time to the cache.
self.chain.block_times_cache.write().set_time_observed(
block_root,
block.slot(),
seen_timestamp,
None,
None,
);
info!(self.log, "Processed block"; "block" => %block_root);
match self.chain.fork_choice() {
Ok(()) => trace!(
self.log,
"Fork choice success";
"location" => "single block"
),
Err(e) => error!(
self.log,
"Fork choice failed";
"error" => ?e,
"location" => "single block"
),
if let Entry::Occupied(mut entry) = self.single_block_lookups.entry(request_id) {
match block {
None => {
// Stream termination. Remove the lookup
let (_, single_block_request) = entry.remove_entry();
// The peer didn't respond with a block that it referenced.
// This can be allowed as some clients may implement pruning. We mildly
// tolerate this behaviour.
if !single_block_request.block_returned {
warn!(self.log, "Peer didn't respond with a block it referenced";
"referenced_block_hash" => %single_block_request.hash, "peer_id" => %peer_id);
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"bbroot_no_block",
);
}
}
Some(block) => {
// update the state of the lookup indicating a block was received from the peer
entry.get_mut().block_returned = true;
// verify the hash is correct and try and process the block
if entry.get().hash != block.canonical_root() {
// The peer that sent this, sent us the wrong block.
// We do not tolerate this behaviour. The peer is instantly disconnected and banned.
warn!(self.log, "Peer sent incorrect block for single block lookup"; "peer_id" => %peer_id);
self.network.goodbye_peer(peer_id, GoodbyeReason::Fault);
return;
}
let block_result = match self.process_block_async(block.clone()).await {
Some(block_result) => block_result,
None => return,
};
// we have the correct block, try and process it
match block_result {
Ok(block_root) => {
// Block has been processed, so write the block time to the cache.
self.chain.block_times_cache.write().set_time_observed(
block_root,
block.slot(),
seen_timestamp,
None,
None,
);
info!(self.log, "Processed block"; "block" => %block_root);
match self.chain.fork_choice() {
Ok(()) => trace!(
self.log,
"Fork choice success";
"location" => "single block"
),
Err(e) => error!(
self.log,
"Fork choice failed";
"error" => ?e,
"location" => "single block"
),
}
}
Err(BlockError::ParentUnknown { .. }) => {
// We don't know of the blocks parent, begin a parent lookup search
self.add_unknown_block(peer_id, block);
}
Err(BlockError::BlockIsAlreadyKnown) => {
trace!(self.log, "Single block lookup already known");
}
Err(BlockError::BeaconChainError(e)) => {
warn!(self.log, "Unexpected block processing error"; "error" => ?e);
}
outcome => {
warn!(self.log, "Single block lookup failed"; "outcome" => ?outcome);
// This could be a range of errors. But we couldn't process the block.
// For now we consider this a mid tolerance error.
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"single_block_lookup_failed",
);
}
}
}
}
Err(BlockError::ParentUnknown { .. }) => {
// We don't know of the blocks parent, begin a parent lookup search
self.add_unknown_block(peer_id, block);
}
Err(BlockError::BlockIsAlreadyKnown) => {
trace!(self.log, "Single block lookup already known");
}
Err(BlockError::BeaconChainError(e)) => {
warn!(self.log, "Unexpected block processing error"; "error" => ?e);
}
outcome => {
warn!(self.log, "Single block lookup failed"; "outcome" => ?outcome);
// This could be a range of errors. But we couldn't process the block.
// For now we consider this a mid tolerance error.
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"single_block_lookup_failed",
);
}
}
}
@@ -612,7 +600,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
block_roots: VariableList::from(vec![block_hash]),
};
if let Ok(request_id) = self.network.blocks_by_root_request(peer_id, request) {
if let Ok(request_id) = self.network.single_block_lookup_request(peer_id, request) {
self.single_block_lookups
.insert(request_id, SingleBlockRequest::new(block_hash));
}
@@ -621,27 +609,47 @@ impl<T: BeaconChainTypes> SyncManager<T> {
/// Handles RPC errors related to requests that were emitted from the sync manager.
fn inject_error(&mut self, peer_id: PeerId, request_id: RequestId) {
trace!(self.log, "Sync manager received a failed RPC");
// remove any single block lookups
if self.single_block_lookups.remove(&request_id).is_some() {
// this was a single block request lookup, look no further
return;
match request_id {
RequestId::SingleBlock { id } => {
self.single_block_lookups.remove(&id);
}
RequestId::ParentLookup { id } => {
if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending == Some(id))
{
// increment the failure of a parent lookup if the request matches a parent search
let mut parent_request = self.parent_queue.remove(pos);
parent_request.failed_attempts += 1;
parent_request.last_submitted_peer = peer_id;
self.request_parent(parent_request);
}
}
RequestId::BackFillSync { id } => {
if let Some(batch_id) = self.network.backfill_sync_response(id, true) {
match self
.backfill_sync
.inject_error(&mut self.network, batch_id, &peer_id, id)
{
Ok(_) => {}
Err(_) => self.update_sync_state(),
}
}
}
RequestId::RangeSync { id } => {
if let Some((chain_id, batch_id)) = self.network.range_sync_response(id, true) {
self.range_sync.inject_error(
&mut self.network,
peer_id,
batch_id,
chain_id,
id,
);
self.update_sync_state()
}
}
}
// increment the failure of a parent lookup if the request matches a parent search
if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending == Some(request_id))
{
let mut parent_request = self.parent_queue.remove(pos);
parent_request.failed_attempts += 1;
parent_request.last_submitted_peer = peer_id;
self.request_parent(parent_request);
return;
}
// Otherwise this error matches no known request.
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
}
fn peer_disconnect(&mut self, peer_id: &PeerId) {
@@ -978,7 +986,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// guaranteed to have this chain of blocks.
let peer_id = parent_request.last_submitted_peer;
if let Ok(request_id) = self.network.blocks_by_root_request(peer_id, request) {
if let Ok(request_id) = self.network.parent_lookup_request(peer_id, request) {
// if the request was successful add the queue back into self
parent_request.pending = Some(request_id);
self.parent_queue.push(parent_request);
@@ -994,59 +1002,15 @@ impl<T: BeaconChainTypes> SyncManager<T> {
SyncMessage::AddPeer(peer_id, info) => {
self.add_peer(peer_id, info);
}
SyncMessage::BlocksByRangeResponse {
peer_id,
SyncMessage::RpcBlock {
request_id,
beacon_block,
} => {
let beacon_block = beacon_block.map(|b| *b);
// Obtain which sync requested these blocks and divert accordingly.
match self
.network
.blocks_by_range_response(request_id, beacon_block.is_none())
{
Some(SyncRequestType::RangeSync(batch_id, chain_id)) => {
self.range_sync.blocks_by_range_response(
&mut self.network,
peer_id,
chain_id,
batch_id,
request_id,
beacon_block,
);
self.update_sync_state();
}
Some(SyncRequestType::BackFillSync(batch_id)) => {
match self.backfill_sync.on_block_response(
&mut self.network,
batch_id,
&peer_id,
request_id,
beacon_block,
) {
Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
Ok(ProcessResult::Successful) => {}
Err(_error) => {
// The backfill sync has failed, errors are reported
// within.
self.update_sync_state();
}
}
}
None => {
trace!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
}
}
}
SyncMessage::BlocksByRootResponse {
peer_id,
request_id,
beacon_block,
seen_timestamp,
} => {
self.blocks_by_root_response(
peer_id,
self.rpc_block_received(
request_id,
peer_id,
beacon_block.map(|b| *b),
seen_timestamp,
)
@@ -1061,38 +1025,10 @@ impl<T: BeaconChainTypes> SyncManager<T> {
SyncMessage::Disconnect(peer_id) => {
self.peer_disconnect(&peer_id);
}
SyncMessage::RPCError(peer_id, request_id) => {
// Redirect to a sync mechanism if the error is related to one of their
// requests.
match self.network.blocks_by_range_response(request_id, true) {
Some(SyncRequestType::RangeSync(batch_id, chain_id)) => {
self.range_sync.inject_error(
&mut self.network,
peer_id,
batch_id,
chain_id,
request_id,
);
self.update_sync_state();
}
Some(SyncRequestType::BackFillSync(batch_id)) => {
match self.backfill_sync.inject_error(
&mut self.network,
batch_id,
&peer_id,
request_id,
) {
Ok(_) => {}
Err(_) => self.update_sync_state(),
}
}
None => {
// This is a request not belonging to a sync algorithm.
// Process internally.
self.inject_error(peer_id, request_id);
}
}
}
SyncMessage::RpcError {
peer_id,
request_id,
} => self.inject_error(peer_id, request_id),
SyncMessage::BatchProcessed { sync_type, result } => match sync_type {
SyncRequestType::RangeSync(epoch, chain_id) => {
self.range_sync.handle_block_process_result(
@@ -1136,4 +1072,60 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
}
}
/// Dispatches a block received over RPC to the sync mechanism that requested it.
///
/// The variant of `request_id` selects the destination: single block lookups, parent
/// lookups, backfill sync or range sync. A `beacon_block` of `None` is a stream
/// termination. For the batch based mechanisms it also causes the request to be removed
/// from the network context's bookkeeping. Responses for batch requests that are not
/// registered there are dropped silently.
async fn rpc_block_received(
    &mut self,
    request_id: RequestId,
    peer_id: PeerId,
    beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
    seen_timestamp: Duration,
) {
    // A missing block marks the end of the response stream.
    let stream_terminated = beacon_block.is_none();

    match request_id {
        RequestId::RangeSync { id } => {
            let (chain_id, batch_id) =
                match self.network.range_sync_response(id, stream_terminated) {
                    Some(target) => target,
                    // Not a registered range request, nothing to do.
                    None => return,
                };
            self.range_sync.blocks_by_range_response(
                &mut self.network,
                peer_id,
                chain_id,
                batch_id,
                id,
                beacon_block,
            );
            self.update_sync_state();
        }
        RequestId::BackFillSync { id } => {
            let batch_id = match self.network.backfill_sync_response(id, stream_terminated) {
                Some(batch_id) => batch_id,
                // Not a registered backfill request, nothing to do.
                None => return,
            };
            let outcome = self.backfill_sync.on_block_response(
                &mut self.network,
                batch_id,
                &peer_id,
                id,
                beacon_block,
            );
            match outcome {
                Ok(ProcessResult::Successful) => {}
                // Either the sync completed or it failed (errors are reported within);
                // in both cases the sync state has to be refreshed.
                Ok(ProcessResult::SyncCompleted) | Err(_) => self.update_sync_state(),
            }
        }
        RequestId::ParentLookup { id } => {
            self.parent_lookup_response(peer_id, id, beacon_block, seen_timestamp)
                .await
        }
        RequestId::SingleBlock { id } => {
            self.single_block_lookup_response(id, peer_id, beacon_block, seen_timestamp)
                .await;
        }
    }
}
}

View File

@@ -9,6 +9,3 @@ mod range_sync;
pub use manager::{BatchProcessResult, SyncMessage};
pub use range_sync::ChainId;
/// Type of id of rpc requests sent by sync
pub type RequestId = usize;

View File

@@ -1,15 +1,12 @@
//! Provides network functionality for the Syncing thread. This fundamentally wraps a network
//! channel and stores a global RPC ID to perform requests.
use super::manager::SyncRequestType;
use super::manager::{Id, RequestId as SyncRequestId};
use super::range_sync::{BatchId, ChainId};
use super::RequestId as SyncRequestId;
use crate::service::NetworkMessage;
use crate::service::{NetworkMessage, RequestId};
use crate::status::ToStatusMessage;
use fnv::FnvHashMap;
use lighthouse_network::rpc::{
BlocksByRangeRequest, BlocksByRootRequest, GoodbyeReason, RequestId,
};
use lighthouse_network::rpc::{BlocksByRangeRequest, BlocksByRootRequest, GoodbyeReason};
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request};
use slog::{debug, trace, warn};
use std::sync::Arc;
@@ -26,10 +23,12 @@ pub struct SyncNetworkContext<T: EthSpec> {
network_globals: Arc<NetworkGlobals<T>>,
/// A sequential ID for all RPC requests.
request_id: SyncRequestId,
request_id: Id,
/// BlocksByRange requests made by syncing algorithms.
range_requests: FnvHashMap<SyncRequestId, SyncRequestType>,
/// BlocksByRange requests made by the range syncing algorithm.
range_requests: FnvHashMap<Id, (ChainId, BatchId)>,
backfill_requests: FnvHashMap<Id, BatchId>,
/// Logger for the `SyncNetworkContext`.
log: slog::Logger,
@@ -46,6 +45,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
network_globals,
request_id: 1,
range_requests: FnvHashMap::default(),
backfill_requests: FnvHashMap::default(),
log,
}
}
@@ -78,7 +78,13 @@ impl<T: EthSpec> SyncNetworkContext<T> {
"head_slot" => %status_message.head_slot,
);
let _ = self.send_rpc_request(peer_id, Request::Status(status_message.clone()));
let request = Request::Status(status_message.clone());
let request_id = RequestId::Router;
let _ = self.send_network_msg(NetworkMessage::SendRequest {
peer_id,
request,
request_id,
});
}
}
}
@@ -90,7 +96,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
request: BlocksByRangeRequest,
chain_id: ChainId,
batch_id: BatchId,
) -> Result<SyncRequestId, &'static str> {
) -> Result<Id, &'static str> {
trace!(
self.log,
"Sending BlocksByRange Request";
@@ -98,10 +104,16 @@ impl<T: EthSpec> SyncNetworkContext<T> {
"count" => request.count,
"peer" => %peer_id,
);
let req_id = self.send_rpc_request(peer_id, Request::BlocksByRange(request))?;
self.range_requests
.insert(req_id, SyncRequestType::RangeSync(batch_id, chain_id));
Ok(req_id)
let request = Request::BlocksByRange(request);
let id = self.next_id();
let request_id = RequestId::Sync(SyncRequestId::RangeSync { id });
self.send_network_msg(NetworkMessage::SendRequest {
peer_id,
request,
request_id,
})?;
self.range_requests.insert(id, (chain_id, batch_id));
Ok(id)
}
/// A blocks by range request sent by the backfill sync algorithm
@@ -110,7 +122,7 @@ impl<T: EthSpec> SyncNetworkContext<T> {
peer_id: PeerId,
request: BlocksByRangeRequest,
batch_id: BatchId,
) -> Result<SyncRequestId, &'static str> {
) -> Result<Id, &'static str> {
trace!(
self.log,
"Sending backfill BlocksByRange Request";
@@ -118,21 +130,24 @@ impl<T: EthSpec> SyncNetworkContext<T> {
"count" => request.count,
"peer" => %peer_id,
);
let req_id = self.send_rpc_request(peer_id, Request::BlocksByRange(request))?;
self.range_requests
.insert(req_id, SyncRequestType::BackFillSync(batch_id));
Ok(req_id)
let request = Request::BlocksByRange(request);
let id = self.next_id();
let request_id = RequestId::Sync(SyncRequestId::BackFillSync { id });
self.send_network_msg(NetworkMessage::SendRequest {
peer_id,
request,
request_id,
})?;
self.backfill_requests.insert(id, batch_id);
Ok(id)
}
/// Received a blocks by range response.
pub fn blocks_by_range_response(
pub fn range_sync_response(
&mut self,
request_id: usize,
request_id: Id,
remove: bool,
) -> Option<SyncRequestType> {
// NOTE: we can't guarantee that the request must be registered as it could receive more
// than an error, and be removed after receiving the first one.
// FIXME: https://github.com/sigp/lighthouse/issues/1634
) -> Option<(ChainId, BatchId)> {
if remove {
self.range_requests.remove(&request_id)
} else {
@@ -140,12 +155,21 @@ impl<T: EthSpec> SyncNetworkContext<T> {
}
}
/// Sends a blocks by root request.
pub fn blocks_by_root_request(
/// Resolves the batch that a backfill sync request id was registered for.
///
/// With `remove` set, the request is also dropped from the registered backfill requests;
/// otherwise it is left in place and a copy of the batch id is returned. Returns `None`
/// when the id is not registered.
pub fn backfill_sync_response(&mut self, request_id: Id, remove: bool) -> Option<BatchId> {
    if !remove {
        // Keep the request registered.
        return self.backfill_requests.get(&request_id).cloned();
    }
    self.backfill_requests.remove(&request_id)
}
/// Sends a blocks by root request for a single block lookup.
pub fn single_block_lookup_request(
&mut self,
peer_id: PeerId,
request: BlocksByRootRequest,
) -> Result<usize, &'static str> {
) -> Result<Id, &'static str> {
trace!(
self.log,
"Sending BlocksByRoot Request";
@@ -153,7 +177,39 @@ impl<T: EthSpec> SyncNetworkContext<T> {
"count" => request.block_roots.len(),
"peer" => %peer_id
);
self.send_rpc_request(peer_id, Request::BlocksByRoot(request))
let request = Request::BlocksByRoot(request);
let id = self.next_id();
let request_id = RequestId::Sync(SyncRequestId::SingleBlock { id });
self.send_network_msg(NetworkMessage::SendRequest {
peer_id,
request,
request_id,
})?;
Ok(id)
}
/// Issues a `BlocksByRoot` request on behalf of a parent lookup.
///
/// The request is tagged with a freshly generated `SyncRequestId::ParentLookup` id.
/// Returns that id once the message has been handed to `send_network_msg`, or the error
/// reported by it.
pub fn parent_lookup_request(
    &mut self,
    peer_id: PeerId,
    request: BlocksByRootRequest,
) -> Result<Id, &'static str> {
    trace!(
        self.log,
        "Sending BlocksByRoot Request";
        "method" => "BlocksByRoot",
        "count" => request.block_roots.len(),
        "peer" => %peer_id
    );
    // The id is consumed even when sending fails below.
    let id = self.next_id();
    let message = NetworkMessage::SendRequest {
        peer_id,
        request: Request::BlocksByRoot(request),
        request_id: RequestId::Sync(SyncRequestId::ParentLookup { id }),
    };
    self.send_network_msg(message)?;
    Ok(id)
}
/// Terminates the connection with the peer and bans them.
@@ -184,22 +240,6 @@ impl<T: EthSpec> SyncNetworkContext<T> {
});
}
/// Sends an RPC request.
fn send_rpc_request(
&mut self,
peer_id: PeerId,
request: Request,
) -> Result<usize, &'static str> {
let request_id = self.request_id;
self.request_id += 1;
self.send_network_msg(NetworkMessage::SendRequest {
peer_id,
request_id: RequestId::Sync(request_id),
request,
})?;
Ok(request_id)
}
/// Subscribes to core topics.
pub fn subscribe_core_topics(&mut self) {
self.network_send
@@ -216,4 +256,10 @@ impl<T: EthSpec> SyncNetworkContext<T> {
"Network channel send Failed"
})
}
/// Hands out the current request id and advances the internal counter by one.
// NOTE(review): the increment uses plain `+`, so what happens when the counter reaches
// `Id::MAX` depends on the build's overflow-check setting.
fn next_id(&mut self) -> Id {
    let following = self.request_id + 1;
    // Store the successor and return the value that was current until now.
    std::mem::replace(&mut self.request_id, following)
}
}

View File

@@ -1,4 +1,4 @@
use crate::sync::RequestId;
use crate::sync::manager::Id;
use lighthouse_network::rpc::methods::BlocksByRangeRequest;
use lighthouse_network::PeerId;
use std::collections::HashSet;
@@ -93,7 +93,7 @@ pub enum BatchState<T: EthSpec> {
/// The batch has failed either downloading or processing, but can be requested again.
AwaitingDownload,
/// The batch is being downloaded.
Downloading(PeerId, Vec<SignedBeaconBlock<T>>, RequestId),
Downloading(PeerId, Vec<SignedBeaconBlock<T>>, Id),
/// The batch has been completely downloaded and is ready for processing.
AwaitingProcessing(PeerId, Vec<SignedBeaconBlock<T>>),
/// The batch is being processed.
@@ -167,7 +167,7 @@ impl<T: EthSpec, B: BatchConfig> BatchInfo<T, B> {
}
/// Verifies if an incoming block belongs to this batch.
pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &RequestId) -> bool {
pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &Id) -> bool {
if let BatchState::Downloading(expected_peer, _, expected_id) = &self.state {
return peer_id == expected_peer && expected_id == request_id;
}
@@ -312,7 +312,7 @@ impl<T: EthSpec, B: BatchConfig> BatchInfo<T, B> {
pub fn start_downloading_from_peer(
&mut self,
peer: PeerId,
request_id: RequestId,
request_id: Id,
) -> Result<(), WrongState> {
match self.state.poison() {
BatchState::AwaitingDownload => {

View File

@@ -1,7 +1,7 @@
use super::batch::{BatchInfo, BatchState};
use crate::beacon_processor::ProcessId;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::{network_context::SyncNetworkContext, BatchProcessResult, RequestId};
use crate::sync::{manager::Id, network_context::SyncNetworkContext, BatchProcessResult};
use beacon_chain::BeaconChainTypes;
use fnv::FnvHashMap;
use lighthouse_network::{PeerAction, PeerId};
@@ -214,7 +214,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
network: &mut SyncNetworkContext<T::EthSpec>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: RequestId,
request_id: Id,
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
) -> ProcessingResult {
// check if we have this batch
@@ -807,7 +807,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
network: &mut SyncNetworkContext<T::EthSpec>,
batch_id: BatchId,
peer_id: &PeerId,
request_id: RequestId,
request_id: Id,
) -> ProcessingResult {
if let Some(batch) = self.batches.get_mut(&batch_id) {
// A batch could be retried without the peer failing the request (disconnecting/

View File

@@ -45,8 +45,9 @@ use super::chain_collection::ChainCollection;
use super::sync_type::RangeSyncType;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::status::ToStatusMessage;
use crate::sync::manager::Id;
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::{BatchProcessResult, RequestId};
use crate::sync::BatchProcessResult;
use beacon_chain::{BeaconChain, BeaconChainTypes};
use lighthouse_network::PeerId;
use lighthouse_network::SyncInfo;
@@ -201,7 +202,7 @@ where
peer_id: PeerId,
chain_id: ChainId,
batch_id: BatchId,
request_id: RequestId,
request_id: Id,
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
) {
// check if this chunk removes the chain
@@ -300,7 +301,7 @@ where
peer_id: PeerId,
batch_id: BatchId,
chain_id: ChainId,
request_id: RequestId,
request_id: Id,
) {
// check that this request is pending
match self.chains.call_by_id(chain_id, |chain| {
@@ -364,6 +365,7 @@ where
#[cfg(test)]
mod tests {
use crate::service::RequestId;
use crate::NetworkMessage;
use super::*;
@@ -494,10 +496,7 @@ mod tests {
}
/// Reads a BlocksByRange request to a given peer from the network receiver channel.
fn grab_request(
&mut self,
expected_peer: &PeerId,
) -> (lighthouse_network::rpc::RequestId, BlocksByRangeRequest) {
fn grab_request(&mut self, expected_peer: &PeerId) -> (RequestId, BlocksByRangeRequest) {
if let Some(NetworkMessage::SendRequest {
peer_id,
request: Request::BlocksByRange(request),