Fix syncing bugs by recursively attempting to process parents in the … (#429)

* Fix syncing bugs by recursively attempting to process parents in the import queue, change BlockRootsIterator

* Swap from crossbeam channel to tokio mpsc

* Recursion fix

* Remove exess block processing

* Fix network lag, correct attestation topic

* Correct network poll logic

* Overhaul of SimpleSync and modify BlockRootsIterator to return start_slot

* Fix bug in tests relating to StateRootsIterator

* Remove old, commented-out heartbeat code.

* Tidy docs on import queue enum

* Change source logging msg in simple sync

* Rename function parameter in simple sync

* Use `BestBlockRootsIterator` in `reduced_tree`

* Update comments for `BestBlockRootsIterator`

* Fix duplicate dep in cargo.toml
This commit is contained in:
Kirk Baird
2019-07-16 17:28:15 +10:00
committed by Paul Hauner
parent 88c6d15c32
commit 0513559252
21 changed files with 515 additions and 252 deletions

View File

@@ -41,31 +41,23 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
}
}
/// Completes all possible partials into `BeaconBlock` and returns them, sorted by increasing
/// slot number. Does not delete the partials from the queue, this must be done manually.
///
/// Returns `(queue_index, block, sender)`:
///
/// - `block_root`: may be used to remove the entry if it is successfully processed.
/// - `block`: the completed block.
/// - `sender`: the `PeerId` the provided the `BeaconBlockBody` which completed the partial.
pub fn complete_blocks(&self) -> Vec<(Hash256, BeaconBlock, PeerId)> {
let mut complete: Vec<(Hash256, BeaconBlock, PeerId)> = self
.partials
.iter()
.filter_map(|(_, partial)| partial.clone().complete())
.collect();
// Sort the completable partials to be in ascending slot order.
complete.sort_unstable_by(|a, b| a.1.slot.partial_cmp(&b.1.slot).unwrap());
complete
}
/// Returns true of the if the `BlockRoot` is found in the `import_queue`.
pub fn contains_block_root(&self, block_root: Hash256) -> bool {
self.partials.contains_key(&block_root)
}
/// Attempts to complete the `BlockRoot` if it is found in the `import_queue`.
///
/// Returns an Enum with a `PartialBeaconBlockCompletion`.
/// Does not remove the `block_root` from the `import_queue`.
pub fn attempt_complete_block(&self, block_root: Hash256) -> PartialBeaconBlockCompletion {
if let Some(partial) = self.partials.get(&block_root) {
partial.attempt_complete()
} else {
PartialBeaconBlockCompletion::MissingRoot
}
}
/// Removes the first `PartialBeaconBlock` with a matching `block_root`, returning the partial
/// if it exists.
pub fn remove(&mut self, block_root: Hash256) -> Option<PartialBeaconBlock> {
@@ -102,6 +94,8 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
block_roots: &[BlockRootSlot],
sender: PeerId,
) -> Vec<BlockRootSlot> {
// TODO: This will currently not return a `BlockRootSlot` if this root exists but there is no header.
// It would be more robust if it did.
let new_block_root_slots: Vec<BlockRootSlot> = block_roots
.iter()
// Ignore any roots already stored in the queue.
@@ -135,12 +129,8 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
/// the queue and it's block root is included in the output.
///
/// If a `header` is already in the queue, but not yet processed by the chain the block root is
/// included in the output and the `inserted` time for the partial record is set to
/// not included in the output and the `inserted` time for the partial record is set to
/// `Instant::now()`. Updating the `inserted` time stops the partial from becoming stale.
///
/// Presently the queue enforces that a `BeaconBlockHeader` _must_ be received before its
/// `BeaconBlockBody`. This is not a natural requirement and we could enhance the queue to lift
/// this restraint.
pub fn enqueue_headers(
&mut self,
headers: Vec<BeaconBlockHeader>,
@@ -152,8 +142,10 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
let block_root = Hash256::from_slice(&header.canonical_root()[..]);
if self.chain_has_not_seen_block(&block_root) {
self.insert_header(block_root, header, sender.clone());
required_bodies.push(block_root);
if !self.insert_header(block_root, header, sender.clone()) {
// If a body is empty
required_bodies.push(block_root);
}
}
}
@@ -163,10 +155,17 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
/// If there is a matching `header` for this `body`, adds it to the queue.
///
/// If there is no `header` for the `body`, the body is simply discarded.
pub fn enqueue_bodies(&mut self, bodies: Vec<BeaconBlockBody>, sender: PeerId) {
pub fn enqueue_bodies(
&mut self,
bodies: Vec<BeaconBlockBody>,
sender: PeerId,
) -> Option<Hash256> {
let mut last_block_hash = None;
for body in bodies {
self.insert_body(body, sender.clone());
last_block_hash = self.insert_body(body, sender.clone());
}
last_block_hash
}
pub fn enqueue_full_blocks(&mut self, blocks: Vec<BeaconBlock>, sender: PeerId) {
@@ -179,12 +178,22 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
///
/// If the header already exists, the `inserted` time is set to `now` and not other
/// modifications are made.
fn insert_header(&mut self, block_root: Hash256, header: BeaconBlockHeader, sender: PeerId) {
/// Returns true is `body` exists.
fn insert_header(
&mut self,
block_root: Hash256,
header: BeaconBlockHeader,
sender: PeerId,
) -> bool {
let mut exists = false;
self.partials
.entry(block_root)
.and_modify(|partial| {
partial.header = Some(header.clone());
partial.inserted = Instant::now();
if partial.body.is_some() {
exists = true;
}
})
.or_insert_with(|| PartialBeaconBlock {
slot: header.slot,
@@ -194,28 +203,30 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
inserted: Instant::now(),
sender,
});
exists
}
/// Updates an existing partial with the `body`.
///
/// If there is no header for the `body`, the body is simply discarded.
///
/// If the body already existed, the `inserted` time is set to `now`.
fn insert_body(&mut self, body: BeaconBlockBody, sender: PeerId) {
///
/// Returns the block hash of the inserted body
fn insert_body(&mut self, body: BeaconBlockBody, sender: PeerId) -> Option<Hash256> {
let body_root = Hash256::from_slice(&body.tree_hash_root()[..]);
let mut last_root = None;
self.partials.iter_mut().for_each(|(_, mut p)| {
self.partials.iter_mut().for_each(|(root, mut p)| {
if let Some(header) = &mut p.header {
if body_root == header.block_body_root {
p.inserted = Instant::now();
if p.body.is_none() {
p.body = Some(body.clone());
p.sender = sender.clone();
}
p.body = Some(body.clone());
p.sender = sender.clone();
last_root = Some(*root);
}
}
});
last_root
}
/// Updates an existing `partial` with the completed block, or adds a new (complete) partial.
@@ -257,13 +268,33 @@ pub struct PartialBeaconBlock {
}
impl PartialBeaconBlock {
/// Consumes `self` and returns a full built `BeaconBlock`, it's root and the `sender`
/// `PeerId`, if enough information exists to complete the block. Otherwise, returns `None`.
pub fn complete(self) -> Option<(Hash256, BeaconBlock, PeerId)> {
Some((
self.block_root,
self.header?.into_block(self.body?),
self.sender,
))
/// Attempts to build a block.
///
/// Does not comsume the `PartialBeaconBlock`.
pub fn attempt_complete(&self) -> PartialBeaconBlockCompletion {
if self.header.is_none() {
PartialBeaconBlockCompletion::MissingHeader(self.slot)
} else if self.body.is_none() {
PartialBeaconBlockCompletion::MissingBody
} else {
PartialBeaconBlockCompletion::Complete(
self.header
.clone()
.unwrap()
.into_block(self.body.clone().unwrap()),
)
}
}
}
/// The result of trying to convert a `BeaconBlock` into a `PartialBeaconBlock`.
pub enum PartialBeaconBlockCompletion {
/// The partial contains a valid BeaconBlock.
Complete(BeaconBlock),
/// The partial does not exist.
MissingRoot,
/// The partial contains a `BeaconBlockRoot` but no `BeaconBlockHeader`.
MissingHeader(Slot),
/// The partial contains a `BeaconBlockRoot` and `BeaconBlockHeader` but no `BeaconBlockBody`.
MissingBody,
}

View File

@@ -1,4 +1,4 @@
use super::import_queue::ImportQueue;
use super::import_queue::{ImportQueue, PartialBeaconBlockCompletion};
use crate::message_handler::NetworkContext;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessingOutcome};
use eth2_libp2p::rpc::methods::*;
@@ -17,7 +17,7 @@ use types::{
const SLOT_IMPORT_TOLERANCE: u64 = 100;
/// The amount of seconds a block (or partial block) may exist in the import queue.
const QUEUE_STALE_SECS: u64 = 6;
const QUEUE_STALE_SECS: u64 = 100;
/// If a block is more than `FUTURE_SLOT_TOLERANCE` slots ahead of our slot clock, we drop it.
/// Otherwise we queue it.
@@ -227,7 +227,12 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
//
// Therefore, there are some blocks between the local finalized epoch and the remote
// head that are worth downloading.
debug!(self.log, "UsefulPeer"; "peer" => format!("{:?}", peer_id));
debug!(
self.log, "UsefulPeer";
"peer" => format!("{:?}", peer_id),
"local_finalized_epoch" => local.latest_finalized_epoch,
"remote_latest_finalized_epoch" => remote.latest_finalized_epoch,
);
let start_slot = local
.latest_finalized_epoch
@@ -238,7 +243,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
peer_id,
BeaconBlockRootsRequest {
start_slot,
count: required_slots.into(),
count: required_slots.as_u64(),
},
network,
);
@@ -247,7 +252,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
fn root_at_slot(&self, target_slot: Slot) -> Option<Hash256> {
self.chain
.rev_iter_block_roots(target_slot)
.rev_iter_best_block_roots(target_slot)
.take(1)
.find(|(_root, slot)| *slot == target_slot)
.map(|(root, _slot)| root)
@@ -271,8 +276,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
let mut roots: Vec<BlockRootSlot> = self
.chain
.rev_iter_block_roots(req.start_slot + req.count)
.skip(1)
.rev_iter_best_block_roots(req.start_slot + req.count)
.take(req.count as usize)
.map(|(block_root, slot)| BlockRootSlot { slot, block_root })
.collect();
@@ -356,7 +360,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
BeaconBlockHeadersRequest {
start_root: first.block_root,
start_slot: first.slot,
max_headers: (last.slot - first.slot).as_u64(),
max_headers: (last.slot - first.slot + 1).as_u64(),
skip_slots: 0,
},
network,
@@ -386,7 +390,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
// unnecessary block deserialization when `req.skip_slots > 0`.
let mut roots: Vec<Hash256> = self
.chain
.rev_iter_block_roots(req.start_slot + (count - 1))
.rev_iter_best_block_roots(req.start_slot + count)
.take(count as usize)
.map(|(root, _slot)| root)
.collect();
@@ -499,14 +503,26 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
"count" => res.block_bodies.len(),
);
self.import_queue
.enqueue_bodies(res.block_bodies, peer_id.clone());
if !res.block_bodies.is_empty() {
// Import all blocks to queue
let last_root = self
.import_queue
.enqueue_bodies(res.block_bodies, peer_id.clone());
// Attempt to process all recieved bodies by recursively processing the latest block
if let Some(root) = last_root {
match self.attempt_process_partial_block(peer_id, root, network, &"rpc") {
Some(BlockProcessingOutcome::Processed { block_root: _ }) => {
// If processing is successful remove from `import_queue`
self.import_queue.remove(root);
}
_ => {}
}
}
}
// Clear out old entries
self.import_queue.remove_stale();
// Import blocks, if possible.
self.process_import_queue(network);
}
/// Process a gossip message declaring a new block.
@@ -526,31 +542,35 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
match outcome {
BlockProcessingOutcome::Processed { .. } => SHOULD_FORWARD_GOSSIP_BLOCK,
BlockProcessingOutcome::ParentUnknown { parent } => {
// Clean the stale entries from the queue.
self.import_queue.remove_stale();
// Add this block to the queue
self.import_queue
.enqueue_full_blocks(vec![block], peer_id.clone());
trace!(
self.log,
"NewGossipBlock";
.enqueue_full_blocks(vec![block.clone()], peer_id.clone());
debug!(
self.log, "RequestParentBlock";
"parent_root" => format!("{}", parent),
"parent_slot" => block.slot - 1,
"peer" => format!("{:?}", peer_id),
);
// Unless the parent is in the queue, request the parent block from the peer.
//
// It is likely that this is duplicate work, given we already send a hello
// request. However, I believe there are some edge-cases where the hello
// message doesn't suffice, so we perform this request as well.
if !self.import_queue.contains_block_root(parent) {
// Send a hello to learn of the clients best slot so we can then sync the required
// parent(s).
network.send_rpc_request(
peer_id.clone(),
RPCRequest::Hello(hello_message(&self.chain)),
);
}
// Request roots between parent and start of finality from peer.
let start_slot = self
.chain
.head()
.beacon_state
.finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch());
self.request_block_roots(
peer_id,
BeaconBlockRootsRequest {
// Request blocks between `latest_finalized_slot` and the `block`
start_slot,
count: block.slot.as_u64() - start_slot.as_u64(),
},
network,
);
// Clean the stale entries from the queue.
self.import_queue.remove_stale();
SHOULD_FORWARD_GOSSIP_BLOCK
}
@@ -592,40 +612,6 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
}
}
/// Iterate through the `import_queue` and process any complete blocks.
///
/// If a block is successfully processed it is removed from the queue, otherwise it remains in
/// the queue.
pub fn process_import_queue(&mut self, network: &mut NetworkContext) {
let mut successful = 0;
// Loop through all of the complete blocks in the queue.
for (block_root, block, sender) in self.import_queue.complete_blocks() {
let processing_result = self.process_block(sender, block.clone(), network, &"gossip");
let should_dequeue = match processing_result {
Some(BlockProcessingOutcome::ParentUnknown { .. }) => false,
Some(BlockProcessingOutcome::FutureSlot {
present_slot,
block_slot,
}) if present_slot + FUTURE_SLOT_TOLERANCE >= block_slot => false,
_ => true,
};
if processing_result == Some(BlockProcessingOutcome::Processed { block_root }) {
successful += 1;
}
if should_dequeue {
self.import_queue.remove(block_root);
}
}
if successful > 0 {
info!(self.log, "Imported {} blocks", successful)
}
}
/// Request some `BeaconBlockRoots` from the remote peer.
fn request_block_roots(
&mut self,
@@ -700,6 +686,89 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
hello_message(&self.chain)
}
/// Helper function to attempt to process a partial block.
///
/// If the block can be completed recursively call `process_block`
/// else request missing parts.
fn attempt_process_partial_block(
&mut self,
peer_id: PeerId,
block_root: Hash256,
network: &mut NetworkContext,
source: &str,
) -> Option<BlockProcessingOutcome> {
match self.import_queue.attempt_complete_block(block_root) {
PartialBeaconBlockCompletion::MissingBody => {
// Unable to complete the block because the block body is missing.
debug!(
self.log, "RequestParentBody";
"source" => source,
"block_root" => format!("{}", block_root),
"peer" => format!("{:?}", peer_id),
);
// Request the block body from the peer.
self.request_block_bodies(
peer_id,
BeaconBlockBodiesRequest {
block_roots: vec![block_root],
},
network,
);
None
}
PartialBeaconBlockCompletion::MissingHeader(slot) => {
// Unable to complete the block because the block header is missing.
debug!(
self.log, "RequestParentHeader";
"source" => source,
"block_root" => format!("{}", block_root),
"peer" => format!("{:?}", peer_id),
);
// Request the block header from the peer.
self.request_block_headers(
peer_id,
BeaconBlockHeadersRequest {
start_root: block_root,
start_slot: slot,
max_headers: 1,
skip_slots: 0,
},
network,
);
None
}
PartialBeaconBlockCompletion::MissingRoot => {
// The `block_root` is not known to the queue.
debug!(
self.log, "MissingParentRoot";
"source" => source,
"block_root" => format!("{}", block_root),
"peer" => format!("{:?}", peer_id),
);
// Do nothing.
None
}
PartialBeaconBlockCompletion::Complete(block) => {
// The block exists in the queue, attempt to process it
trace!(
self.log, "AttemptProcessParent";
"source" => source,
"block_root" => format!("{}", block_root),
"parent_slot" => block.slot,
"peer" => format!("{:?}", peer_id),
);
self.process_block(peer_id.clone(), block, network, source)
}
}
}
/// Processes the `block` that was received from `peer_id`.
///
/// If the block was submitted to the beacon chain without internal error, `Some(outcome)` is
@@ -726,6 +795,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
if let Ok(outcome) = processing_result {
match outcome {
BlockProcessingOutcome::Processed { block_root } => {
// The block was valid and we processed it successfully.
debug!(
self.log, "Imported block from network";
"source" => source,
@@ -735,26 +805,29 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
);
}
BlockProcessingOutcome::ParentUnknown { parent } => {
// The block was valid and we processed it successfully.
debug!(
// The parent has not been processed
trace!(
self.log, "ParentBlockUnknown";
"source" => source,
"parent_root" => format!("{}", parent),
"baby_block_slot" => block.slot,
"peer" => format!("{:?}", peer_id),
);
// Unless the parent is in the queue, request the parent block from the peer.
//
// It is likely that this is duplicate work, given we already send a hello
// request. However, I believe there are some edge-cases where the hello
// message doesn't suffice, so we perform this request as well.
if !self.import_queue.contains_block_root(parent) {
// Send a hello to learn of the clients best slot so we can then sync the require
// parent(s).
network.send_rpc_request(
peer_id.clone(),
RPCRequest::Hello(hello_message(&self.chain)),
);
// If the parent is in the `import_queue` attempt to complete it then process it.
match self.attempt_process_partial_block(peer_id, parent, network, source) {
// If processing parent is sucessful, re-process block and remove parent from queue
Some(BlockProcessingOutcome::Processed { block_root: _ }) => {
self.import_queue.remove(parent);
// Attempt to process `block` again
match self.chain.process_block(block) {
Ok(outcome) => return Some(outcome),
Err(_) => return None,
}
}
// All other cases leave `parent` in `import_queue` and return original outcome.
_ => {}
}
}
BlockProcessingOutcome::FutureSlot {