Race condition fix + Reliability improvements around forks pruning (#1132)

* Improve error handling in block iteration

* Introduce atomic DB operations

* Fix race condition

An invariant was violated:  For every block hash in head_tracker, that
block is accessible from the store.
This commit is contained in:
Adam Szkoda
2020-05-16 05:23:32 +02:00
committed by GitHub
parent 1cb274008d
commit 59ead67f76
9 changed files with 163 additions and 52 deletions

View File

@@ -1482,7 +1482,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
metrics::stop_timer(fork_choice_register_timer);
self.head_tracker.register_block(block_root, &block);
metrics::observe(
&metrics::OPERATIONS_PER_BLOCK_ATTESTATION,
block.body.attestations.len() as f64,
@@ -1503,6 +1502,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
self.store.put_state(&block.state_root, &state)?;
self.store.put_block(&block_root, signed_block.clone())?;
let parent_root = block.parent_root;
let slot = block.slot;
self.snapshot_cache
.try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
.map(|mut snapshot_cache| {
@@ -1522,6 +1524,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
);
});
self.head_tracker
.register_block(block_root, parent_root, slot);
metrics::stop_timer(db_write_timer);
metrics::inc_counter(&metrics::BLOCK_PROCESSING_SUCCESSES);
@@ -2007,9 +2012,8 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
};
for (head_hash, _head_slot) in heads {
for (block_hash, signed_beacon_block) in
ParentRootBlockIterator::new(&*self.store, head_hash)
{
for maybe_pair in ParentRootBlockIterator::new(&*self.store, head_hash) {
let (block_hash, signed_beacon_block) = maybe_pair.unwrap();
if visited.contains(&block_hash) {
break;
}

View File

@@ -2,7 +2,7 @@ use parking_lot::RwLock;
use ssz_derive::{Decode, Encode};
use std::collections::HashMap;
use std::iter::FromIterator;
use types::{BeaconBlock, EthSpec, Hash256, Slot};
use types::{Hash256, Slot};
#[derive(Debug, PartialEq)]
pub enum Error {
@@ -23,10 +23,10 @@ impl HeadTracker {
/// This function assumes that no block is imported without its parent having already been
/// imported. It cannot detect an error if this is not the case, it is the responsibility of
/// the upstream user.
pub fn register_block<E: EthSpec>(&self, block_root: Hash256, block: &BeaconBlock<E>) {
pub fn register_block(&self, block_root: Hash256, parent_root: Hash256, slot: Slot) {
let mut map = self.0.write();
map.remove(&block.parent_root);
map.insert(block_root, block.slot);
map.remove(&parent_root);
map.insert(block_root, slot);
}
/// Removes abandoned head.
@@ -107,7 +107,7 @@ pub struct SszHeadTracker {
mod test {
use super::*;
use ssz::{Decode, Encode};
use types::MainnetEthSpec;
use types::{BeaconBlock, EthSpec, MainnetEthSpec};
type E = MainnetEthSpec;
@@ -118,7 +118,7 @@ mod test {
let head_tracker = HeadTracker::default();
for i in 0..16 {
let mut block = BeaconBlock::empty(spec);
let mut block: BeaconBlock<E> = BeaconBlock::empty(spec);
let block_root = Hash256::from_low_u64_be(i);
block.slot = Slot::new(i);
@@ -128,7 +128,7 @@ mod test {
Hash256::from_low_u64_be(i - 1)
};
head_tracker.register_block::<E>(block_root, &block);
head_tracker.register_block(block_root, block.parent_root, block.slot);
}
assert_eq!(
@@ -137,11 +137,11 @@ mod test {
"should only have one head"
);
let mut block = BeaconBlock::empty(spec);
let mut block: BeaconBlock<E> = BeaconBlock::empty(spec);
let block_root = Hash256::from_low_u64_be(42);
block.slot = Slot::new(15);
block.parent_root = Hash256::from_low_u64_be(14);
head_tracker.register_block::<E>(block_root, &block);
head_tracker.register_block(block_root, block.parent_root, block.slot);
let heads = head_tracker.heads();

View File

@@ -3,13 +3,12 @@ use crate::head_tracker::HeadTracker;
use parking_lot::Mutex;
use slog::{debug, warn, Logger};
use std::collections::{HashMap, HashSet};
use std::iter::FromIterator;
use std::mem;
use std::sync::mpsc;
use std::sync::Arc;
use std::thread;
use store::iter::{ParentRootBlockIterator, RootsIterator};
use store::{hot_cold_store::HotColdDBError, Error, SimpleDiskStore, Store};
use store::{hot_cold_store::HotColdDBError, Error, SimpleDiskStore, Store, StoreOp};
pub use store::{DiskStore, MemoryStore};
use types::*;
use types::{BeaconState, EthSpec, Hash256, Slot};
@@ -49,18 +48,21 @@ pub trait Migrate<S: Store<E>, E: EthSpec>: Send + Sync + 'static {
// Collect hashes from new_finalized_block back to old_finalized_block (inclusive)
let mut found_block = false; // hack for `take_until`
let newly_finalized_blocks: HashMap<SignedBeaconBlockHash, Slot> = HashMap::from_iter(
let newly_finalized_blocks: HashMap<SignedBeaconBlockHash, Slot> =
ParentRootBlockIterator::new(&*store, new_finalized_block_hash.into())
.take_while(|(block_hash, _)| {
if found_block {
false
} else {
found_block |= *block_hash == old_finalized_block_hash.into();
true
.take_while(|result| match result {
Ok((block_hash, _)) => {
if found_block {
false
} else {
found_block |= *block_hash == old_finalized_block_hash.into();
true
}
}
Err(_) => true,
})
.map(|(block_hash, block)| (block_hash.into(), block.slot())),
);
.map(|result| result.map(|(block_hash, block)| (block_hash.into(), block.slot())))
.collect::<Result<_, _>>()?;
// We don't know which blocks are shared among abandoned chains, so we buffer and delete
// everything in one fell swoop.
@@ -141,14 +143,16 @@ pub trait Migrate<S: Store<E>, E: EthSpec>: Send + Sync + 'static {
}
}
// XXX Should be performed atomically, see
// https://github.com/sigp/lighthouse/issues/692
for block_hash in abandoned_blocks.into_iter() {
store.delete_block(&block_hash.into())?;
}
for (slot, state_hash) in abandoned_states.into_iter() {
store.delete_state(&state_hash.into(), slot)?;
}
let batch: Vec<StoreOp> = abandoned_blocks
.into_iter()
.map(|block_hash| StoreOp::DeleteBlock(block_hash))
.chain(
abandoned_states
.into_iter()
.map(|(slot, state_hash)| StoreOp::DeleteState(state_hash, slot)),
)
.collect();
store.do_atomically(&batch)?;
for head_hash in abandoned_heads.into_iter() {
head_tracker.remove_head(head_hash);
}