Fix head tracker concurrency bugs (#1771)

## Issue Addressed

Closes #1557

## Proposed Changes

Modify the pruning algorithm so that it mutates the head-tracker _before_ committing the database transaction to disk, and _only if_ all the heads to be removed are still present in the head-tracker (i.e. no concurrent mutations).

In the process of writing and testing this I also had to make a few other changes:

* Use internal mutability for all `BeaconChainHarness` functions (namely the RNG and the graffiti), in order to enable parallel calls (see testing section below).
* Disable logging in harness tests unless the `test_logger` feature is turned on

And chose to make some clean-ups:

* Delete the `NullMigrator`
* Remove type-based configuration for the migrator in favour of runtime config (simpler, less duplicated code)
* Use the non-blocking migrator unless the blocking migrator is required. In the store tests we need the blocking migrator because some tests make asserts about the state of the DB after the migration has run.
* Rename `validators_keypairs` -> `validator_keypairs` in the `BeaconChainHarness`

## Testing

To confirm that the fix worked, I wrote a test using [Hiatus](https://crates.io/crates/hiatus), which can be found here:

https://github.com/michaelsproul/lighthouse/tree/hiatus-issue-1557

That test can't be merged because it inserts random breakpoints everywhere, but if you check out that branch you can run the test with:

```
$ cd beacon_node/beacon_chain
$ cargo test --release --test parallel_tests --features test_logger
```

It should pass, and the log output should show:

```
WARN Pruning deferred because of a concurrent mutation, message: this is expected only very rarely!
```

## Additional Info

This is a backwards-compatible change with no impact on consensus.
This commit is contained in:
Michael Sproul
2020-10-19 05:58:39 +00:00
parent 6ba997b88e
commit 703c33bdc7
25 changed files with 538 additions and 729 deletions

View File

@@ -4,7 +4,7 @@ use crate::beacon_chain::{
use crate::eth1_chain::{CachingEth1Backend, SszEth1};
use crate::events::NullEventHandler;
use crate::head_tracker::HeadTracker;
use crate::migrate::Migrate;
use crate::migrate::{BackgroundMigrator, MigratorConfig};
use crate::persisted_beacon_chain::PersistedBeaconChain;
use crate::persisted_fork_choice::PersistedForkChoice;
use crate::shuffling_cache::ShufflingCache;
@@ -21,7 +21,7 @@ use fork_choice::ForkChoice;
use futures::channel::mpsc::Sender;
use operation_pool::{OperationPool, PersistedOperationPool};
use parking_lot::RwLock;
use slog::{info, Logger};
use slog::{crit, info, Logger};
use slot_clock::{SlotClock, TestingSlotClock};
use std::marker::PhantomData;
use std::path::PathBuf;
@@ -37,17 +37,8 @@ pub const PUBKEY_CACHE_FILENAME: &str = "pubkey_cache.ssz";
/// An empty struct used to "witness" all the `BeaconChainTypes` traits. It has no user-facing
/// functionality and only exists to satisfy the type system.
pub struct Witness<
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
TEventHandler,
THotStore,
TColdStore,
>(
pub struct Witness<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>(
PhantomData<(
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
@@ -57,21 +48,11 @@ pub struct Witness<
)>,
);
impl<TStoreMigrator, TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
BeaconChainTypes
for Witness<
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
TEventHandler,
THotStore,
TColdStore,
>
impl<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore> BeaconChainTypes
for Witness<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
where
THotStore: ItemStore<TEthSpec> + 'static,
TColdStore: ItemStore<TEthSpec> + 'static,
TStoreMigrator: Migrate<TEthSpec, THotStore, TColdStore> + 'static,
TSlotClock: SlotClock + 'static,
TEth1Backend: Eth1ChainBackend<TEthSpec> + 'static,
TEthSpec: EthSpec + 'static,
@@ -79,7 +60,6 @@ where
{
type HotStore = THotStore;
type ColdStore = TColdStore;
type StoreMigrator = TStoreMigrator;
type SlotClock = TSlotClock;
type Eth1Chain = TEth1Backend;
type EthSpec = TEthSpec;
@@ -97,7 +77,7 @@ where
pub struct BeaconChainBuilder<T: BeaconChainTypes> {
#[allow(clippy::type_complexity)]
store: Option<Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>>,
store_migrator: Option<T::StoreMigrator>,
store_migrator_config: Option<MigratorConfig>,
pub genesis_time: Option<u64>,
genesis_block_root: Option<Hash256>,
#[allow(clippy::type_complexity)]
@@ -120,22 +100,13 @@ pub struct BeaconChainBuilder<T: BeaconChainTypes> {
graffiti: Graffiti,
}
impl<TStoreMigrator, TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
impl<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
BeaconChainBuilder<
Witness<
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
TEventHandler,
THotStore,
TColdStore,
>,
Witness<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>,
>
where
THotStore: ItemStore<TEthSpec> + 'static,
TColdStore: ItemStore<TEthSpec> + 'static,
TStoreMigrator: Migrate<TEthSpec, THotStore, TColdStore> + 'static,
TSlotClock: SlotClock + 'static,
TEth1Backend: Eth1ChainBackend<TEthSpec> + 'static,
TEthSpec: EthSpec + 'static,
@@ -148,7 +119,7 @@ where
pub fn new(_eth_spec_instance: TEthSpec) -> Self {
Self {
store: None,
store_migrator: None,
store_migrator_config: None,
genesis_time: None,
genesis_block_root: None,
fork_choice: None,
@@ -195,9 +166,9 @@ where
self
}
/// Sets the store migrator.
pub fn store_migrator(mut self, store_migrator: TStoreMigrator) -> Self {
self.store_migrator = Some(store_migrator);
/// Sets the store migrator config (optional).
pub fn store_migrator_config(mut self, config: MigratorConfig) -> Self {
self.store_migrator_config = Some(config);
self
}
@@ -448,15 +419,7 @@ where
self,
) -> Result<
BeaconChain<
Witness<
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
TEventHandler,
THotStore,
TColdStore,
>,
Witness<TSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>,
>,
String,
> {
@@ -548,13 +511,19 @@ where
.map_err(|e| format!("Unable to init validator pubkey cache: {:?}", e))
})?;
let migrator_config = self.store_migrator_config.unwrap_or_default();
let store_migrator = BackgroundMigrator::new(
store.clone(),
migrator_config,
genesis_block_root,
log.clone(),
);
let beacon_chain = BeaconChain {
spec: self.spec,
config: self.chain_config,
store,
store_migrator: self
.store_migrator
.ok_or_else(|| "Cannot build without store migrator".to_string())?,
store_migrator,
slot_clock,
op_pool: self
.op_pool
@@ -634,10 +603,9 @@ where
}
}
impl<TStoreMigrator, TSlotClock, TEthSpec, TEventHandler, THotStore, TColdStore>
impl<TSlotClock, TEthSpec, TEventHandler, THotStore, TColdStore>
BeaconChainBuilder<
Witness<
TStoreMigrator,
TSlotClock,
CachingEth1Backend<TEthSpec>,
TEthSpec,
@@ -649,7 +617,6 @@ impl<TStoreMigrator, TSlotClock, TEthSpec, TEventHandler, THotStore, TColdStore>
where
THotStore: ItemStore<TEthSpec> + 'static,
TColdStore: ItemStore<TEthSpec> + 'static,
TStoreMigrator: Migrate<TEthSpec, THotStore, TColdStore> + 'static,
TSlotClock: SlotClock + 'static,
TEthSpec: EthSpec + 'static,
TEventHandler: EventHandler<TEthSpec> + 'static,
@@ -675,22 +642,13 @@ where
}
}
impl<TStoreMigrator, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
impl<TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>
BeaconChainBuilder<
Witness<
TStoreMigrator,
TestingSlotClock,
TEth1Backend,
TEthSpec,
TEventHandler,
THotStore,
TColdStore,
>,
Witness<TestingSlotClock, TEth1Backend, TEthSpec, TEventHandler, THotStore, TColdStore>,
>
where
THotStore: ItemStore<TEthSpec> + 'static,
TColdStore: ItemStore<TEthSpec> + 'static,
TStoreMigrator: Migrate<TEthSpec, THotStore, TColdStore> + 'static,
TEth1Backend: Eth1ChainBackend<TEthSpec> + 'static,
TEthSpec: EthSpec + 'static,
TEventHandler: EventHandler<TEthSpec> + 'static,
@@ -713,10 +671,9 @@ where
}
}
impl<TStoreMigrator, TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>
impl<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>
BeaconChainBuilder<
Witness<
TStoreMigrator,
TSlotClock,
TEth1Backend,
TEthSpec,
@@ -728,7 +685,6 @@ impl<TStoreMigrator, TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>
where
THotStore: ItemStore<TEthSpec> + 'static,
TColdStore: ItemStore<TEthSpec> + 'static,
TStoreMigrator: Migrate<TEthSpec, THotStore, TColdStore> + 'static,
TSlotClock: SlotClock + 'static,
TEth1Backend: Eth1ChainBackend<TEthSpec> + 'static,
TEthSpec: EthSpec + 'static,
@@ -760,7 +716,6 @@ fn genesis_block<T: EthSpec>(
#[cfg(test)]
mod test {
use super::*;
use crate::migrate::NullMigrator;
use eth2_hashing::hash;
use genesis::{generate_deterministic_keypairs, interop_genesis_state};
use sloggers::{null::NullLoggerBuilder, Build};
@@ -805,7 +760,6 @@ mod test {
let chain = BeaconChainBuilder::new(MinimalEthSpec)
.logger(log.clone())
.store(Arc::new(store))
.store_migrator(NullMigrator)
.data_dir(data_dir.path().to_path_buf())
.genesis_state(genesis_state)
.expect("should build state using recent genesis")