mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-08 01:05:47 +00:00
Fix data race in load_hot_state
This was found in the wild on a node that was reconstructing states and had a ~350 epoch gap between finalization and the split slot. When the finalization migration ran it deleted some of the hot state summaries that were still being iterated by `load_hot_state`, causing state storage to fail and the database to become corrupt (due to the atomicity bug from `unstable`). This commit additonally adds states created from diffs to the cache. This should help avoid iterating back so far but is strictly a performance improvement and *not* a correctness fix.
This commit is contained in:
@@ -1015,6 +1015,11 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Take a read lock on the split point while we load data from prior states. We need
|
||||||
|
// to prevent the finalization migration from deleting the state summaries and state diffs
|
||||||
|
// that we are iterating back through.
|
||||||
|
let split_read_lock = self.split.read_recursive();
|
||||||
|
|
||||||
// Backtrack until we reach a state that is in the cache, or in the worst case
|
// Backtrack until we reach a state that is in the cache, or in the worst case
|
||||||
// the finalized state (this should only be reachable on first start-up).
|
// the finalized state (this should only be reachable on first start-up).
|
||||||
let state_summary_iter = HotStateRootIter::new(self, target_slot, *state_root);
|
let state_summary_iter = HotStateRootIter::new(self, target_slot, *state_root);
|
||||||
@@ -1052,7 +1057,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
|
|
||||||
// If the prior state is the split state and it isn't cached then load it in
|
// If the prior state is the split state and it isn't cached then load it in
|
||||||
// entirety from disk. This should only happen on first start up.
|
// entirety from disk. This should only happen on first start up.
|
||||||
if prior_state_root == self.get_split_info().state_root {
|
if prior_state_root == split_read_lock.state_root {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Using split state as base state for replay";
|
"Using split state as base state for replay";
|
||||||
@@ -1075,6 +1080,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
|
|
||||||
let (_, mut state) = base_state.ok_or(Error::NoBaseStateFound(*state_root))?;
|
let (_, mut state) = base_state.ok_or(Error::NoBaseStateFound(*state_root))?;
|
||||||
|
|
||||||
|
// Finished reading information about prior states, allow the split point to update.
|
||||||
|
drop(split_read_lock);
|
||||||
|
|
||||||
// Construct a mutable iterator for the state roots, which will be iterated through
|
// Construct a mutable iterator for the state roots, which will be iterated through
|
||||||
// consecutive calls to `replay_blocks`.
|
// consecutive calls to `replay_blocks`.
|
||||||
let mut state_roots_iter = state_roots.into_iter();
|
let mut state_roots_iter = state_roots.into_iter();
|
||||||
@@ -1182,6 +1190,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
|
|
||||||
// Apply state diff. Block replay should have ensured that the diff is now applicable.
|
// Apply state diff. Block replay should have ensured that the diff is now applicable.
|
||||||
if let Some((summary, to_root, diff)) = next_state_diff {
|
if let Some((summary, to_root, diff)) = next_state_diff {
|
||||||
|
let block_root = summary.latest_block_root;
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Applying state diff";
|
"Applying state diff";
|
||||||
@@ -1189,6 +1198,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
"from_slot" => summary.diff_base_slot,
|
"from_slot" => summary.diff_base_slot,
|
||||||
"to_root" => ?to_root,
|
"to_root" => ?to_root,
|
||||||
"to_slot" => summary.slot,
|
"to_slot" => summary.slot,
|
||||||
|
"block_root" => ?block_root,
|
||||||
);
|
);
|
||||||
debug_assert_eq!(summary.diff_base_slot, state.slot());
|
debug_assert_eq!(summary.diff_base_slot, state.slot());
|
||||||
|
|
||||||
@@ -1196,6 +1206,12 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
|
|||||||
|
|
||||||
state.update_tree_hash_cache()?;
|
state.update_tree_hash_cache()?;
|
||||||
state.build_all_caches(&self.spec)?;
|
state.build_all_caches(&self.spec)?;
|
||||||
|
|
||||||
|
// Add state to the cache, it is by definition an epoch boundary state and likely
|
||||||
|
// to be useful.
|
||||||
|
self.state_cache
|
||||||
|
.lock()
|
||||||
|
.put_state(to_root, block_root, &state)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user