mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-18 04:13:00 +00:00
The head tracker is a persisted piece of state that must be kept in sync with the fork-choice. It has been a source of pruning issues in the past, so we want to remove it - see https://github.com/sigp/lighthouse/issues/1785 When implementing tree-states in the hot DB we have to change the pruning routine (more details below), so we want to make those changes first, in isolation. - see https://github.com/sigp/lighthouse/issues/6580 - If you want to see the full tree-states hot feature, see https://github.com/dapplion/lighthouse/pull/39 Closes https://github.com/sigp/lighthouse/issues/1785 **Current DB migration routine** - Locate abandoned heads with the head tracker - Use a roots iterator to collect the ancestors of those heads that can be pruned - Delete those abandoned blocks / states - Migrate the newly finalized chain to the freezer In summary, it computes what it has to delete and keeps the rest. Then it migrates data to the freezer. If the abandoned forks routine has a bug it can break the freezer migration. **Proposed migration routine (this PR)** - Migrate the newly finalized chain to the freezer - Load all state summaries from disk - From those, knowing just the head and the finalized block, compute two sets: (1) descendants of finalized (2) newly finalized chain - Iterate all summaries; if a summary does not belong to set (1) or (2), delete it This strategy is more sound as it just checks what's there in the hot DB, computes what it has to keep and deletes the rest. Because it does not rely on third-party pieces of data we can drop the head tracker and pruning checkpoint. Since the DB migration happens **first** now, as long as the computation of the sets to keep is correct we won't have pruning issues.
465 lines
17 KiB
Rust
465 lines
17 KiB
Rust
use itertools::Itertools;
|
|
use std::{
|
|
cmp::Ordering,
|
|
collections::{btree_map::Entry, BTreeMap, HashMap},
|
|
};
|
|
use types::{Hash256, Slot};
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub struct DAGStateSummary {
|
|
pub slot: Slot,
|
|
pub latest_block_root: Hash256,
|
|
pub latest_block_slot: Slot,
|
|
pub previous_state_root: Hash256,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub struct DAGStateSummaryV22 {
|
|
pub slot: Slot,
|
|
pub latest_block_root: Hash256,
|
|
pub block_slot: Slot,
|
|
pub block_parent_root: Hash256,
|
|
}
|
|
|
|
pub struct StateSummariesDAG {
|
|
// state_root -> state_summary
|
|
state_summaries_by_state_root: HashMap<Hash256, DAGStateSummary>,
|
|
// block_root -> state slot -> (state_root, state summary)
|
|
state_summaries_by_block_root: HashMap<Hash256, BTreeMap<Slot, (Hash256, DAGStateSummary)>>,
|
|
// parent_state_root -> Vec<children_state_root>
|
|
// cached value to prevent having to recompute in each recursive call into `descendants_of`
|
|
child_state_roots: HashMap<Hash256, Vec<Hash256>>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum Error {
|
|
DuplicateStateSummary {
|
|
block_root: Hash256,
|
|
existing_state_summary: Box<(Slot, Hash256)>,
|
|
new_state_summary: (Slot, Hash256),
|
|
},
|
|
MissingStateSummary(Hash256),
|
|
MissingStateSummaryByBlockRoot {
|
|
state_root: Hash256,
|
|
latest_block_root: Hash256,
|
|
},
|
|
StateSummariesNotContiguous {
|
|
state_root: Hash256,
|
|
state_slot: Slot,
|
|
latest_block_root: Hash256,
|
|
parent_block_root: Box<Hash256>,
|
|
parent_block_latest_state_summary: Box<Option<(Slot, Hash256)>>,
|
|
},
|
|
MissingChildStateRoot(Hash256),
|
|
RequestedSlotAboveSummary {
|
|
starting_state_root: Hash256,
|
|
ancestor_slot: Slot,
|
|
state_root: Hash256,
|
|
state_slot: Slot,
|
|
},
|
|
RootUnknownPreviousStateRoot(Slot, Hash256),
|
|
RootUnknownAncestorStateRoot {
|
|
starting_state_root: Hash256,
|
|
ancestor_slot: Slot,
|
|
root_state_root: Hash256,
|
|
root_state_slot: Slot,
|
|
},
|
|
}
|
|
|
|
impl StateSummariesDAG {
|
|
pub fn new(state_summaries: Vec<(Hash256, DAGStateSummary)>) -> Result<Self, Error> {
|
|
// Group them by latest block root, and sorted state slot
|
|
let mut state_summaries_by_state_root = HashMap::new();
|
|
let mut state_summaries_by_block_root = HashMap::<_, BTreeMap<_, _>>::new();
|
|
let mut child_state_roots = HashMap::<_, Vec<_>>::new();
|
|
|
|
for (state_root, summary) in state_summaries.into_iter() {
|
|
let summaries = state_summaries_by_block_root
|
|
.entry(summary.latest_block_root)
|
|
.or_default();
|
|
|
|
// Sanity check to ensure no duplicate summaries for the tuple (block_root, state_slot)
|
|
match summaries.entry(summary.slot) {
|
|
Entry::Vacant(entry) => {
|
|
entry.insert((state_root, summary));
|
|
}
|
|
Entry::Occupied(existing) => {
|
|
return Err(Error::DuplicateStateSummary {
|
|
block_root: summary.latest_block_root,
|
|
existing_state_summary: (summary.slot, state_root).into(),
|
|
new_state_summary: (*existing.key(), existing.get().0),
|
|
})
|
|
}
|
|
}
|
|
|
|
state_summaries_by_state_root.insert(state_root, summary);
|
|
|
|
child_state_roots
|
|
.entry(summary.previous_state_root)
|
|
.or_default()
|
|
.push(state_root);
|
|
// Add empty entry for the child state
|
|
child_state_roots.entry(state_root).or_default();
|
|
}
|
|
|
|
Ok(Self {
|
|
state_summaries_by_state_root,
|
|
state_summaries_by_block_root,
|
|
child_state_roots,
|
|
})
|
|
}
|
|
|
|
/// Computes a DAG from a sequence of state summaries, including their parent block
|
|
/// relationships.
|
|
///
|
|
/// - Expects summaries to be contiguous per slot: there must exist a summary at every slot
|
|
/// of each tree branch
|
|
/// - Maybe include multiple disjoint trees. The root of each tree will have a ZERO parent state
|
|
/// root, which will error later when calling `previous_state_root`.
|
|
pub fn new_from_v22(
|
|
state_summaries_v22: Vec<(Hash256, DAGStateSummaryV22)>,
|
|
) -> Result<Self, Error> {
|
|
// Group them by latest block root, and sorted state slot
|
|
let mut state_summaries_by_block_root = HashMap::<_, BTreeMap<_, _>>::new();
|
|
for (state_root, summary) in state_summaries_v22.iter() {
|
|
let summaries = state_summaries_by_block_root
|
|
.entry(summary.latest_block_root)
|
|
.or_default();
|
|
|
|
// Sanity check to ensure no duplicate summaries for the tuple (block_root, state_slot)
|
|
match summaries.entry(summary.slot) {
|
|
Entry::Vacant(entry) => {
|
|
entry.insert((state_root, summary));
|
|
}
|
|
Entry::Occupied(existing) => {
|
|
return Err(Error::DuplicateStateSummary {
|
|
block_root: summary.latest_block_root,
|
|
existing_state_summary: (summary.slot, *state_root).into(),
|
|
new_state_summary: (*existing.key(), *existing.get().0),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
let state_summaries = state_summaries_v22
|
|
.iter()
|
|
.map(|(state_root, summary)| {
|
|
let previous_state_root = if summary.slot == 0 {
|
|
Hash256::ZERO
|
|
} else {
|
|
let previous_slot = summary.slot - 1;
|
|
|
|
// Check the set of states in the same state's block root
|
|
let same_block_root_summaries = state_summaries_by_block_root
|
|
.get(&summary.latest_block_root)
|
|
// Should never error: we construct the HashMap here and must have at least
|
|
// one entry per block root
|
|
.ok_or(Error::MissingStateSummaryByBlockRoot {
|
|
state_root: *state_root,
|
|
latest_block_root: summary.latest_block_root,
|
|
})?;
|
|
if let Some((state_root, _)) = same_block_root_summaries.get(&previous_slot) {
|
|
// Skipped slot: block root at previous slot is the same as latest block root.
|
|
**state_root
|
|
} else {
|
|
// Common case: not a skipped slot.
|
|
let parent_block_root = summary.block_parent_root;
|
|
if let Some(parent_block_summaries) =
|
|
state_summaries_by_block_root.get(&parent_block_root)
|
|
{
|
|
*parent_block_summaries
|
|
.get(&previous_slot)
|
|
// Should never error: summaries are contiguous, so if there's an
|
|
// entry it must contain at least one summary at the previous slot.
|
|
.ok_or(Error::StateSummariesNotContiguous {
|
|
state_root: *state_root,
|
|
state_slot: summary.slot,
|
|
latest_block_root: summary.latest_block_root,
|
|
parent_block_root: parent_block_root.into(),
|
|
parent_block_latest_state_summary: parent_block_summaries
|
|
.iter()
|
|
.max_by(|a, b| a.0.cmp(b.0))
|
|
.map(|(slot, (state_root, _))| (*slot, **state_root))
|
|
.into(),
|
|
})?
|
|
.0
|
|
} else {
|
|
// We don't know of any summary with this parent block root. We'll
|
|
// consider this summary to be a root of `state_summaries_v22`
|
|
// collection and mark it as zero.
|
|
// The test store_tests::finalizes_non_epoch_start_slot manages to send two
|
|
// disjoint trees on its second migration.
|
|
Hash256::ZERO
|
|
}
|
|
}
|
|
};
|
|
|
|
Ok((
|
|
*state_root,
|
|
DAGStateSummary {
|
|
slot: summary.slot,
|
|
latest_block_root: summary.latest_block_root,
|
|
latest_block_slot: summary.block_slot,
|
|
previous_state_root,
|
|
},
|
|
))
|
|
})
|
|
.collect::<Result<Vec<_>, _>>()?;
|
|
|
|
Self::new(state_summaries)
|
|
}
|
|
|
|
// Returns all non-unique latest block roots of a given set of states
|
|
pub fn blocks_of_states<'a, I: Iterator<Item = &'a Hash256>>(
|
|
&self,
|
|
state_roots: I,
|
|
) -> Result<Vec<(Hash256, Slot)>, Error> {
|
|
state_roots
|
|
.map(|state_root| {
|
|
let summary = self
|
|
.state_summaries_by_state_root
|
|
.get(state_root)
|
|
.ok_or(Error::MissingStateSummary(*state_root))?;
|
|
Ok((summary.latest_block_root, summary.latest_block_slot))
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
// Returns all unique latest blocks of this DAG's summaries
|
|
pub fn iter_blocks(&self) -> impl Iterator<Item = (Hash256, Slot)> + '_ {
|
|
self.state_summaries_by_state_root
|
|
.values()
|
|
.map(|summary| (summary.latest_block_root, summary.latest_block_slot))
|
|
.unique()
|
|
}
|
|
|
|
/// Returns a vec of state summaries that have an unknown parent when forming the DAG tree
|
|
pub fn tree_roots(&self) -> Vec<(Hash256, DAGStateSummary)> {
|
|
self.state_summaries_by_state_root
|
|
.iter()
|
|
.filter_map(|(state_root, summary)| {
|
|
if self
|
|
.state_summaries_by_state_root
|
|
.contains_key(&summary.previous_state_root)
|
|
{
|
|
// Summaries with a known parent are not roots
|
|
None
|
|
} else {
|
|
Some((*state_root, *summary))
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
pub fn summaries_count(&self) -> usize {
|
|
self.state_summaries_by_block_root
|
|
.values()
|
|
.map(|s| s.len())
|
|
.sum()
|
|
}
|
|
|
|
/// Groups all `(state_root, summary)` pairs by state slot. The returned map is keyed by slot,
/// so iterating it visits slots in ascending order.
pub fn summaries_by_slot_ascending(&self) -> BTreeMap<Slot, Vec<(Hash256, DAGStateSummary)>> {
    let mut by_slot: BTreeMap<Slot, Vec<(Hash256, DAGStateSummary)>> = BTreeMap::new();
    self.state_summaries_by_state_root
        .iter()
        .for_each(|(state_root, summary)| {
            by_slot
                .entry(summary.slot)
                .or_default()
                .push((*state_root, *summary));
        });
    by_slot
}
|
|
|
|
pub fn previous_state_root(&self, state_root: Hash256) -> Result<Hash256, Error> {
|
|
let summary = self
|
|
.state_summaries_by_state_root
|
|
.get(&state_root)
|
|
.ok_or(Error::MissingStateSummary(state_root))?;
|
|
if summary.previous_state_root == Hash256::ZERO {
|
|
Err(Error::RootUnknownPreviousStateRoot(
|
|
summary.slot,
|
|
state_root,
|
|
))
|
|
} else {
|
|
Ok(summary.previous_state_root)
|
|
}
|
|
}
|
|
|
|
pub fn ancestor_state_root_at_slot(
|
|
&self,
|
|
starting_state_root: Hash256,
|
|
ancestor_slot: Slot,
|
|
) -> Result<Hash256, Error> {
|
|
let mut state_root = starting_state_root;
|
|
// Walk backwards until we reach the state at `ancestor_slot`.
|
|
loop {
|
|
let summary = self
|
|
.state_summaries_by_state_root
|
|
.get(&state_root)
|
|
.ok_or(Error::MissingStateSummary(state_root))?;
|
|
|
|
// Assumes all summaries are contiguous
|
|
match summary.slot.cmp(&ancestor_slot) {
|
|
Ordering::Less => {
|
|
return Err(Error::RequestedSlotAboveSummary {
|
|
starting_state_root,
|
|
ancestor_slot,
|
|
state_root,
|
|
state_slot: summary.slot,
|
|
})
|
|
}
|
|
Ordering::Equal => {
|
|
return Ok(state_root);
|
|
}
|
|
Ordering::Greater => {
|
|
if summary.previous_state_root == Hash256::ZERO {
|
|
return Err(Error::RootUnknownAncestorStateRoot {
|
|
starting_state_root,
|
|
ancestor_slot,
|
|
root_state_root: state_root,
|
|
root_state_slot: summary.slot,
|
|
});
|
|
} else {
|
|
state_root = summary.previous_state_root;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns all ancestors of `state_root` INCLUDING `state_root` until the next parent is not
|
|
/// known.
|
|
pub fn ancestors_of(&self, mut state_root: Hash256) -> Result<Vec<(Hash256, Slot)>, Error> {
|
|
// Sanity check that the first summary exists
|
|
if !self.state_summaries_by_state_root.contains_key(&state_root) {
|
|
return Err(Error::MissingStateSummary(state_root));
|
|
}
|
|
|
|
let mut ancestors = vec![];
|
|
loop {
|
|
if let Some(summary) = self.state_summaries_by_state_root.get(&state_root) {
|
|
ancestors.push((state_root, summary.slot));
|
|
state_root = summary.previous_state_root
|
|
} else {
|
|
return Ok(ancestors);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns of the descendant state summaries roots given an initiail state root.
|
|
pub fn descendants_of(&self, query_state_root: &Hash256) -> Result<Vec<Hash256>, Error> {
|
|
let mut descendants = vec![];
|
|
for child_root in self
|
|
.child_state_roots
|
|
.get(query_state_root)
|
|
.ok_or(Error::MissingChildStateRoot(*query_state_root))?
|
|
{
|
|
descendants.push(*child_root);
|
|
descendants.extend(self.descendants_of(child_root)?);
|
|
}
|
|
Ok(descendants)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{DAGStateSummaryV22, Error, StateSummariesDAG};
    use bls::FixedBytesExtended;
    use types::{Hash256, Slot};

    /// Builds a root from a small integer, used for both state roots and block roots.
    fn root(n: u64) -> Hash256 {
        Hash256::from_low_u64_le(n)
    }

    /// Builds a v22 summary fixture from plain integers.
    fn summary_v22(
        slot: u64,
        latest_block: u64,
        parent_block: u64,
        block_slot: u64,
    ) -> DAGStateSummaryV22 {
        DAGStateSummaryV22 {
            slot: Slot::new(slot),
            latest_block_root: root(latest_block),
            block_parent_root: root(parent_block),
            block_slot: Slot::new(block_slot),
        }
    }

    /// Asserts that `root` is a tree root, i.e. that asking for its previous state root fails
    /// with `RootUnknownPreviousStateRoot`.
    fn assert_previous_state_root_is_zero(dag: &StateSummariesDAG, root: Hash256) {
        let error = dag.previous_state_root(root).unwrap_err();
        assert!(matches!(error, Error::RootUnknownPreviousStateRoot { .. }));
    }

    #[test]
    fn new_from_v22_empty() {
        StateSummariesDAG::new_from_v22(vec![]).unwrap();
    }

    #[test]
    fn new_from_v22_one_state() {
        let state_root = root(0xa);
        let summaries = vec![(state_root, summary_v22(1, 1, 2, 1))];

        let dag = StateSummariesDAG::new_from_v22(summaries).unwrap();

        // The parent of the root summary is ZERO
        assert_previous_state_root_is_zero(&dag, state_root);
    }

    #[test]
    fn new_from_v22_multiple_states() {
        let summaries = vec![
            (root(0xa), summary_v22(3, 3, 1, 3)),
            (root(0xb), summary_v22(4, 4, 3, 4)),
            // fork 1
            (root(0xc), summary_v22(5, 5, 4, 5)),
            // fork 2
            // skipped slot
            (root(0xd), summary_v22(5, 4, 3, 4)),
            // normal slot
            (root(0xe), summary_v22(6, 6, 4, 6)),
        ];

        let dag = StateSummariesDAG::new_from_v22(summaries).unwrap();

        // The parent of the root summary is ZERO
        assert_previous_state_root_is_zero(&dag, root(0xa));
        assert_eq!(dag.previous_state_root(root(0xc)).unwrap(), root(0xb));
        assert_eq!(dag.previous_state_root(root(0xd)).unwrap(), root(0xb));
        assert_eq!(dag.previous_state_root(root(0xe)).unwrap(), root(0xd));
    }
}
|