Tree hash cache arena (#836)

* Start adding interop genesis state to lcli

* Use more efficient method to generate genesis state

* Remove duplicate int_to_bytes32

* Add lcli command to change state genesis time

* Add option to allow VC to start with unsynced BN

* Set VC to do parallel key loading

* Don't default to dummy eth1 backend

* Add endpoint to dump operation pool

* Add metrics for op pool

* Remove state clone for slot notifier

* Add mem size approximation for tree hash cache

* Avoid cloning tree hash when getting head

* Avoid cloning tree hash when getting head

* Add working arena-based cached tree hash

* Add another benchmark

* Add pre-allocation for caches

* Make cache nullable

* Fix bugs in cache tree hash

* Add validator tree hash optimization

* Optimize hash_concat

* Make hash32_concat return fixed-len array

* Fix failing API tests

* Add new beacon state cache struct

* Add validator-specific cache

* Separate list and values arenas

* Add parallel validator registry hashing

* Remove MultiTreeHashCache

* Remove cached tree hash macro

* Fix failing tree hash test

* Address Michael's comments

* Add CachedTreeHash impl for ef tests

* Fix messy merge conflict

* Rename cache struct, add comments

* Rename cache struct, add comments

* Remove unnecessary mutability

* Wrap iter in result

* Tidy cached tree hash

* Address Michael's comments

* Address more comments

* Use ring::Context
This commit is contained in:
Paul Hauner
2020-02-07 12:42:49 +11:00
committed by GitHub
parent f267bf2afe
commit c3182e3c1c
20 changed files with 1341 additions and 378 deletions

View File

@@ -1,46 +1,71 @@
use crate::cache_arena;
use crate::{Error, Hash256};
use eth2_hashing::{hash_concat, ZERO_HASHES};
use eth2_hashing::{hash32_concat, ZERO_HASHES};
use ssz_derive::{Decode, Encode};
use tree_hash::BYTES_PER_CHUNK;
/// Arena of `Hash256` nodes shared between tree hash caches, so layer storage is pooled
/// rather than each cache owning its own `Vec`s.
type CacheArena = cache_arena::CacheArena<Hash256>;
/// A sub-allocation within the arena; each `TreeHashCache` layer is stored as one of these.
type CacheArenaAllocation = cache_arena::CacheArenaAllocation<Hash256>;
/// Sparse Merkle tree suitable for tree hashing vectors and lists.
#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
pub struct TreeHashCache {
pub initialized: bool,
/// Depth is such that the tree has a capacity for 2^depth leaves
depth: usize,
/// Sparse layers.
///
/// The leaves are contained in `self.layers[self.depth]`, and each other layer `i`
/// contains the parents of the nodes in layer `i + 1`.
layers: Vec<Vec<Hash256>>,
layers: Vec<CacheArenaAllocation>,
}
impl TreeHashCache {
/// Create a new cache with the given `depth`, but no actual content.
pub fn new(depth: usize) -> Self {
/// Create a new cache with the given `depth` with enough nodes allocated to suit `leaves`. All
/// leaves are set to `Hash256::zero()`.
pub fn new(arena: &mut CacheArena, depth: usize, leaves: usize) -> Self {
// TODO: what about when leaves is zero?
let layers = (0..=depth)
.map(|i| {
let vec = arena.alloc();
vec.extend_with_vec(
arena,
vec![Hash256::zero(); nodes_per_layer(i, depth, leaves)],
)
.expect(
"A newly allocated sub-arena cannot fail unless it has reached max capacity",
);
vec
})
.collect();
TreeHashCache {
initialized: false,
depth,
layers: vec![vec![]; depth + 1],
layers,
}
}
/// Compute the updated Merkle root for the given `leaves`.
pub fn recalculate_merkle_root(
&mut self,
arena: &mut CacheArena,
leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
) -> Result<Hash256, Error> {
let dirty_indices = self.update_leaves(leaves)?;
self.update_merkle_root(dirty_indices)
let dirty_indices = self.update_leaves(arena, leaves)?;
self.update_merkle_root(arena, dirty_indices)
}
/// Phase 1 of the algorithm: compute the indices of all dirty leaves.
pub fn update_leaves(
&mut self,
arena: &mut CacheArena,
mut leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
) -> Result<Vec<usize>, Error> {
let new_leaf_count = leaves.len();
if new_leaf_count < self.leaves().len() {
if new_leaf_count < self.leaves().len(arena)? {
return Err(Error::CannotShrink);
} else if new_leaf_count > 2usize.pow(self.depth as u32) {
return Err(Error::TooManyLeaves);
@@ -49,11 +74,11 @@ impl TreeHashCache {
// Update the existing leaves
let mut dirty = self
.leaves()
.iter_mut()
.iter_mut(arena)?
.enumerate()
.zip(&mut leaves)
.flat_map(|((i, leaf), new_leaf)| {
if leaf.as_bytes() != new_leaf {
if !self.initialized || leaf.as_bytes() != new_leaf {
leaf.assign_from_slice(&new_leaf);
Some(i)
} else {
@@ -63,9 +88,9 @@ impl TreeHashCache {
.collect::<Vec<_>>();
// Push the rest of the new leaves (if any)
dirty.extend(self.leaves().len()..new_leaf_count);
dirty.extend(self.leaves().len(arena)?..new_leaf_count);
self.leaves()
.extend(leaves.map(|l| Hash256::from_slice(&l)));
.extend_with_vec(arena, leaves.map(|l| Hash256::from_slice(&l)).collect())?;
Ok(dirty)
}
@@ -73,9 +98,13 @@ impl TreeHashCache {
/// Phase 2: propagate changes upwards from the leaves of the tree, and compute the root.
///
/// Returns an error if `dirty_indices` is inconsistent with the cache.
pub fn update_merkle_root(&mut self, mut dirty_indices: Vec<usize>) -> Result<Hash256, Error> {
pub fn update_merkle_root(
&mut self,
arena: &mut CacheArena,
mut dirty_indices: Vec<usize>,
) -> Result<Hash256, Error> {
if dirty_indices.is_empty() {
return Ok(self.root());
return Ok(self.root(arena));
}
let mut depth = self.depth;
@@ -87,24 +116,26 @@ impl TreeHashCache {
let left_idx = 2 * idx;
let right_idx = left_idx + 1;
let left = self.layers[depth][left_idx];
let left = self.layers[depth]
.get(arena, left_idx)?
.ok_or_else(|| Error::MissingLeftIdx(left_idx))?;
let right = self.layers[depth]
.get(right_idx)
.get(arena, right_idx)?
.copied()
.unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth - depth]));
let new_hash = hash_concat(left.as_bytes(), right.as_bytes());
let new_hash = hash32_concat(left.as_bytes(), right.as_bytes());
match self.layers[depth - 1].get_mut(idx) {
match self.layers[depth - 1].get_mut(arena, idx)? {
Some(hash) => {
hash.assign_from_slice(&new_hash);
}
None => {
// Parent layer should already contain nodes for all non-dirty indices
if idx != self.layers[depth - 1].len() {
if idx != self.layers[depth - 1].len(arena)? {
return Err(Error::CacheInconsistent);
}
self.layers[depth - 1].push(Hash256::from_slice(&new_hash));
self.layers[depth - 1].push(arena, Hash256::from_slice(&new_hash))?;
}
}
}
@@ -113,29 +144,23 @@ impl TreeHashCache {
depth -= 1;
}
Ok(self.root())
self.initialized = true;
Ok(self.root(arena))
}
/// Get the root of this cache, without doing any updates/computation.
pub fn root(&self) -> Hash256 {
pub fn root(&self, arena: &CacheArena) -> Hash256 {
self.layers[0]
.get(0)
.get(arena, 0)
.expect("cached tree should have a root layer")
.copied()
.unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth]))
}
pub fn leaves(&mut self) -> &mut Vec<Hash256> {
pub fn leaves(&mut self) -> &mut CacheArenaAllocation {
&mut self.layers[self.depth]
}
/// Returns the approximate size of the cache in bytes.
///
/// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
/// We focus instead on the lists of hashes, which should massively outweigh the items that we
/// ignore.
pub fn approx_mem_size(&self) -> usize {
self.layers.iter().map(|layer| layer.len() * 32).sum()
}
}
/// Compute the dirty indices for one layer up.
@@ -144,3 +169,52 @@ fn lift_dirty(dirty_indices: &[usize]) -> Vec<usize> {
new_dirty.dedup();
new_dirty
}
/// Returns the number of nodes that should be at each layer of a tree with the given `depth` and
/// number of `leaves`.
///
/// Note: the top-most layer is `0` and a tree that has 8 leaves (4 layers) has a depth of 3 (_not_
/// a depth of 4).
///
/// ## Example
///
/// Consider the following tree that has `depth = 3` and `leaves = 5`.
///
///```ignore
/// 0           o           <-- height 0 has 1 node
///            / \
/// 1        o   o          <-- height 1 has 2 nodes
///         / \   \
/// 2      o   o   o        <-- height 2 has 3 nodes
///       /\   /\   \
/// 3    o  o o  o   o      <-- height 3 has 5 nodes
/// ```
fn nodes_per_layer(layer: usize, depth: usize, leaves: usize) -> usize {
    if layer == depth {
        // The bottom-most layer stores exactly one node per leaf.
        leaves
    } else {
        // Each node in `layer` covers `2^(depth - layer)` leaf positions.
        let leaves_per_node = 1 << (depth - layer);
        // Ceiling division, written as `div + (rem != 0)` rather than
        // `(leaves + leaves_per_node - 1) / leaves_per_node` so it cannot overflow when
        // `leaves` is close to `usize::MAX`.
        leaves / leaves_per_node + (leaves % leaves_per_node != 0) as usize
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// An unbalanced tree (5 leaves in a depth-3 tree) has partially-filled upper layers.
    #[test]
    fn test_nodes_per_layer_unbalanced_tree() {
        assert_eq!(nodes_per_layer(0, 3, 5), 1);
        assert_eq!(nodes_per_layer(1, 3, 5), 2);
        assert_eq!(nodes_per_layer(2, 3, 5), 3);
        assert_eq!(nodes_per_layer(3, 3, 5), 5);
    }

    /// A perfectly balanced tree (8 leaves, depth 3) fills every layer completely.
    #[test]
    fn test_nodes_per_layer_balanced_tree() {
        assert_eq!(nodes_per_layer(0, 3, 8), 1);
        assert_eq!(nodes_per_layer(1, 3, 8), 2);
        assert_eq!(nodes_per_layer(2, 3, 8), 4);
        assert_eq!(nodes_per_layer(3, 3, 8), 8);
    }

    /// Zero leaves should yield zero nodes at every layer (edge case flagged by the TODO in
    /// `TreeHashCache::new`).
    #[test]
    fn test_nodes_per_layer_empty_tree() {
        for layer in 0..=3 {
            assert_eq!(nodes_per_layer(layer, 3, 0), 0);
        }
    }

    /// A depth-0 tree is a single node: the leaf is the root.
    #[test]
    fn test_nodes_per_layer_single_node() {
        assert_eq!(nodes_per_layer(0, 0, 1), 1);
    }
}
}