Tree hash cache arena (#836)

* Start adding interop genesis state to lcli * Use more efficient method to generate genesis state * Remove duplicate int_to_bytes32 * Add lcli command to change state genesis time * Add option to allow VC to start with unsynced BN * Set VC to do parallel key loading * Don't default to dummy eth1 backend * Add endpoint to dump operation pool * Add metrics for op pool * Remove state clone for slot notifier * Add mem size approximation for tree hash cache * Avoid cloning tree hash when getting head * Avoid cloning tree hash when getting head * Add working arena-based cached tree hash * Add another benchmark * Add pre-allocation for caches * Make cache nullable * Fix bugs in cache tree hash * Add validator tree hash optimization * Optimize hash_concat * Make hash32_concat return fixed-len array * Fix failing API tests * Add new beacon state cache struct * Add validator-specific cache * Separate list and values arenas * Add parallel validator registry hashing * Remove MultiTreeHashCache * Remove cached tree hash macro * Fix failing tree hash test * Address Michael's comments * Add CachedTreeHash impl for ef tests * Fix messy merge conflict * Rename cache struct, add comments * Rename cache struct, add comments * Remove unnecessary mutability * Wrap iter in result * Tidy cached tree hash * Address Michael comments * Address more comments * Use ring::Context
2026-06-17 10:48:28 +00:00 · 2020-02-07 12:42:49 +11:00
parent f267bf2afe
commit c3182e3c1c
20 changed files with 1341 additions and 378 deletions
--- a/eth2/utils/cached_tree_hash/src/cache.rs
+++ b/eth2/utils/cached_tree_hash/src/cache.rs
@@ -1,46 +1,71 @@
+use crate::cache_arena;
 use crate::{Error, Hash256};
-use eth2_hashing::{hash_concat, ZERO_HASHES};
+use eth2_hashing::{hash32_concat, ZERO_HASHES};
 use ssz_derive::{Decode, Encode};
 use tree_hash::BYTES_PER_CHUNK;

+type CacheArena = cache_arena::CacheArena<Hash256>;
+type CacheArenaAllocation = cache_arena::CacheArenaAllocation<Hash256>;
+
 /// Sparse Merkle tree suitable for tree hashing vectors and lists.
 #[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
 pub struct TreeHashCache {
+    pub initialized: bool,
    /// Depth is such that the tree has a capacity for 2^depth leaves
    depth: usize,
    /// Sparse layers.
    ///
    /// The leaves are contained in `self.layers[self.depth]`, and each other layer `i`
    /// contains the parents of the nodes in layer `i + 1`.
-    layers: Vec<Vec<Hash256>>,
+    layers: Vec<CacheArenaAllocation>,
 }

 impl TreeHashCache {
-    /// Create a new cache with the given `depth`, but no actual content.
-    pub fn new(depth: usize) -> Self {
+    /// Create a new cache with the given `depth` with enough nodes allocated to suit `leaves`. All
+    /// nodes (in every layer) are set to `Hash256::zero()`.
+    pub fn new(arena: &mut CacheArena, depth: usize, leaves: usize) -> Self {
+        // TODO: what about when leaves is zero?
+        let layers = (0..=depth)
+            .map(|i| {
+                let vec = arena.alloc();
+                vec.extend_with_vec(
+                    arena,
+                    vec![Hash256::zero(); nodes_per_layer(i, depth, leaves)],
+                )
+                .expect(
+                    "A newly allocated sub-arena cannot fail unless it has reached max capacity",
+                );
+
+                vec
+            })
+            .collect();
+
        TreeHashCache {
+            initialized: false,
            depth,
-            layers: vec![vec![]; depth + 1],
+            layers,
        }
    }

    /// Compute the updated Merkle root for the given `leaves`.
    pub fn recalculate_merkle_root(
        &mut self,
+        arena: &mut CacheArena,
        leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
    ) -> Result<Hash256, Error> {
-        let dirty_indices = self.update_leaves(leaves)?;
-        self.update_merkle_root(dirty_indices)
+        let dirty_indices = self.update_leaves(arena, leaves)?;
+        self.update_merkle_root(arena, dirty_indices)
    }

    /// Phase 1 of the algorithm: compute the indices of all dirty leaves.
    pub fn update_leaves(
        &mut self,
+        arena: &mut CacheArena,
        mut leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
    ) -> Result<Vec<usize>, Error> {
        let new_leaf_count = leaves.len();

-        if new_leaf_count < self.leaves().len() {
+        if new_leaf_count < self.leaves().len(arena)? {
            return Err(Error::CannotShrink);
        } else if new_leaf_count > 2usize.pow(self.depth as u32) {
            return Err(Error::TooManyLeaves);
@@ -49,11 +74,11 @@ impl TreeHashCache {
        // Update the existing leaves
        let mut dirty = self
            .leaves()
-            .iter_mut()
+            .iter_mut(arena)?
            .enumerate()
            .zip(&mut leaves)
            .flat_map(|((i, leaf), new_leaf)| {
-                if leaf.as_bytes() != new_leaf {
+                if !self.initialized || leaf.as_bytes() != new_leaf {
                    leaf.assign_from_slice(&new_leaf);
                    Some(i)
                } else {
@@ -63,9 +88,9 @@ impl TreeHashCache {
            .collect::<Vec<_>>();

        // Push the rest of the new leaves (if any)
-        dirty.extend(self.leaves().len()..new_leaf_count);
+        dirty.extend(self.leaves().len(arena)?..new_leaf_count);
        self.leaves()
-            .extend(leaves.map(|l| Hash256::from_slice(&l)));
+            .extend_with_vec(arena, leaves.map(|l| Hash256::from_slice(&l)).collect())?;

        Ok(dirty)
    }
@@ -73,9 +98,13 @@ impl TreeHashCache {
    /// Phase 2: propagate changes upwards from the leaves of the tree, and compute the root.
    ///
    /// Returns an error if `dirty_indices` is inconsistent with the cache.
-    pub fn update_merkle_root(&mut self, mut dirty_indices: Vec<usize>) -> Result<Hash256, Error> {
+    pub fn update_merkle_root(
+        &mut self,
+        arena: &mut CacheArena,
+        mut dirty_indices: Vec<usize>,
+    ) -> Result<Hash256, Error> {
        if dirty_indices.is_empty() {
-            return Ok(self.root());
+            return Ok(self.root(arena));
        }

        let mut depth = self.depth;
@@ -87,24 +116,26 @@ impl TreeHashCache {
                let left_idx = 2 * idx;
                let right_idx = left_idx + 1;

-                let left = self.layers[depth][left_idx];
+                let left = self.layers[depth]
+                    .get(arena, left_idx)?
+                    .ok_or_else(|| Error::MissingLeftIdx(left_idx))?;
                let right = self.layers[depth]
-                    .get(right_idx)
+                    .get(arena, right_idx)?
                    .copied()
                    .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth - depth]));

-                let new_hash = hash_concat(left.as_bytes(), right.as_bytes());
+                let new_hash = hash32_concat(left.as_bytes(), right.as_bytes());

-                match self.layers[depth - 1].get_mut(idx) {
+                match self.layers[depth - 1].get_mut(arena, idx)? {
                    Some(hash) => {
                        hash.assign_from_slice(&new_hash);
                    }
                    None => {
                        // Parent layer should already contain nodes for all non-dirty indices
-                        if idx != self.layers[depth - 1].len() {
+                        if idx != self.layers[depth - 1].len(arena)? {
                            return Err(Error::CacheInconsistent);
                        }
-                        self.layers[depth - 1].push(Hash256::from_slice(&new_hash));
+                        self.layers[depth - 1].push(arena, Hash256::from_slice(&new_hash))?;
                    }
                }
            }
@@ -113,29 +144,23 @@ impl TreeHashCache {
            depth -= 1;
        }

-        Ok(self.root())
+        self.initialized = true;
+
+        Ok(self.root(arena))
    }

    /// Get the root of this cache, without doing any updates/computation.
-    pub fn root(&self) -> Hash256 {
+    pub fn root(&self, arena: &CacheArena) -> Hash256 {
        self.layers[0]
-            .get(0)
+            .get(arena, 0)
+            .expect("cached tree should have a root layer")
            .copied()
            .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth]))
    }

-    pub fn leaves(&mut self) -> &mut Vec<Hash256> {
+    pub fn leaves(&mut self) -> &mut CacheArenaAllocation {
        &mut self.layers[self.depth]
    }
-
-    /// Returns the approximate size of the cache in bytes.
-    ///
-    /// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
-    /// We focus instead on the lists of hashes, which should massively outweigh the items that we
-    /// ignore.
-    pub fn approx_mem_size(&self) -> usize {
-        self.layers.iter().map(|layer| layer.len() * 32).sum()
-    }
 }

 /// Compute the dirty indices for one layer up.
@@ -144,3 +169,52 @@ fn lift_dirty(dirty_indices: &[usize]) -> Vec<usize> {
    new_dirty.dedup();
    new_dirty
 }
+
+/// Returns the number of nodes that should be at each layer of a tree with the given `depth` and
+/// number of `leaves`.
+///
+/// Note: the top-most layer is `0` and a tree that has 8 leaves (4 layers) has a depth of 3 (_not_
+/// a depth of 4).
+///
+/// ## Example
+///
+/// Consider the following tree that has `depth = 3` and `leaves = 5`.
+///
+///```ignore
+/// 0        o      <-- height 0 has 1 node
+///        /   \
+/// 1    o      o   <-- height 1 has 2 nodes
+///     / \    /
+/// 2  o   o   o    <-- height 2 has 3 nodes
+///   /\   /\ /
+/// 3 o o o o o     <-- height 3 has 5 nodes
+/// ```
+fn nodes_per_layer(layer: usize, depth: usize, leaves: usize) -> usize {
+    if layer == depth {
+        leaves
+    } else {
+        let leaves_per_node = 1 << (depth - layer);
+        (leaves + leaves_per_node - 1) / leaves_per_node
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_node_per_layer_unbalanced_tree() {
+        assert_eq!(nodes_per_layer(0, 3, 5), 1);
+        assert_eq!(nodes_per_layer(1, 3, 5), 2);
+        assert_eq!(nodes_per_layer(2, 3, 5), 3);
+        assert_eq!(nodes_per_layer(3, 3, 5), 5);
+    }
+
+    #[test]
+    fn test_node_per_layer_balanced_tree() {
+        assert_eq!(nodes_per_layer(0, 3, 8), 1);
+        assert_eq!(nodes_per_layer(1, 3, 8), 2);
+        assert_eq!(nodes_per_layer(2, 3, 8), 4);
+        assert_eq!(nodes_per_layer(3, 3, 8), 8);
+    }
+}
--- a/eth2/utils/cached_tree_hash/src/cache_arena.rs
+++ b/eth2/utils/cached_tree_hash/src/cache_arena.rs
@@ -0,0 +1,497 @@
+use ssz::{Decode, Encode};
+use ssz_derive::{Decode, Encode};
+use std::marker::PhantomData;
+use std::ops::Range;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum Error {
+    UnknownAllocId(usize),
+    OffsetOverflow,
+    OffsetUnderflow,
+    RangeOverFlow,
+}
+
+/// Inspired by the `TypedArena` crate, the `CacheArena` provides a single contiguous memory
+/// allocation from which smaller allocations can be produced. In effect this allows for having
+/// many `Vec<T>`-like objects all stored contiguously on the heap with the aim of reducing memory
+/// fragmentation.
+///
+/// Because all of the allocations are stored in one big `Vec`, resizing any of the allocations
+/// will mean all items to the right of that allocation will be moved.
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct CacheArena<T: Encode + Decode> {
+    /// The backing array, storing cached values.
+    backing: Vec<T>,
+    /// A list of offsets indicating the start of each allocation.
+    offsets: Vec<usize>,
+}
+
+impl<T: Encode + Decode> CacheArena<T> {
+    /// Produce an allocation of zero length at the end of the backing array.
+    pub fn alloc(&mut self) -> CacheArenaAllocation<T> {
+        let alloc_id = self.offsets.len();
+        self.offsets.push(self.backing.len());
+
+        CacheArenaAllocation {
+            alloc_id,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Update `self.offsets` to reflect an allocation increasing in size.
+    fn grow(&mut self, alloc_id: usize, grow_by: usize) -> Result<(), Error> {
+        if alloc_id < self.offsets.len() {
+            self.offsets
+                .iter_mut()
+                .skip(alloc_id + 1)
+                .try_for_each(|offset| {
+                    *offset = offset
+                        .checked_add(grow_by)
+                        .ok_or_else(|| Error::OffsetOverflow)?;
+
+                    Ok(())
+                })
+        } else {
+            Err(Error::UnknownAllocId(alloc_id))
+        }
+    }
+
+    /// Update `self.offsets` to reflect an allocation decreasing in size.
+    fn shrink(&mut self, alloc_id: usize, shrink_by: usize) -> Result<(), Error> {
+        if alloc_id < self.offsets.len() {
+            self.offsets
+                .iter_mut()
+                .skip(alloc_id + 1)
+                .try_for_each(|offset| {
+                    *offset = offset
+                        .checked_sub(shrink_by)
+                        .ok_or_else(|| Error::OffsetUnderflow)?;
+
+                    Ok(())
+                })
+        } else {
+            Err(Error::UnknownAllocId(alloc_id))
+        }
+    }
+
+    /// Similar to `Vec::splice`, however the range is relative to some allocation (`alloc_id`) and
+    /// the replaced items are not returned (i.e., it is forgetful).
+    ///
+    /// To reiterate, the given `range` should be relative to the given `alloc_id`, not
+    /// `self.backing`. E.g., if the allocation has an offset of `20` and the range is `0..1`, then
+    /// the splice will translate to `self.backing[20..21]`.
+    fn splice_forgetful<I: IntoIterator<Item = T>>(
+        &mut self,
+        alloc_id: usize,
+        range: Range<usize>,
+        replace_with: I,
+    ) -> Result<(), Error> {
+        let offset = *self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let start = range
+            .start
+            .checked_add(offset)
+            .ok_or_else(|| Error::RangeOverFlow)?;
+        let end = range
+            .end
+            .checked_add(offset)
+            .ok_or_else(|| Error::RangeOverFlow)?;
+
+        let prev_len = self.backing.len();
+
+        self.backing.splice(start..end, replace_with);
+
+        if prev_len < self.backing.len() {
+            self.grow(alloc_id, self.backing.len() - prev_len)?;
+        } else if prev_len > self.backing.len() {
+            self.shrink(alloc_id, prev_len - self.backing.len())?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the length of the specified allocation.
+    fn len(&self, alloc_id: usize) -> Result<usize, Error> {
+        let start = self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let end = self
+            .offsets
+            .get(alloc_id + 1)
+            .copied()
+            .unwrap_or_else(|| self.backing.len());
+
+        Ok(end - start)
+    }
+
+    /// Get the value at position `i`, relative to the offset at `alloc_id`.
+    fn get(&self, alloc_id: usize, i: usize) -> Result<Option<&T>, Error> {
+        if i < self.len(alloc_id)? {
+            let offset = self
+                .offsets
+                .get(alloc_id)
+                .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+            Ok(self.backing.get(i + offset))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Mutably get the value at position `i`, relative to the offset at `alloc_id`.
+    fn get_mut(&mut self, alloc_id: usize, i: usize) -> Result<Option<&mut T>, Error> {
+        if i < self.len(alloc_id)? {
+            let offset = self
+                .offsets
+                .get(alloc_id)
+                .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+            Ok(self.backing.get_mut(i + offset))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Returns the range in `self.backing` that is occupied by some allocation.
+    fn range(&self, alloc_id: usize) -> Result<Range<usize>, Error> {
+        let start = *self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let end = self
+            .offsets
+            .get(alloc_id + 1)
+            .copied()
+            .unwrap_or_else(|| self.backing.len());
+
+        Ok(start..end)
+    }
+
+    /// Iterate through all values in some allocation.
+    fn iter(&self, alloc_id: usize) -> Result<impl Iterator<Item = &T>, Error> {
+        Ok(self.backing[self.range(alloc_id)?].iter())
+    }
+
+    /// Mutably iterate through all values in some allocation.
+    fn iter_mut(&mut self, alloc_id: usize) -> Result<impl Iterator<Item = &mut T>, Error> {
+        let range = self.range(alloc_id)?;
+        Ok(self.backing[range].iter_mut())
+    }
+
+    /// Returns the total number of items stored in the arena, i.e., the sum of the lengths of all
+    /// allocations.
+    pub fn backing_len(&self) -> usize {
+        self.backing.len()
+    }
+}
+
+/// An allocation from a `CacheArena` that behaves like a `Vec<T>`.
+///
+/// All functions will modify the given `arena` instead of `self`. As such, it is safe to have
+/// multiple instances of this allocation at once.
+///
+/// For all functions that accept a `CacheArena<T>` parameter, that arena should always be the one
+/// that created `Self`. I.e., do not mix-and-match allocations and arenas unless you _really_ know
+/// what you're doing (or want to have a bad time).
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct CacheArenaAllocation<T> {
+    alloc_id: usize,
+    #[ssz(skip_serializing)]
+    #[ssz(skip_deserializing)]
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Encode + Decode> CacheArenaAllocation<T> {
+    /// Grow the allocation in `arena`, appending `vec` to the current values.
+    pub fn extend_with_vec(&self, arena: &mut CacheArena<T>, vec: Vec<T>) -> Result<(), Error> {
+        let len = arena.len(self.alloc_id)?;
+        arena.splice_forgetful(self.alloc_id, len..len, vec)?;
+        Ok(())
+    }
+
+    /// Push `item` to the end of the current allocation in `arena`.
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn push(&self, arena: &mut CacheArena<T>, item: T) -> Result<(), Error> {
+        let len = arena.len(self.alloc_id)?;
+        arena.splice_forgetful(self.alloc_id, len..len, vec![item])?;
+        Ok(())
+    }
+
+    /// Get the i'th item in the `arena` (relative to this allocation).
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn get<'a>(&self, arena: &'a CacheArena<T>, i: usize) -> Result<Option<&'a T>, Error> {
+        arena.get(self.alloc_id, i)
+    }
+
+    /// Mutably get the i'th item in the `arena` (relative to this allocation).
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn get_mut<'a>(
+        &self,
+        arena: &'a mut CacheArena<T>,
+        i: usize,
+    ) -> Result<Option<&'a mut T>, Error> {
+        arena.get_mut(self.alloc_id, i)
+    }
+
+    /// Iterate through all items in the `arena` (relative to this allocation).
+    pub fn iter<'a>(&self, arena: &'a CacheArena<T>) -> Result<impl Iterator<Item = &'a T>, Error> {
+        arena.iter(self.alloc_id)
+    }
+
+    /// Mutably iterate through all items in the `arena` (relative to this allocation).
+    pub fn iter_mut<'a>(
+        &self,
+        arena: &'a mut CacheArena<T>,
+    ) -> Result<impl Iterator<Item = &'a mut T>, Error> {
+        arena.iter_mut(self.alloc_id)
+    }
+
+    /// Return the number of items stored in this allocation.
+    pub fn len(&self, arena: &CacheArena<T>) -> Result<usize, Error> {
+        arena.len(self.alloc_id)
+    }
+
+    /// Returns true if this allocation is empty.
+    pub fn is_empty(&self, arena: &CacheArena<T>) -> Result<bool, Error> {
+        self.len(arena).map(|len| len == 0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Hash256;
+
+    type CacheArena = super::CacheArena<Hash256>;
+    type CacheArenaAllocation = super::CacheArenaAllocation<Hash256>;
+
+    fn hash(i: usize) -> Hash256 {
+        Hash256::from_low_u64_be(i as u64)
+    }
+
+    fn test_routine(arena: &mut CacheArena, sub: &mut CacheArenaAllocation) {
+        let mut len = sub.len(arena).expect("should exist");
+
+        sub.push(arena, hash(len)).expect("should push");
+        len += 1;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after first push sub should have len {}",
+            len
+        );
+        assert_eq!(
+            sub.is_empty(arena).expect("should exist"),
+            false,
+            "new sub should not be empty"
+        );
+
+        sub.push(arena, hash(len)).expect("should push again");
+        len += 1;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after second push sub should have len {}",
+            len
+        );
+
+        sub.extend_with_vec(arena, vec![hash(len), hash(len + 1)])
+            .expect("should extend with vec");
+        len += 2;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after extend sub should have len {}",
+            len
+        );
+
+        let collected = sub
+            .iter(arena)
+            .expect("should get iter")
+            .cloned()
+            .collect::<Vec<_>>();
+        let collected_mut = sub
+            .iter_mut(arena)
+            .expect("should get mut iter")
+            .map(|v| *v)
+            .collect::<Vec<_>>();
+
+        for i in 0..len {
+            assert_eq!(
+                *sub.get(arena, i)
+                    .expect("should exist")
+                    .expect("should get sub index"),
+                hash(i),
+                "get({}) should be hash({})",
+                i,
+                i
+            );
+
+            assert_eq!(
+                collected[i],
+                hash(i),
+                "collected[{}] should be hash({})",
+                i,
+                i
+            );
+
+            assert_eq!(
+                collected_mut[i],
+                hash(i),
+                "collected_mut[{}] should be hash({})",
+                i,
+                i
+            );
+        }
+    }
+
+    #[test]
+    fn single() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub = arena.alloc();
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub);
+    }
+
+    #[test]
+    fn double() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub_01 = arena.alloc();
+        assert_eq!(
+            sub_01.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_01.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        let mut sub_02 = arena.alloc();
+        assert_eq!(
+            sub_02.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_02.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_01);
+        test_routine(arena, &mut sub_02);
+    }
+
+    #[test]
+    fn one_then_other() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub_01 = arena.alloc();
+        assert_eq!(
+            sub_01.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_01.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_01);
+
+        let mut sub_02 = arena.alloc();
+        assert_eq!(
+            sub_02.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_02.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_02);
+        test_routine(arena, &mut sub_01);
+        test_routine(arena, &mut sub_02);
+    }
+
+    #[test]
+    fn many() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut subs = vec![];
+
+        for i in 0..50 {
+            if i == 0 {
+                let sub = arena.alloc();
+                assert_eq!(
+                    sub.len(arena).expect("should exist"),
+                    0,
+                    "new sub should have len 0"
+                );
+                assert_eq!(
+                    sub.is_empty(arena).expect("should exist"),
+                    true,
+                    "new sub should be empty"
+                );
+                subs.push(sub);
+
+                continue;
+            } else if i % 2 == 0 {
+                test_routine(arena, &mut subs[i - 1]);
+            }
+
+            let sub = arena.alloc();
+            assert_eq!(
+                sub.len(arena).expect("should exist"),
+                0,
+                "new sub should have len 0"
+            );
+            assert_eq!(
+                sub.is_empty(arena).expect("should exist"),
+                true,
+                "new sub should be empty"
+            );
+            subs.push(sub);
+        }
+
+        for mut sub in subs.iter_mut() {
+            test_routine(arena, &mut sub);
+        }
+    }
+}
--- a/eth2/utils/cached_tree_hash/src/impls.rs
+++ b/eth2/utils/cached_tree_hash/src/impls.rs
@@ -1,4 +1,4 @@
-use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
+use crate::{CacheArena, CachedTreeHash, Error, Hash256, TreeHashCache};
 use ssz_types::{typenum::Unsigned, FixedVector, VariableList};
 use std::mem::size_of;
 use tree_hash::{mix_in_length, BYTES_PER_CHUNK};
@@ -13,6 +13,17 @@ pub fn int_log(n: usize) -> usize {
    }
 }

+pub fn hash256_leaf_count(len: usize) -> usize {
+    len
+}
+
+pub fn u64_leaf_count(len: usize) -> usize {
+    let type_size = size_of::<u64>();
+    let vals_per_chunk = BYTES_PER_CHUNK / type_size;
+
+    (len + vals_per_chunk - 1) / vals_per_chunk
+}
+
 pub fn hash256_iter<'a>(
    values: &'a [Hash256],
 ) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
@@ -36,35 +47,59 @@ pub fn u64_iter<'a>(
 }

 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<Hash256, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        TreeHashCache::new(int_log(N::to_usize()))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize()),
+            hash256_leaf_count(self.len()),
+        )
    }

-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
-        cache.recalculate_merkle_root(hash256_iter(&self))
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
+        cache.recalculate_merkle_root(arena, hash256_iter(&self))
    }
 }

 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<u64, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
-        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize() / vals_per_chunk),
+            u64_leaf_count(self.len()),
+        )
    }

-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
-        cache.recalculate_merkle_root(u64_iter(&self))
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
+        cache.recalculate_merkle_root(arena, u64_iter(&self))
    }
 }

 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        TreeHashCache::new(int_log(N::to_usize()))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize()),
+            hash256_leaf_count(self.len()),
+        )
    }

-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
        Ok(Hash256::from_slice(&mix_in_length(
            cache
-                .recalculate_merkle_root(hash256_iter(&self))?
+                .recalculate_merkle_root(arena, hash256_iter(&self))?
                .as_bytes(),
            self.len(),
        )))
@@ -72,14 +107,24 @@ impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
 }

 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<u64, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
-        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize() / vals_per_chunk),
+            u64_leaf_count(self.len()),
+        )
    }

-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
        Ok(Hash256::from_slice(&mix_in_length(
-            cache.recalculate_merkle_root(u64_iter(&self))?.as_bytes(),
+            cache
+                .recalculate_merkle_root(arena, u64_iter(&self))?
+                .as_bytes(),
            self.len(),
        )))
    }
--- a/eth2/utils/cached_tree_hash/src/lib.rs
+++ b/eth2/utils/cached_tree_hash/src/lib.rs
@@ -1,12 +1,13 @@
 mod cache;
+mod cache_arena;
 mod impls;
-mod multi_cache;
 #[cfg(test)]
 mod test;

+pub type CacheArena = cache_arena::CacheArena<Hash256>;
+
 pub use crate::cache::TreeHashCache;
 pub use crate::impls::int_log;
-pub use crate::multi_cache::MultiTreeHashCache;
 use ethereum_types::H256 as Hash256;
 use tree_hash::TreeHash;

@@ -19,13 +20,26 @@ pub enum Error {
    CannotShrink,
    /// Cache is inconsistent with the list of dirty indices provided.
    CacheInconsistent,
+    CacheArenaError(cache_arena::Error),
+    /// Unable to find left index in Merkle tree.
+    MissingLeftIdx(usize),
+}
+
+impl From<cache_arena::Error> for Error {
+    fn from(e: cache_arena::Error) -> Error {
+        Error::CacheArenaError(e)
+    }
 }

 /// Trait for types which can make use of a cache to accelerate calculation of their tree hash root.
 pub trait CachedTreeHash<Cache>: TreeHash {
    /// Create a new cache appropriate for use with values of this type.
-    fn new_tree_hash_cache() -> Cache;
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> Cache;

    /// Update the cache and use it to compute the tree hash root for `self`.
-    fn recalculate_tree_hash_root(&self, cache: &mut Cache) -> Result<Hash256, Error>;
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut Cache,
+    ) -> Result<Hash256, Error>;
 }
--- a/eth2/utils/cached_tree_hash/src/multi_cache.rs
+++ b/eth2/utils/cached_tree_hash/src/multi_cache.rs
@@ -1,78 +0,0 @@
-use crate::{int_log, CachedTreeHash, Error, Hash256, TreeHashCache};
-use ssz_derive::{Decode, Encode};
-use ssz_types::{typenum::Unsigned, VariableList};
-use tree_hash::mix_in_length;
-
-/// Multi-level tree hash cache.
-///
-/// Suitable for lists/vectors/containers holding values which themselves have caches.
-///
-/// Note: this cache could be made composable by replacing the hardcoded `Vec<TreeHashCache>` with
-/// `Vec<C>`, allowing arbitrary nesting, but for now we stick to 2-level nesting because that's all
-/// we need.
-#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
-pub struct MultiTreeHashCache {
-    list_cache: TreeHashCache,
-    value_caches: Vec<TreeHashCache>,
-}
-
-impl MultiTreeHashCache {
-    /// Returns the approximate size of the cache in bytes.
-    ///
-    /// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
-    /// We focus instead on the lists of hashes, which should massively outweigh the items that we
-    /// ignore.
-    pub fn approx_mem_size(&self) -> usize {
-        self.list_cache.approx_mem_size()
-            + self
-                .value_caches
-                .iter()
-                .map(TreeHashCache::approx_mem_size)
-                .sum::<usize>()
-    }
-}
-
-impl<T, N> CachedTreeHash<MultiTreeHashCache> for VariableList<T, N>
-where
-    T: CachedTreeHash<TreeHashCache>,
-    N: Unsigned,
-{
-    fn new_tree_hash_cache() -> MultiTreeHashCache {
-        MultiTreeHashCache {
-            list_cache: TreeHashCache::new(int_log(N::to_usize())),
-            value_caches: vec![],
-        }
-    }
-
-    fn recalculate_tree_hash_root(&self, cache: &mut MultiTreeHashCache) -> Result<Hash256, Error> {
-        if self.len() < cache.value_caches.len() {
-            return Err(Error::CannotShrink);
-        }
-
-        // Resize the value caches to the size of the list.
-        cache
-            .value_caches
-            .resize(self.len(), T::new_tree_hash_cache());
-
-        // Update all individual value caches.
-        self.iter()
-            .zip(cache.value_caches.iter_mut())
-            .try_for_each(|(value, cache)| value.recalculate_tree_hash_root(cache).map(|_| ()))?;
-
-        // Pipe the value roots into the list cache, then mix in the length.
-        // Note: it's possible to avoid this 2nd iteration (or an allocation) by using
-        // `itertools::process_results`, but it requires removing the `ExactSizeIterator`
-        // bound from `recalculate_merkle_root`, and only saves about 5% in benchmarks.
-        let list_root = cache.list_cache.recalculate_merkle_root(
-            cache
-                .value_caches
-                .iter()
-                .map(|value_cache| value_cache.root().to_fixed_bytes()),
-        )?;
-
-        Ok(Hash256::from_slice(&mix_in_length(
-            list_root.as_bytes(),
-            self.len(),
-        )))
-    }
-}
--- a/eth2/utils/cached_tree_hash/src/test.rs
+++ b/eth2/utils/cached_tree_hash/src/test.rs
@@ -1,5 +1,5 @@
 use crate::impls::hash256_iter;
-use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
+use crate::{CacheArena, CachedTreeHash, Error, Hash256, TreeHashCache};
 use eth2_hashing::ZERO_HASHES;
 use quickcheck_macros::quickcheck;
 use ssz_types::{
@@ -18,46 +18,49 @@ type Vector16u64 = FixedVector<u64, U16>;

 #[test]
 fn max_leaves() {
+    let arena = &mut CacheArena::default();
    let depth = 4;
    let max_len = 2u64.pow(depth as u32);
-    let mut cache = TreeHashCache::new(depth);
+    let mut cache = TreeHashCache::new(arena, depth, 2);
    assert!(cache
-        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len - 1)))
+        .recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len - 1)))
        .is_ok());
    assert!(cache
-        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len)))
+        .recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len)))
        .is_ok());
    assert_eq!(
-        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len + 1))),
+        cache.recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len + 1))),
        Err(Error::TooManyLeaves)
    );
    assert_eq!(
-        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len * 2))),
+        cache.recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len * 2))),
        Err(Error::TooManyLeaves)
    );
 }

 #[test]
 fn cannot_shrink() {
+    let arena = &mut CacheArena::default();
    let init_len = 12;
    let list1 = List16::new(int_hashes(0, init_len)).unwrap();
    let list2 = List16::new(int_hashes(0, init_len - 1)).unwrap();

-    let mut cache = List16::new_tree_hash_cache();
-    assert!(list1.recalculate_tree_hash_root(&mut cache).is_ok());
+    let mut cache = list1.new_tree_hash_cache(arena);
+    assert!(list1.recalculate_tree_hash_root(arena, &mut cache).is_ok());
    assert_eq!(
-        list2.recalculate_tree_hash_root(&mut cache),
+        list2.recalculate_tree_hash_root(arena, &mut cache),
        Err(Error::CannotShrink)
    );
 }

 #[test]
 fn empty_leaves() {
+    let arena = &mut CacheArena::default();
    let depth = 20;
-    let mut cache = TreeHashCache::new(depth);
+    let mut cache = TreeHashCache::new(arena, depth, 0);
    assert_eq!(
        cache
-            .recalculate_merkle_root(vec![].into_iter())
+            .recalculate_merkle_root(arena, vec![].into_iter())
            .unwrap()
            .as_bytes(),
        &ZERO_HASHES[depth][..]
@@ -66,40 +69,43 @@ fn empty_leaves() {

 #[test]
 fn fixed_vector_hash256() {
+    let arena = &mut CacheArena::default();
    let len = 16;
    let vec = Vector16::new(int_hashes(0, len)).unwrap();

-    let mut cache = Vector16::new_tree_hash_cache();
+    let mut cache = vec.new_tree_hash_cache(arena);

    assert_eq!(
        Hash256::from_slice(&vec.tree_hash_root()),
-        vec.recalculate_tree_hash_root(&mut cache).unwrap()
+        vec.recalculate_tree_hash_root(arena, &mut cache).unwrap()
    );
 }

 #[test]
 fn fixed_vector_u64() {
+    let arena = &mut CacheArena::default();
    let len = 16;
    let vec = Vector16u64::new((0..len).collect()).unwrap();

-    let mut cache = Vector16u64::new_tree_hash_cache();
+    let mut cache = vec.new_tree_hash_cache(arena);

    assert_eq!(
        Hash256::from_slice(&vec.tree_hash_root()),
-        vec.recalculate_tree_hash_root(&mut cache).unwrap()
+        vec.recalculate_tree_hash_root(arena, &mut cache).unwrap()
    );
 }

 #[test]
 fn variable_list_hash256() {
+    let arena = &mut CacheArena::default();
    let len = 13;
    let list = List16::new(int_hashes(0, len)).unwrap();

-    let mut cache = List16::new_tree_hash_cache();
+    let mut cache = list.new_tree_hash_cache(arena);

    assert_eq!(
        Hash256::from_slice(&list.tree_hash_root()),
-        list.recalculate_tree_hash_root(&mut cache).unwrap()
+        list.recalculate_tree_hash_root(arena, &mut cache).unwrap()
    );
 }

@@ -119,6 +125,7 @@ fn quickcheck_variable_list_h256_257(leaves_and_skips: Vec<(u64, bool)>) -> bool
 }

 fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) -> bool {
+    let arena = &mut CacheArena::default();
    let leaves: Vec<_> = leaves_and_skips
        .iter()
        .map(|(l, _)| Hash256::from_low_u64_be(*l))
@@ -126,14 +133,15 @@ fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) ->
        .collect();

    let mut list: VariableList<Hash256, Len>;
-    let mut cache = VariableList::<Hash256, Len>::new_tree_hash_cache();
+    let init: VariableList<Hash256, Len> = VariableList::new(vec![]).unwrap();
+    let mut cache = init.new_tree_hash_cache(arena);

    for (end, (_, update_cache)) in leaves_and_skips.into_iter().enumerate() {
        list = VariableList::new(leaves[..end].to_vec()).unwrap();

        if update_cache
            && list
-                .recalculate_tree_hash_root(&mut cache)
+                .recalculate_tree_hash_root(arena, &mut cache)
                .unwrap()
                .as_bytes()
                != &list.tree_hash_root()[..]