From c3182e3c1c473dc8d65cdde4f8deea2489f0707f Mon Sep 17 00:00:00 2001
From: Paul Hauner <paul@paulhauner.com>
Date: Fri, 7 Feb 2020 12:42:49 +1100
Subject: [PATCH] Tree hash cache arena (#836)

* Start adding interop genesis state to lcli

* Use more efficient method to generate genesis state

* Remove duplicate int_to_bytes32

* Add lcli command to change state genesis time

* Add option to allow VC to start with unsynced BN

* Set VC to do parallel key loading

* Don't default to dummy eth1 backend

* Add endpoint to dump operation pool

* Add metrics for op pool

* Remove state clone for slot notifier

* Add mem size approximation for tree hash cache

* Avoid cloning tree hash when getting head

* Avoid cloning tree hash when getting head

* Add working arena-based cached tree hash

* Add another benchmark

* Add pre-allocation for caches

* Make cache nullable

* Fix bugs in cache tree hash

* Add validator tree hash optimization

* Optimize hash_concat

* Make hash32_concat return fixed-len array

* Fix failing API tests

* Add new beacon state cache struct

* Add validator-specific cache

* Separate list and values arenas

* Add parallel validator registry hashing

* Remove MultiTreeHashCache

* Remove cached tree hash macro

* Fix failing tree hash test

* Address Michael's comments

* Add CachedTreeHash impl for ef tests

* Fix messy merge conflict

* Rename cache struct, add comments

* Rename cache struct, add comments

* Remove unnecessary mutability

* Wrap iter in result

* Tidy cached tree hash

* Address Michael comments

* Address more comments

* Use ring::Context
---
 eth2/types/benches/benches.rs                 |  80 ++-
 eth2/types/examples/clone_state.rs            |  48 ++
 eth2/types/examples/tree_hash_state.rs        |  54 ++
 eth2/types/src/beacon_state.rs                |  97 ++--
 .../types/src/beacon_state/tree_hash_cache.rs | 274 ++++++++++
 eth2/types/src/lib.rs                         |   2 +-
 eth2/types/src/tree_hash_impls.rs             | 106 ++--
 eth2/utils/cached_tree_hash/src/cache.rs      | 140 +++--
 .../utils/cached_tree_hash/src/cache_arena.rs | 497 ++++++++++++++++++
 eth2/utils/cached_tree_hash/src/impls.rs      |  79 ++-
 eth2/utils/cached_tree_hash/src/lib.rs        |  22 +-
 .../utils/cached_tree_hash/src/multi_cache.rs |  78 ---
 eth2/utils/cached_tree_hash/src/test.rs       |  46 +-
 eth2/utils/eth2_hashing/src/lib.rs            |  37 +-
 eth2/utils/int_to_bytes/src/lib.rs            |  15 +
 eth2/utils/merkle_proof/src/lib.rs            |  20 +-
 eth2/utils/tree_hash/src/lib.rs               |   2 +-
 eth2/utils/tree_hash/src/merkleize_padded.rs  |   4 +-
 eth2/utils/tree_hash_derive/src/lib.rs        | 108 ----
 tests/ef_tests/src/cases/ssz_static.rs        |  10 +-
 20 files changed, 1341 insertions(+), 378 deletions(-)
 create mode 100644 eth2/types/examples/clone_state.rs
 create mode 100644 eth2/types/examples/tree_hash_state.rs
 create mode 100644 eth2/types/src/beacon_state/tree_hash_cache.rs
 create mode 100644 eth2/utils/cached_tree_hash/src/cache_arena.rs
 delete mode 100644 eth2/utils/cached_tree_hash/src/multi_cache.rs

diff --git a/eth2/types/benches/benches.rs b/eth2/types/benches/benches.rs
index e38b9ded34..d7ef33ce5d 100644
--- a/eth2/types/benches/benches.rs
+++ b/eth2/types/benches/benches.rs
@@ -90,7 +90,7 @@ fn all_benches(c: &mut Criterion) {
         .sample_size(10),
     );
 
-    let inner_state = state;
+    let inner_state = state.clone();
     c.bench(
         &format!("{}_validators", validator_count),
         Benchmark::new("clone_without_caches/beacon_state", move |b| {
@@ -102,6 +102,84 @@ fn all_benches(c: &mut Criterion) {
         })
         .sample_size(10),
     );
+
+    let inner_state = state.clone();
+    c.bench(
+        &format!("{}_validators", validator_count),
+        Benchmark::new("clone/tree_hash_cache", move |b| {
+            b.iter_batched_ref(
+                || inner_state.clone(),
+                |state| black_box(state.tree_hash_cache.clone()),
+                criterion::BatchSize::SmallInput,
+            )
+        })
+        .sample_size(10),
+    );
+
+    let inner_state = state.clone();
+    c.bench(
+        &format!("{}_validators", validator_count),
+        Benchmark::new(
+            "initialized_cached_tree_hash_without_changes/beacon_state",
+            move |b| {
+                b.iter_batched_ref(
+                    || inner_state.clone(),
+                    |state| black_box(state.update_tree_hash_cache()),
+                    criterion::BatchSize::SmallInput,
+                )
+            },
+        )
+        .sample_size(10),
+    );
+
+    let mut inner_state = state.clone();
+    inner_state.drop_all_caches();
+    c.bench(
+        &format!("{}_validators", validator_count),
+        Benchmark::new("non_initialized_cached_tree_hash/beacon_state", move |b| {
+            b.iter_batched_ref(
+                || inner_state.clone(),
+                |state| {
+                    black_box(
+                        state
+                            .update_tree_hash_cache()
+                            .expect("should update tree hash"),
+                    )
+                },
+                criterion::BatchSize::SmallInput,
+            )
+        })
+        .sample_size(10),
+    );
+
+    let inner_state = state.clone();
+    c.bench(
+        &format!("{}_validators", validator_count),
+        Benchmark::new(
+            "initialized_cached_tree_hash_with_new_validators/beacon_state",
+            move |b| {
+                b.iter_batched_ref(
+                    || {
+                        let mut state = inner_state.clone();
+                        for _ in 0..16 {
+                            state
+                                .validators
+                                .push(Validator::default())
+                                .expect("should push validatorj");
+                            state
+                                .balances
+                                .push(32_000_000_000)
+                                .expect("should push balance");
+                        }
+                        state
+                    },
+                    |state| black_box(state.update_tree_hash_cache()),
+                    criterion::BatchSize::SmallInput,
+                )
+            },
+        )
+        .sample_size(10),
+    );
 }
 
 criterion_group!(benches, all_benches,);
diff --git a/eth2/types/examples/clone_state.rs b/eth2/types/examples/clone_state.rs
new file mode 100644
index 0000000000..73f601bce0
--- /dev/null
+++ b/eth2/types/examples/clone_state.rs
@@ -0,0 +1,48 @@
+//! These examples only really exist so we can use them for flamegraph. If they get annoying to
+//! maintain, feel free to delete.
+
+use types::{
+    test_utils::generate_deterministic_keypair, BeaconState, Eth1Data, EthSpec, Hash256,
+    MinimalEthSpec, Validator,
+};
+
+type E = MinimalEthSpec;
+
+fn get_state(validator_count: usize) -> BeaconState<E> {
+    let spec = &E::default_spec();
+    let eth1_data = Eth1Data {
+        deposit_root: Hash256::zero(),
+        deposit_count: 0,
+        block_hash: Hash256::zero(),
+    };
+
+    let mut state = BeaconState::new(0, eth1_data, spec);
+
+    for i in 0..validator_count {
+        state.balances.push(i as u64).expect("should add balance");
+        state
+            .validators
+            .push(Validator {
+                pubkey: generate_deterministic_keypair(i).pk.into(),
+                withdrawal_credentials: Hash256::from_low_u64_le(i as u64),
+                effective_balance: i as u64,
+                slashed: i % 2 == 0,
+                activation_eligibility_epoch: i.into(),
+                activation_epoch: i.into(),
+                exit_epoch: i.into(),
+                withdrawable_epoch: i.into(),
+            })
+            .expect("should add validator");
+    }
+
+    state
+}
+
+fn main() {
+    let validator_count = 1_024;
+    let state = get_state(validator_count);
+
+    for _ in 0..100_000 {
+        let _ = state.clone();
+    }
+}
diff --git a/eth2/types/examples/tree_hash_state.rs b/eth2/types/examples/tree_hash_state.rs
new file mode 100644
index 0000000000..8c2764e094
--- /dev/null
+++ b/eth2/types/examples/tree_hash_state.rs
@@ -0,0 +1,54 @@
+//! These examples only really exist so we can use them for flamegraph. If they get annoying to
+//! maintain, feel free to delete.
+
+use ssz::{Decode, Encode};
+use types::{
+    test_utils::generate_deterministic_keypair, BeaconState, Eth1Data, EthSpec, Hash256,
+    MinimalEthSpec, Validator,
+};
+
+type E = MinimalEthSpec;
+
+fn get_state(validator_count: usize) -> BeaconState<E> {
+    let spec = &E::default_spec();
+    let eth1_data = Eth1Data {
+        deposit_root: Hash256::zero(),
+        deposit_count: 0,
+        block_hash: Hash256::zero(),
+    };
+
+    let mut state = BeaconState::new(0, eth1_data, spec);
+
+    for i in 0..validator_count {
+        state.balances.push(i as u64).expect("should add balance");
+        state
+            .validators
+            .push(Validator {
+                pubkey: generate_deterministic_keypair(i).pk.into(),
+                withdrawal_credentials: Hash256::from_low_u64_le(i as u64),
+                effective_balance: i as u64,
+                slashed: i % 2 == 0,
+                activation_eligibility_epoch: i.into(),
+                activation_epoch: i.into(),
+                exit_epoch: i.into(),
+                withdrawable_epoch: i.into(),
+            })
+            .expect("should add validator");
+    }
+
+    state
+}
+
+fn main() {
+    let validator_count = 1_024;
+    let mut state = get_state(validator_count);
+    state.update_tree_hash_cache().expect("should update cache");
+
+    actual_thing::<E>(&mut state);
+}
+
+fn actual_thing<T: EthSpec>(state: &mut BeaconState<T>) {
+    for _ in 0..200_024 {
+        let _ = state.update_tree_hash_cache().expect("should update cache");
+    }
+}
diff --git a/eth2/types/src/beacon_state.rs b/eth2/types/src/beacon_state.rs
index b86ba42de7..a9f66f171b 100644
--- a/eth2/types/src/beacon_state.rs
+++ b/eth2/types/src/beacon_state.rs
@@ -2,7 +2,7 @@ use self::committee_cache::get_active_validator_indices;
 use self::exit_cache::ExitCache;
 use crate::test_utils::TestRandom;
 use crate::*;
-use cached_tree_hash::{CachedTreeHash, MultiTreeHashCache, TreeHashCache};
+use cached_tree_hash::{CacheArena, CachedTreeHash};
 use compare_fields_derive::CompareFields;
 use eth2_hashing::hash;
 use int_to_bytes::{int_to_bytes4, int_to_bytes8};
@@ -14,16 +14,18 @@ use ssz_types::{typenum::Unsigned, BitVector, FixedVector};
 use swap_or_not_shuffle::compute_shuffled_index;
 use test_random_derive::TestRandom;
 use tree_hash::TreeHash;
-use tree_hash_derive::{CachedTreeHash, TreeHash};
+use tree_hash_derive::TreeHash;
 
 pub use self::committee_cache::CommitteeCache;
 pub use eth_spec::*;
+pub use tree_hash_cache::BeaconTreeHashCache;
 
 #[macro_use]
 mod committee_cache;
 mod exit_cache;
 mod pubkey_cache;
 mod tests;
+mod tree_hash_cache;
 
 pub const CACHED_EPOCHS: usize = 3;
 const MAX_RANDOM_BYTE: u64 = (1 << 8) - 1;
@@ -61,8 +63,11 @@ pub enum Error {
     ExitCacheUninitialized,
     CommitteeCacheUninitialized(Option<RelativeEpoch>),
     SszTypesError(ssz_types::Error),
+    TreeHashCacheNotInitialized,
     CachedTreeHashError(cached_tree_hash::Error),
     InvalidValidatorPubkey(ssz::DecodeError),
+    ValidatorRegistryShrunk,
+    TreeHashCacheInconsistent,
 }
 
 /// Control whether an epoch-indexed field can be indexed at the next epoch or not.
@@ -81,39 +86,6 @@ impl AllowNextEpoch {
     }
 }
 
-#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
-pub struct BeaconTreeHashCache {
-    initialized: bool,
-    block_roots: TreeHashCache,
-    state_roots: TreeHashCache,
-    historical_roots: TreeHashCache,
-    validators: MultiTreeHashCache,
-    balances: TreeHashCache,
-    randao_mixes: TreeHashCache,
-    slashings: TreeHashCache,
-}
-
-impl BeaconTreeHashCache {
-    pub fn is_initialized(&self) -> bool {
-        self.initialized
-    }
-
-    /// Returns the approximate size of the cache in bytes.
-    ///
-    /// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
-    /// We focus instead on the lists of hashes, which should massively outweigh the items that we
-    /// ignore.
-    pub fn approx_mem_size(&self) -> usize {
-        self.block_roots.approx_mem_size()
-            + self.state_roots.approx_mem_size()
-            + self.historical_roots.approx_mem_size()
-            + self.validators.approx_mem_size()
-            + self.balances.approx_mem_size()
-            + self.randao_mixes.approx_mem_size()
-            + self.slashings.approx_mem_size()
-    }
-}
-
 /// The state of the `BeaconChain` at some slot.
 ///
 /// Spec v0.9.1
@@ -127,11 +99,9 @@ impl BeaconTreeHashCache {
     Encode,
     Decode,
     TreeHash,
-    CachedTreeHash,
     CompareFields,
 )]
 #[serde(bound = "T: EthSpec")]
-#[cached_tree_hash(type = "BeaconTreeHashCache")]
 pub struct BeaconState<T>
 where
     T: EthSpec,
@@ -144,12 +114,9 @@ where
     // History
     pub latest_block_header: BeaconBlockHeader,
     #[compare_fields(as_slice)]
-    #[cached_tree_hash(block_roots)]
     pub block_roots: FixedVector<Hash256, T::SlotsPerHistoricalRoot>,
     #[compare_fields(as_slice)]
-    #[cached_tree_hash(state_roots)]
     pub state_roots: FixedVector<Hash256, T::SlotsPerHistoricalRoot>,
-    #[cached_tree_hash(historical_roots)]
     pub historical_roots: VariableList<Hash256, T::HistoricalRootsLimit>,
 
     // Ethereum 1.0 chain data
@@ -159,18 +126,14 @@ where
 
     // Registry
     #[compare_fields(as_slice)]
-    #[cached_tree_hash(validators)]
     pub validators: VariableList<Validator, T::ValidatorRegistryLimit>,
     #[compare_fields(as_slice)]
-    #[cached_tree_hash(balances)]
     pub balances: VariableList<u64, T::ValidatorRegistryLimit>,
 
     // Randomness
-    #[cached_tree_hash(randao_mixes)]
     pub randao_mixes: FixedVector<Hash256, T::EpochsPerHistoricalVector>,
 
     // Slashings
-    #[cached_tree_hash(slashings)]
     pub slashings: FixedVector<u64, T::EpochsPerSlashingsVector>,
 
     // Attestations
@@ -208,7 +171,7 @@ where
     #[ssz(skip_deserializing)]
     #[tree_hash(skip_hashing)]
     #[test_random(default)]
-    pub tree_hash_cache: BeaconTreeHashCache,
+    pub tree_hash_cache: Option<BeaconTreeHashCache>,
 }
 
 impl<T: EthSpec> BeaconState<T> {
@@ -263,7 +226,7 @@ impl<T: EthSpec> BeaconState<T> {
             ],
             pubkey_cache: PubkeyCache::default(),
             exit_cache: ExitCache::default(),
-            tree_hash_cache: BeaconTreeHashCache::default(),
+            tree_hash_cache: None,
         }
     }
 
@@ -928,8 +891,8 @@ impl<T: EthSpec> BeaconState<T> {
 
     /// Initialize but don't fill the tree hash cache, if it isn't already initialized.
     pub fn initialize_tree_hash_cache(&mut self) {
-        if !self.tree_hash_cache.initialized {
-            self.tree_hash_cache = Self::new_tree_hash_cache();
+        if self.tree_hash_cache.is_none() {
+            self.tree_hash_cache = Some(BeaconTreeHashCache::new(self))
         }
     }
 
@@ -940,7 +903,7 @@ impl<T: EthSpec> BeaconState<T> {
 
     /// Build the tree hash cache, with blatant disregard for any existing cache.
     pub fn force_build_tree_hash_cache(&mut self) -> Result<(), Error> {
-        self.tree_hash_cache.initialized = false;
+        self.tree_hash_cache = None;
         self.build_tree_hash_cache()
     }
 
@@ -950,16 +913,22 @@ impl<T: EthSpec> BeaconState<T> {
     pub fn update_tree_hash_cache(&mut self) -> Result<Hash256, Error> {
         self.initialize_tree_hash_cache();
 
-        let mut cache = std::mem::replace(&mut self.tree_hash_cache, <_>::default());
-        let result = self.recalculate_tree_hash_root(&mut cache);
-        std::mem::replace(&mut self.tree_hash_cache, cache);
+        let cache = self.tree_hash_cache.take();
 
-        Ok(result?)
+        if let Some(mut cache) = cache {
+            // Note: we return early if the tree hash fails, leaving `self.tree_hash_cache` as
+            // None. There's no need to keep a cache that fails.
+            let root = cache.recalculate_tree_hash_root(self)?;
+            self.tree_hash_cache = Some(cache);
+            Ok(root)
+        } else {
+            Err(Error::TreeHashCacheNotInitialized)
+        }
     }
 
     /// Completely drops the tree hash cache, replacing it with a new, empty cache.
     pub fn drop_tree_hash_cache(&mut self) {
-        self.tree_hash_cache = BeaconTreeHashCache::default();
+        self.tree_hash_cache = None;
     }
 
     /// Iterate through all validators and decompress their public key, unless it has already been
@@ -1008,7 +977,7 @@ impl<T: EthSpec> BeaconState<T> {
             ],
             pubkey_cache: PubkeyCache::default(),
             exit_cache: ExitCache::default(),
-            tree_hash_cache: BeaconTreeHashCache::default(),
+            tree_hash_cache: None,
         }
     }
 
@@ -1019,6 +988,24 @@ impl<T: EthSpec> BeaconState<T> {
     }
 }
 
+/// This implementation primarily exists to satisfy some testing requirements (ef_tests). It is
+/// recommended to use the methods directly on the beacon state instead.
+impl<T: EthSpec> CachedTreeHash<BeaconTreeHashCache> for BeaconState<T> {
+    fn new_tree_hash_cache(&self, _arena: &mut CacheArena) -> BeaconTreeHashCache {
+        BeaconTreeHashCache::new(self)
+    }
+
+    fn recalculate_tree_hash_root(
+        &self,
+        _arena: &mut CacheArena,
+        cache: &mut BeaconTreeHashCache,
+    ) -> Result<Hash256, cached_tree_hash::Error> {
+        cache
+            .recalculate_tree_hash_root(self)
+            .map_err(|_| cached_tree_hash::Error::CacheInconsistent)
+    }
+}
+
 impl From<RelativeEpochError> for Error {
     fn from(e: RelativeEpochError) -> Error {
         Error::RelativeEpochError(e)
diff --git a/eth2/types/src/beacon_state/tree_hash_cache.rs b/eth2/types/src/beacon_state/tree_hash_cache.rs
new file mode 100644
index 0000000000..a6271b7290
--- /dev/null
+++ b/eth2/types/src/beacon_state/tree_hash_cache.rs
@@ -0,0 +1,274 @@
+use super::Error;
+use crate::{BeaconState, EthSpec, Hash256, Unsigned, Validator};
+use cached_tree_hash::{int_log, CacheArena, CachedTreeHash, TreeHashCache};
+use rayon::prelude::*;
+use ssz_derive::{Decode, Encode};
+use tree_hash::{mix_in_length, TreeHash};
+
+/// The number of validator record tree hash caches stored in each arena.
+///
+/// This is primarily used for concurrency; if we have 16 validators and set `VALIDATORS_PER_ARENA
+/// == 8` then it is possible to do a 2-core concurrent hash.
+///
+/// Do not set to 0.
+const VALIDATORS_PER_ARENA: usize = 4_096;
+
+/// A cache that performs a caching tree hash of the entire `BeaconState` struct.
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct BeaconTreeHashCache {
+    // Validators cache
+    validators: ValidatorsListTreeHashCache,
+    // Arenas
+    fixed_arena: CacheArena,
+    balances_arena: CacheArena,
+    slashings_arena: CacheArena,
+    // Caches
+    block_roots: TreeHashCache,
+    state_roots: TreeHashCache,
+    historical_roots: TreeHashCache,
+    balances: TreeHashCache,
+    randao_mixes: TreeHashCache,
+    slashings: TreeHashCache,
+}
+
+impl BeaconTreeHashCache {
+    /// Instantiates a new cache.
+    ///
+    /// Allocates the necessary memory to store all of the cached Merkle trees but does not perform
+    /// any hashing.
+    pub fn new<T: EthSpec>(state: &BeaconState<T>) -> Self {
+        let mut fixed_arena = CacheArena::default();
+        let block_roots = state.block_roots.new_tree_hash_cache(&mut fixed_arena);
+        let state_roots = state.state_roots.new_tree_hash_cache(&mut fixed_arena);
+        let historical_roots = state.historical_roots.new_tree_hash_cache(&mut fixed_arena);
+        let randao_mixes = state.randao_mixes.new_tree_hash_cache(&mut fixed_arena);
+
+        let validators = ValidatorsListTreeHashCache::new::<T>(&state.validators[..]);
+
+        let mut balances_arena = CacheArena::default();
+        let balances = state.balances.new_tree_hash_cache(&mut balances_arena);
+
+        let mut slashings_arena = CacheArena::default();
+        let slashings = state.slashings.new_tree_hash_cache(&mut slashings_arena);
+
+        Self {
+            validators,
+            fixed_arena,
+            balances_arena,
+            slashings_arena,
+            block_roots,
+            state_roots,
+            historical_roots,
+            balances,
+            randao_mixes,
+            slashings,
+        }
+    }
+
+    /// Updates the cache and returns the tree hash root for the given `state`.
+    ///
+    /// The provided `state` should be a descendant of the last `state` given to this function, or
+    /// the `Self::new` function.
+    pub fn recalculate_tree_hash_root<T: EthSpec>(
+        &mut self,
+        state: &BeaconState<T>,
+    ) -> Result<Hash256, Error> {
+        let mut leaves = vec![];
+
+        leaves.append(&mut state.genesis_time.tree_hash_root());
+        leaves.append(&mut state.slot.tree_hash_root());
+        leaves.append(&mut state.fork.tree_hash_root());
+        leaves.append(&mut state.latest_block_header.tree_hash_root());
+        leaves.extend_from_slice(
+            state
+                .block_roots
+                .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.block_roots)?
+                .as_bytes(),
+        );
+        leaves.extend_from_slice(
+            state
+                .state_roots
+                .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.state_roots)?
+                .as_bytes(),
+        );
+        leaves.extend_from_slice(
+            state
+                .historical_roots
+                .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.historical_roots)?
+                .as_bytes(),
+        );
+        leaves.append(&mut state.eth1_data.tree_hash_root());
+        leaves.append(&mut state.eth1_data_votes.tree_hash_root());
+        leaves.append(&mut state.eth1_deposit_index.tree_hash_root());
+        leaves.extend_from_slice(
+            self.validators
+                .recalculate_tree_hash_root(&state.validators[..])?
+                .as_bytes(),
+        );
+        leaves.extend_from_slice(
+            state
+                .balances
+                .recalculate_tree_hash_root(&mut self.balances_arena, &mut self.balances)?
+                .as_bytes(),
+        );
+        leaves.extend_from_slice(
+            state
+                .randao_mixes
+                .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.randao_mixes)?
+                .as_bytes(),
+        );
+        leaves.extend_from_slice(
+            state
+                .slashings
+                .recalculate_tree_hash_root(&mut self.slashings_arena, &mut self.slashings)?
+                .as_bytes(),
+        );
+        leaves.append(&mut state.previous_epoch_attestations.tree_hash_root());
+        leaves.append(&mut state.current_epoch_attestations.tree_hash_root());
+        leaves.append(&mut state.justification_bits.tree_hash_root());
+        leaves.append(&mut state.previous_justified_checkpoint.tree_hash_root());
+        leaves.append(&mut state.current_justified_checkpoint.tree_hash_root());
+        leaves.append(&mut state.finalized_checkpoint.tree_hash_root());
+
+        Ok(Hash256::from_slice(&tree_hash::merkle_root(&leaves, 0)))
+    }
+}
+
+/// A specialized cache for computing the tree hash root of `state.validators`.
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+struct ValidatorsListTreeHashCache {
+    list_arena: CacheArena,
+    list_cache: TreeHashCache,
+    values: ParallelValidatorTreeHash,
+}
+
+impl ValidatorsListTreeHashCache {
+    /// Instantiates a new cache.
+    ///
+    /// Allocates the necessary memory to store all of the cached Merkle trees but does not perform
+    /// any hashing.
+    fn new<E: EthSpec>(validators: &[Validator]) -> Self {
+        let mut list_arena = CacheArena::default();
+        Self {
+            list_cache: TreeHashCache::new(
+                &mut list_arena,
+                int_log(E::ValidatorRegistryLimit::to_usize()),
+                validators.len(),
+            ),
+            list_arena,
+            values: ParallelValidatorTreeHash::new::<E>(validators),
+        }
+    }
+
+    /// Updates the cache and returns the tree hash root for the given `validators`.
+    ///
+    /// This function makes assumptions that the `validators` list will only change in accordance
+    /// with valid per-block/per-slot state transitions.
+    fn recalculate_tree_hash_root(&mut self, validators: &[Validator]) -> Result<Hash256, Error> {
+        let mut list_arena = std::mem::replace(&mut self.list_arena, CacheArena::default());
+
+        let leaves = self
+            .values
+            .leaves(validators)?
+            .into_iter()
+            .flatten()
+            .map(|h| h.to_fixed_bytes())
+            .collect::<Vec<_>>();
+
+        let list_root = self
+            .list_cache
+            .recalculate_merkle_root(&mut list_arena, leaves.into_iter())?;
+
+        std::mem::replace(&mut self.list_arena, list_arena);
+
+        Ok(Hash256::from_slice(&mix_in_length(
+            list_root.as_bytes(),
+            validators.len(),
+        )))
+    }
+}
+
+/// Provides a cache for each of the `Validator` objects in `state.validators` and computes the
+/// roots of these using Rayon parallelization.
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct ParallelValidatorTreeHash {
+    /// Each arena and its associated sub-trees.
+    arenas: Vec<(CacheArena, Vec<TreeHashCache>)>,
+}
+
+impl ParallelValidatorTreeHash {
+    /// Instantiates a new cache.
+    ///
+    /// Allocates the necessary memory to store all of the cached Merkle trees but does not perform
+    /// any hashing.
+    fn new<E: EthSpec>(validators: &[Validator]) -> Self {
+        let num_arenas = (validators.len() + VALIDATORS_PER_ARENA - 1) / VALIDATORS_PER_ARENA;
+        let mut arenas = vec![(CacheArena::default(), vec![]); num_arenas];
+
+        validators.iter().enumerate().for_each(|(i, v)| {
+            let (arena, caches) = &mut arenas[i / VALIDATORS_PER_ARENA];
+            caches.push(v.new_tree_hash_cache(arena))
+        });
+
+        Self { arenas }
+    }
+
+    /// Returns the number of validators stored in self.
+    fn len(&self) -> usize {
+        self.arenas.last().map_or(0, |last| {
+            // Subtraction cannot underflow because `.last()` ensures the `.len() > 0`.
+            (self.arenas.len() - 1) * VALIDATORS_PER_ARENA + last.1.len()
+        })
+    }
+
+    /// Updates the caches for each `Validator` in `validators` and returns a list that maps 1:1
+    /// with `validators` to the hash of each validator.
+    ///
+    /// This function makes assumptions that the `validators` list will only change in accordance
+    /// with valid per-block/per-slot state transitions.
+    fn leaves(&mut self, validators: &[Validator]) -> Result<Vec<Vec<Hash256>>, Error> {
+        if self.len() < validators.len() {
+            validators.iter().skip(self.len()).for_each(|v| {
+                if self
+                    .arenas
+                    .last()
+                    .map_or(true, |last| last.1.len() >= VALIDATORS_PER_ARENA)
+                {
+                    let mut arena = CacheArena::default();
+                    let cache = v.new_tree_hash_cache(&mut arena);
+                    self.arenas.push((arena, vec![cache]))
+                } else {
+                    let (arena, caches) = &mut self
+                        .arenas
+                        .last_mut()
+                        .expect("Cannot reach this block if arenas is empty.");
+                    caches.push(v.new_tree_hash_cache(arena))
+                }
+            })
+        } else if validators.len() < self.len() {
+            return Err(Error::ValidatorRegistryShrunk);
+        }
+
+        self.arenas
+            .par_iter_mut()
+            .enumerate()
+            .map(|(arena_index, (arena, caches))| {
+                caches
+                    .iter_mut()
+                    .enumerate()
+                    .map(move |(cache_index, cache)| {
+                        let val_index = (arena_index * VALIDATORS_PER_ARENA) + cache_index;
+
+                        let validator = validators
+                            .get(val_index)
+                            .ok_or_else(|| Error::TreeHashCacheInconsistent)?;
+
+                        validator
+                            .recalculate_tree_hash_root(arena, cache)
+                            .map_err(Error::CachedTreeHashError)
+                    })
+                    .collect()
+            })
+            .collect()
+    }
+}
diff --git a/eth2/types/src/lib.rs b/eth2/types/src/lib.rs
index a82d398f91..ca255dad25 100644
--- a/eth2/types/src/lib.rs
+++ b/eth2/types/src/lib.rs
@@ -47,7 +47,7 @@ pub use crate::beacon_block::BeaconBlock;
 pub use crate::beacon_block_body::BeaconBlockBody;
 pub use crate::beacon_block_header::BeaconBlockHeader;
 pub use crate::beacon_committee::{BeaconCommittee, OwnedBeaconCommittee};
-pub use crate::beacon_state::{Error as BeaconStateError, *};
+pub use crate::beacon_state::{BeaconTreeHashCache, Error as BeaconStateError, *};
 pub use crate::chain_spec::{ChainSpec, Domain, YamlConfig};
 pub use crate::checkpoint::Checkpoint;
 pub use crate::deposit::{Deposit, DEPOSIT_TREE_DEPTH};
diff --git a/eth2/types/src/tree_hash_impls.rs b/eth2/types/src/tree_hash_impls.rs
index 2d652c475c..bc2be51e52 100644
--- a/eth2/types/src/tree_hash_impls.rs
+++ b/eth2/types/src/tree_hash_impls.rs
@@ -2,40 +2,38 @@
 //!
 //! It makes some assumptions about the layouts and update patterns of other structs in this
 //! crate, and should be updated carefully whenever those structs are changed.
-use crate::{Hash256, Validator};
-use cached_tree_hash::{int_log, CachedTreeHash, Error, TreeHashCache};
+use crate::{Epoch, Hash256, Validator};
+use cached_tree_hash::{int_log, CacheArena, CachedTreeHash, Error, TreeHashCache};
+use int_to_bytes::int_to_fixed_bytes32;
 use tree_hash::TreeHash;
 
 /// Number of struct fields on `Validator`.
 const NUM_VALIDATOR_FIELDS: usize = 8;
 
 impl CachedTreeHash<TreeHashCache> for Validator {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        TreeHashCache::new(int_log(NUM_VALIDATOR_FIELDS))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        TreeHashCache::new(arena, int_log(NUM_VALIDATOR_FIELDS), NUM_VALIDATOR_FIELDS)
     }
 
     /// Efficiently tree hash a `Validator`, assuming it was updated by a valid state transition.
     ///
     /// Specifically, we assume that the `pubkey` and `withdrawal_credentials` fields are constant.
-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
-        // If the cache is empty, hash every field to fill it.
-        if cache.leaves().is_empty() {
-            return cache.recalculate_merkle_root(field_tree_hash_iter(self));
-        }
-
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
         // Otherwise just check the fields which might have changed.
         let dirty_indices = cache
             .leaves()
-            .iter_mut()
+            .iter_mut(arena)?
             .enumerate()
             .flat_map(|(i, leaf)| {
                 // Fields pubkey and withdrawal_credentials are constant
-                if i == 0 || i == 1 {
+                if (i == 0 || i == 1) && cache.initialized {
                     None
                 } else {
-                    let new_tree_hash = field_tree_hash_by_index(self, i);
-                    if leaf.as_bytes() != &new_tree_hash[..] {
-                        leaf.assign_from_slice(&new_tree_hash);
+                    if process_field_by_index(self, i, leaf, !cache.initialized) {
                         Some(i)
                     } else {
                         None
@@ -44,21 +42,25 @@ impl CachedTreeHash<TreeHashCache> for Validator {
             })
             .collect();
 
-        cache.update_merkle_root(dirty_indices)
+        cache.update_merkle_root(arena, dirty_indices)
     }
 }
 
-/// Get the tree hash root of a validator field by its position/index in the struct.
-fn field_tree_hash_by_index(v: &Validator, field_idx: usize) -> Vec<u8> {
+fn process_field_by_index(
+    v: &Validator,
+    field_idx: usize,
+    leaf: &mut Hash256,
+    force_update: bool,
+) -> bool {
     match field_idx {
-        0 => v.pubkey.tree_hash_root(),
-        1 => v.withdrawal_credentials.tree_hash_root(),
-        2 => v.effective_balance.tree_hash_root(),
-        3 => v.slashed.tree_hash_root(),
-        4 => v.activation_eligibility_epoch.tree_hash_root(),
-        5 => v.activation_epoch.tree_hash_root(),
-        6 => v.exit_epoch.tree_hash_root(),
-        7 => v.withdrawable_epoch.tree_hash_root(),
+        0 => process_vec_field(v.pubkey.tree_hash_root(), leaf, force_update),
+        1 => process_slice_field(v.withdrawal_credentials.as_bytes(), leaf, force_update),
+        2 => process_u64_field(v.effective_balance, leaf, force_update),
+        3 => process_bool_field(v.slashed, leaf, force_update),
+        4 => process_epoch_field(v.activation_eligibility_epoch, leaf, force_update),
+        5 => process_epoch_field(v.activation_epoch, leaf, force_update),
+        6 => process_epoch_field(v.exit_epoch, leaf, force_update),
+        7 => process_epoch_field(v.withdrawable_epoch, leaf, force_update),
         _ => panic!(
             "Validator type only has {} fields, {} out of bounds",
             NUM_VALIDATOR_FIELDS, field_idx
@@ -66,17 +68,35 @@ fn field_tree_hash_by_index(v: &Validator, field_idx: usize) -> Vec<u8> {
     }
 }
 
-/// Iterator over the tree hash roots of `Validator` fields.
-fn field_tree_hash_iter<'a>(
-    v: &'a Validator,
-) -> impl Iterator<Item = [u8; 32]> + ExactSizeIterator + 'a {
-    (0..NUM_VALIDATOR_FIELDS)
-        .map(move |i| field_tree_hash_by_index(v, i))
-        .map(|tree_hash_root| {
-            let mut res = [0; 32];
-            res.copy_from_slice(&tree_hash_root[0..32]);
-            res
-        })
+fn process_vec_field(new_tree_hash: Vec<u8>, leaf: &mut Hash256, force_update: bool) -> bool {
+    if force_update || leaf.as_bytes() != &new_tree_hash[..] {
+        leaf.assign_from_slice(&new_tree_hash);
+        true
+    } else {
+        false
+    }
+}
+
+fn process_slice_field(new_tree_hash: &[u8], leaf: &mut Hash256, force_update: bool) -> bool {
+    if force_update || leaf.as_bytes() != new_tree_hash {
+        leaf.assign_from_slice(&new_tree_hash);
+        true
+    } else {
+        false
+    }
+}
+
+fn process_u64_field(val: u64, leaf: &mut Hash256, force_update: bool) -> bool {
+    let new_tree_hash = int_to_fixed_bytes32(val);
+    process_slice_field(&new_tree_hash[..], leaf, force_update)
+}
+
+fn process_epoch_field(val: Epoch, leaf: &mut Hash256, force_update: bool) -> bool {
+    process_u64_field(val.as_u64(), leaf, force_update)
+}
+
+fn process_bool_field(val: bool, leaf: &mut Hash256, force_update: bool) -> bool {
+    process_u64_field(val as u64, leaf, force_update)
 }
 
 #[cfg(test)]
@@ -88,18 +108,24 @@ mod test {
     use rand_xorshift::XorShiftRng;
 
     fn test_validator_tree_hash(v: &Validator) {
-        let mut cache = Validator::new_tree_hash_cache();
+        let arena = &mut CacheArena::default();
+
+        let mut cache = v.new_tree_hash_cache(arena);
         // With a fresh cache
         assert_eq!(
             &v.tree_hash_root()[..],
-            v.recalculate_tree_hash_root(&mut cache).unwrap().as_bytes(),
+            v.recalculate_tree_hash_root(arena, &mut cache)
+                .unwrap()
+                .as_bytes(),
             "{:?}",
             v
         );
         // With a completely up-to-date cache
         assert_eq!(
             &v.tree_hash_root()[..],
-            v.recalculate_tree_hash_root(&mut cache).unwrap().as_bytes(),
+            v.recalculate_tree_hash_root(arena, &mut cache)
+                .unwrap()
+                .as_bytes(),
             "{:?}",
             v
         );
diff --git a/eth2/utils/cached_tree_hash/src/cache.rs b/eth2/utils/cached_tree_hash/src/cache.rs
index 002b4f9b0b..393cb112d2 100644
--- a/eth2/utils/cached_tree_hash/src/cache.rs
+++ b/eth2/utils/cached_tree_hash/src/cache.rs
@@ -1,46 +1,71 @@
+use crate::cache_arena;
 use crate::{Error, Hash256};
-use eth2_hashing::{hash_concat, ZERO_HASHES};
+use eth2_hashing::{hash32_concat, ZERO_HASHES};
 use ssz_derive::{Decode, Encode};
 use tree_hash::BYTES_PER_CHUNK;
 
+type CacheArena = cache_arena::CacheArena<Hash256>;
+type CacheArenaAllocation = cache_arena::CacheArenaAllocation<Hash256>;
+
 /// Sparse Merkle tree suitable for tree hashing vectors and lists.
 #[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
 pub struct TreeHashCache {
+    pub initialized: bool,
     /// Depth is such that the tree has a capacity for 2^depth leaves
     depth: usize,
     /// Sparse layers.
     ///
     /// The leaves are contained in `self.layers[self.depth]`, and each other layer `i`
     /// contains the parents of the nodes in layer `i + 1`.
-    layers: Vec<Vec<Hash256>>,
+    layers: Vec<CacheArenaAllocation>,
 }
 
 impl TreeHashCache {
-    /// Create a new cache with the given `depth`, but no actual content.
-    pub fn new(depth: usize) -> Self {
+    /// Create a new cache with the given `depth` with enough nodes allocated to suit `leaves`. All
+    /// nodes in every layer (not only the leaves) are set to `Hash256::zero()`.
+    pub fn new(arena: &mut CacheArena, depth: usize, leaves: usize) -> Self {
+        // TODO: what about when leaves is zero?
+        let layers = (0..=depth)
+            .map(|i| {
+                let vec = arena.alloc();
+                vec.extend_with_vec(
+                    arena,
+                    vec![Hash256::zero(); nodes_per_layer(i, depth, leaves)],
+                )
+                .expect(
+                    "A newly allocated sub-arena cannot fail unless it has reached max capacity",
+                );
+
+                vec
+            })
+            .collect();
+
         TreeHashCache {
+            initialized: false,
             depth,
-            layers: vec![vec![]; depth + 1],
+            layers,
         }
     }
 
     /// Compute the updated Merkle root for the given `leaves`.
     pub fn recalculate_merkle_root(
         &mut self,
+        arena: &mut CacheArena,
         leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
     ) -> Result<Hash256, Error> {
-        let dirty_indices = self.update_leaves(leaves)?;
-        self.update_merkle_root(dirty_indices)
+        let dirty_indices = self.update_leaves(arena, leaves)?;
+        self.update_merkle_root(arena, dirty_indices)
     }
 
     /// Phase 1 of the algorithm: compute the indices of all dirty leaves.
     pub fn update_leaves(
         &mut self,
+        arena: &mut CacheArena,
         mut leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
     ) -> Result<Vec<usize>, Error> {
         let new_leaf_count = leaves.len();
 
-        if new_leaf_count < self.leaves().len() {
+        if new_leaf_count < self.leaves().len(arena)? {
             return Err(Error::CannotShrink);
         } else if new_leaf_count > 2usize.pow(self.depth as u32) {
             return Err(Error::TooManyLeaves);
@@ -49,11 +74,11 @@ impl TreeHashCache {
         // Update the existing leaves
         let mut dirty = self
             .leaves()
-            .iter_mut()
+            .iter_mut(arena)?
             .enumerate()
             .zip(&mut leaves)
             .flat_map(|((i, leaf), new_leaf)| {
-                if leaf.as_bytes() != new_leaf {
+                if !self.initialized || leaf.as_bytes() != new_leaf {
                     leaf.assign_from_slice(&new_leaf);
                     Some(i)
                 } else {
@@ -63,9 +88,9 @@ impl TreeHashCache {
             .collect::<Vec<_>>();
 
         // Push the rest of the new leaves (if any)
-        dirty.extend(self.leaves().len()..new_leaf_count);
+        dirty.extend(self.leaves().len(arena)?..new_leaf_count);
         self.leaves()
-            .extend(leaves.map(|l| Hash256::from_slice(&l)));
+            .extend_with_vec(arena, leaves.map(|l| Hash256::from_slice(&l)).collect())?;
 
         Ok(dirty)
     }
@@ -73,9 +98,13 @@ impl TreeHashCache {
     /// Phase 2: propagate changes upwards from the leaves of the tree, and compute the root.
     ///
     /// Returns an error if `dirty_indices` is inconsistent with the cache.
-    pub fn update_merkle_root(&mut self, mut dirty_indices: Vec<usize>) -> Result<Hash256, Error> {
+    pub fn update_merkle_root(
+        &mut self,
+        arena: &mut CacheArena,
+        mut dirty_indices: Vec<usize>,
+    ) -> Result<Hash256, Error> {
         if dirty_indices.is_empty() {
-            return Ok(self.root());
+            return Ok(self.root(arena));
         }
 
         let mut depth = self.depth;
@@ -87,24 +116,26 @@ impl TreeHashCache {
                 let left_idx = 2 * idx;
                 let right_idx = left_idx + 1;
 
-                let left = self.layers[depth][left_idx];
+                let left = self.layers[depth]
+                    .get(arena, left_idx)?
+                    .ok_or_else(|| Error::MissingLeftIdx(left_idx))?;
                 let right = self.layers[depth]
-                    .get(right_idx)
+                    .get(arena, right_idx)?
                     .copied()
                     .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth - depth]));
 
-                let new_hash = hash_concat(left.as_bytes(), right.as_bytes());
+                let new_hash = hash32_concat(left.as_bytes(), right.as_bytes());
 
-                match self.layers[depth - 1].get_mut(idx) {
+                match self.layers[depth - 1].get_mut(arena, idx)? {
                     Some(hash) => {
                         hash.assign_from_slice(&new_hash);
                     }
                     None => {
                         // Parent layer should already contain nodes for all non-dirty indices
-                        if idx != self.layers[depth - 1].len() {
+                        if idx != self.layers[depth - 1].len(arena)? {
                             return Err(Error::CacheInconsistent);
                         }
-                        self.layers[depth - 1].push(Hash256::from_slice(&new_hash));
+                        self.layers[depth - 1].push(arena, Hash256::from_slice(&new_hash))?;
                     }
                 }
             }
@@ -113,29 +144,23 @@ impl TreeHashCache {
             depth -= 1;
         }
 
-        Ok(self.root())
+        self.initialized = true;
+
+        Ok(self.root(arena))
     }
 
     /// Get the root of this cache, without doing any updates/computation.
-    pub fn root(&self) -> Hash256 {
+    pub fn root(&self, arena: &CacheArena) -> Hash256 {
         self.layers[0]
-            .get(0)
+            .get(arena, 0)
+            .expect("cached tree should have a root layer")
             .copied()
             .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth]))
     }
 
-    pub fn leaves(&mut self) -> &mut Vec<Hash256> {
+    pub fn leaves(&mut self) -> &mut CacheArenaAllocation {
         &mut self.layers[self.depth]
     }
-
-    /// Returns the approximate size of the cache in bytes.
-    ///
-    /// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
-    /// We focus instead on the lists of hashes, which should massively outweigh the items that we
-    /// ignore.
-    pub fn approx_mem_size(&self) -> usize {
-        self.layers.iter().map(|layer| layer.len() * 32).sum()
-    }
 }
 
 /// Compute the dirty indices for one layer up.
@@ -144,3 +169,52 @@ fn lift_dirty(dirty_indices: &[usize]) -> Vec<usize> {
     new_dirty.dedup();
     new_dirty
 }
+
+/// Returns the number of nodes that should be at each layer of a tree with the given `depth` and
+/// number of `leaves`.
+///
+/// Note: the top-most layer is `0` and a tree that has 8 leaves (4 layers) has a depth of 3 (_not_
+/// a depth of 4).
+///
+/// ## Example
+///
+/// Consider the following tree that has `depth = 3` and `leaves = 5`.
+///
+///```ignore
+/// 0        o      <-- height 0 has 1 node
+///        /   \
+/// 1    o      o   <-- height 1 has 2 nodes
+///     / \    /
+/// 2  o   o   o    <-- height 2 has 3 nodes
+///   /\   /\ /
+/// 3 o o o o o     <-- height 3 has 5 nodes
+/// ```
+fn nodes_per_layer(layer: usize, depth: usize, leaves: usize) -> usize {
+    if layer == depth {
+        leaves
+    } else {
+        let leaves_per_node = 1 << (depth - layer);
+        (leaves + leaves_per_node - 1) / leaves_per_node
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_node_per_layer_unbalanced_tree() {
+        assert_eq!(nodes_per_layer(0, 3, 5), 1);
+        assert_eq!(nodes_per_layer(1, 3, 5), 2);
+        assert_eq!(nodes_per_layer(2, 3, 5), 3);
+        assert_eq!(nodes_per_layer(3, 3, 5), 5);
+    }
+
+    #[test]
+    fn test_node_per_layer_balanced_tree() {
+        assert_eq!(nodes_per_layer(0, 3, 8), 1);
+        assert_eq!(nodes_per_layer(1, 3, 8), 2);
+        assert_eq!(nodes_per_layer(2, 3, 8), 4);
+        assert_eq!(nodes_per_layer(3, 3, 8), 8);
+    }
+}
diff --git a/eth2/utils/cached_tree_hash/src/cache_arena.rs b/eth2/utils/cached_tree_hash/src/cache_arena.rs
new file mode 100644
index 0000000000..272a9d2db6
--- /dev/null
+++ b/eth2/utils/cached_tree_hash/src/cache_arena.rs
@@ -0,0 +1,497 @@
+use ssz::{Decode, Encode};
+use ssz_derive::{Decode, Encode};
+use std::marker::PhantomData;
+use std::ops::Range;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum Error {
+    UnknownAllocId(usize),
+    OffsetOverflow,
+    OffsetUnderflow,
+    RangeOverFlow,
+}
+
+/// Inspired by the `TypedArena` crate, the `CacheArena` provides a single contiguous memory
+/// allocation from which smaller allocations can be produced. In effect this allows for having
+/// many `Vec<T>`-like objects all stored contiguously on the heap with the aim of reducing memory
+/// fragmentation.
+///
+/// Because all of the allocations are stored in one big `Vec`, resizing any of the allocations
+/// will mean all items to the right of that allocation will be moved.
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct CacheArena<T: Encode + Decode> {
+    /// The backing array, storing cached values.
+    backing: Vec<T>,
+    /// A list of offsets indicating the start of each allocation.
+    offsets: Vec<usize>,
+}
+
+impl<T: Encode + Decode> CacheArena<T> {
+    /// Produce an allocation of zero length at the end of the backing array.
+    pub fn alloc(&mut self) -> CacheArenaAllocation<T> {
+        let alloc_id = self.offsets.len();
+        self.offsets.push(self.backing.len());
+
+        CacheArenaAllocation {
+            alloc_id,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Update `self.offsets` to reflect an allocation increasing in size.
+    fn grow(&mut self, alloc_id: usize, grow_by: usize) -> Result<(), Error> {
+        if alloc_id < self.offsets.len() {
+            self.offsets
+                .iter_mut()
+                .skip(alloc_id + 1)
+                .try_for_each(|offset| {
+                    *offset = offset
+                        .checked_add(grow_by)
+                        .ok_or_else(|| Error::OffsetOverflow)?;
+
+                    Ok(())
+                })
+        } else {
+            Err(Error::UnknownAllocId(alloc_id))
+        }
+    }
+
+    /// Update `self.offsets` to reflect an allocation decreasing in size.
+    fn shrink(&mut self, alloc_id: usize, shrink_by: usize) -> Result<(), Error> {
+        if alloc_id < self.offsets.len() {
+            self.offsets
+                .iter_mut()
+                .skip(alloc_id + 1)
+                .try_for_each(|offset| {
+                    *offset = offset
+                        .checked_sub(shrink_by)
+                        .ok_or_else(|| Error::OffsetUnderflow)?;
+
+                    Ok(())
+                })
+        } else {
+            Err(Error::UnknownAllocId(alloc_id))
+        }
+    }
+
+    /// Similar to `Vec::splice`, however the range is relative to some allocation (`alloc_id`) and
+    /// the replaced items are not returned (i.e., it is forgetful).
+    ///
+    /// To reiterate, the given `range` should be relative to the given `alloc_id`, not
+    /// `self.backing`. E.g., if the allocation has an offset of `20` and the range is `0..1`, then
+    /// the splice will translate to `self.backing[20..21]`.
+    fn splice_forgetful<I: IntoIterator<Item = T>>(
+        &mut self,
+        alloc_id: usize,
+        range: Range<usize>,
+        replace_with: I,
+    ) -> Result<(), Error> {
+        let offset = *self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let start = range
+            .start
+            .checked_add(offset)
+            .ok_or_else(|| Error::RangeOverFlow)?;
+        let end = range
+            .end
+            .checked_add(offset)
+            .ok_or_else(|| Error::RangeOverFlow)?;
+
+        let prev_len = self.backing.len();
+
+        self.backing.splice(start..end, replace_with);
+
+        if prev_len < self.backing.len() {
+            self.grow(alloc_id, self.backing.len() - prev_len)?;
+        } else if prev_len > self.backing.len() {
+            self.shrink(alloc_id, prev_len - self.backing.len())?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the length of the specified allocation.
+    fn len(&self, alloc_id: usize) -> Result<usize, Error> {
+        let start = self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let end = self
+            .offsets
+            .get(alloc_id + 1)
+            .copied()
+            .unwrap_or_else(|| self.backing.len());
+
+        Ok(end - start)
+    }
+
+    /// Get the value at position `i`, relative to the offset at `alloc_id`.
+    fn get(&self, alloc_id: usize, i: usize) -> Result<Option<&T>, Error> {
+        if i < self.len(alloc_id)? {
+            let offset = self
+                .offsets
+                .get(alloc_id)
+                .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+            Ok(self.backing.get(i + offset))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Mutably get the value at position `i`, relative to the offset at `alloc_id`.
+    fn get_mut(&mut self, alloc_id: usize, i: usize) -> Result<Option<&mut T>, Error> {
+        if i < self.len(alloc_id)? {
+            let offset = self
+                .offsets
+                .get(alloc_id)
+                .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+            Ok(self.backing.get_mut(i + offset))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Returns the range in `self.backing` that is occupied by some allocation.
+    fn range(&self, alloc_id: usize) -> Result<Range<usize>, Error> {
+        let start = *self
+            .offsets
+            .get(alloc_id)
+            .ok_or_else(|| Error::UnknownAllocId(alloc_id))?;
+        let end = self
+            .offsets
+            .get(alloc_id + 1)
+            .copied()
+            .unwrap_or_else(|| self.backing.len());
+
+        Ok(start..end)
+    }
+
+    /// Iterate through all values in some allocation.
+    fn iter(&self, alloc_id: usize) -> Result<impl Iterator<Item = &T>, Error> {
+        Ok(self.backing[self.range(alloc_id)?].iter())
+    }
+
+    /// Mutably iterate through all values in some allocation.
+    fn iter_mut(&mut self, alloc_id: usize) -> Result<impl Iterator<Item = &mut T>, Error> {
+        let range = self.range(alloc_id)?;
+        Ok(self.backing[range].iter_mut())
+    }
+
+    /// Returns the total number of items stored in the arena, i.e., the combined length of all
+    /// allocations.
+    pub fn backing_len(&self) -> usize {
+        self.backing.len()
+    }
+}
+
+/// An allocation from a `CacheArena` that behaves like a `Vec<T>`.
+///
+/// All functions will modify the given `arena` instead of `self`. As such, it is safe to have
+/// multiple instances of this allocation at once.
+///
+/// For all functions that accept a `CacheArena<T>` parameter, that arena should always be the one
+/// that created `Self`. I.e., do not mix-and-match allocations and arenas unless you _really_ know
+/// what you're doing (or want to have a bad time).
+#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
+pub struct CacheArenaAllocation<T> {
+    alloc_id: usize,
+    #[ssz(skip_serializing)]
+    #[ssz(skip_deserializing)]
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Encode + Decode> CacheArenaAllocation<T> {
+    /// Grow the allocation in `arena`, appending `vec` to the current values.
+    pub fn extend_with_vec(&self, arena: &mut CacheArena<T>, vec: Vec<T>) -> Result<(), Error> {
+        let len = arena.len(self.alloc_id)?;
+        arena.splice_forgetful(self.alloc_id, len..len, vec)?;
+        Ok(())
+    }
+
+    /// Push `item` to the end of the current allocation in `arena`.
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn push(&self, arena: &mut CacheArena<T>, item: T) -> Result<(), Error> {
+        let len = arena.len(self.alloc_id)?;
+        arena.splice_forgetful(self.alloc_id, len..len, vec![item])?;
+        Ok(())
+    }
+
+    /// Get the i'th item in the `arena` (relative to this allocation).
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn get<'a>(&self, arena: &'a CacheArena<T>, i: usize) -> Result<Option<&'a T>, Error> {
+        arena.get(self.alloc_id, i)
+    }
+
+    /// Mutably get the i'th item in the `arena` (relative to this allocation).
+    ///
+    /// An error is returned if this allocation is not known to the given `arena`.
+    pub fn get_mut<'a>(
+        &self,
+        arena: &'a mut CacheArena<T>,
+        i: usize,
+    ) -> Result<Option<&'a mut T>, Error> {
+        arena.get_mut(self.alloc_id, i)
+    }
+
+    /// Iterate through all items in the `arena` (relative to this allocation).
+    pub fn iter<'a>(&self, arena: &'a CacheArena<T>) -> Result<impl Iterator<Item = &'a T>, Error> {
+        arena.iter(self.alloc_id)
+    }
+
+    /// Mutably iterate through all items in the `arena` (relative to this allocation).
+    pub fn iter_mut<'a>(
+        &self,
+        arena: &'a mut CacheArena<T>,
+    ) -> Result<impl Iterator<Item = &'a mut T>, Error> {
+        arena.iter_mut(self.alloc_id)
+    }
+
+    /// Return the number of items stored in this allocation.
+    pub fn len(&self, arena: &CacheArena<T>) -> Result<usize, Error> {
+        arena.len(self.alloc_id)
+    }
+
+    /// Returns true if this allocation is empty.
+    pub fn is_empty(&self, arena: &CacheArena<T>) -> Result<bool, Error> {
+        self.len(arena).map(|len| len == 0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Hash256;
+
+    type CacheArena = super::CacheArena<Hash256>;
+    type CacheArenaAllocation = super::CacheArenaAllocation<Hash256>;
+
+    fn hash(i: usize) -> Hash256 {
+        Hash256::from_low_u64_be(i as u64)
+    }
+
+    fn test_routine(arena: &mut CacheArena, sub: &mut CacheArenaAllocation) {
+        let mut len = sub.len(arena).expect("should exist");
+
+        sub.push(arena, hash(len)).expect("should push");
+        len += 1;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after first push sub should have len {}",
+            len
+        );
+        assert_eq!(
+            sub.is_empty(arena).expect("should exist"),
+            false,
+            "new sub should not be empty"
+        );
+
+        sub.push(arena, hash(len)).expect("should push again");
+        len += 1;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after second push sub should have len {}",
+            len
+        );
+
+        sub.extend_with_vec(arena, vec![hash(len), hash(len + 1)])
+            .expect("should extend with vec");
+        len += 2;
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            len,
+            "after extend sub should have len {}",
+            len
+        );
+
+        let collected = sub
+            .iter(arena)
+            .expect("should get iter")
+            .cloned()
+            .collect::<Vec<_>>();
+        let collected_mut = sub
+            .iter_mut(arena)
+            .expect("should get mut iter")
+            .map(|v| *v)
+            .collect::<Vec<_>>();
+
+        for i in 0..len {
+            assert_eq!(
+                *sub.get(arena, i)
+                    .expect("should exist")
+                    .expect("should get sub index"),
+                hash(i),
+                "get({}) should be hash({})",
+                i,
+                i
+            );
+
+            assert_eq!(
+                collected[i],
+                hash(i),
+                "collected[{}] should be hash({})",
+                i,
+                i
+            );
+
+            assert_eq!(
+                collected_mut[i],
+                hash(i),
+                "collected_mut[{}] should be hash({})",
+                i,
+                i
+            );
+        }
+    }
+
+    #[test]
+    fn single() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub = arena.alloc();
+
+        assert_eq!(
+            sub.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub);
+    }
+
+    #[test]
+    fn double() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub_01 = arena.alloc();
+        assert_eq!(
+            sub_01.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_01.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        let mut sub_02 = arena.alloc();
+        assert_eq!(
+            sub_02.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_02.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_01);
+        test_routine(arena, &mut sub_02);
+    }
+
+    #[test]
+    fn one_then_other() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut sub_01 = arena.alloc();
+        assert_eq!(
+            sub_01.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_01.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_01);
+
+        let mut sub_02 = arena.alloc();
+        assert_eq!(
+            sub_02.len(arena).expect("should exist"),
+            0,
+            "new sub should have len 0"
+        );
+        assert_eq!(
+            sub_02.is_empty(arena).expect("should exist"),
+            true,
+            "new sub should be empty"
+        );
+
+        test_routine(arena, &mut sub_02);
+        test_routine(arena, &mut sub_01);
+        test_routine(arena, &mut sub_02);
+    }
+
+    #[test]
+    fn many() {
+        let arena = &mut CacheArena::default();
+
+        assert_eq!(arena.backing.len(), 0, "should start with an empty backing");
+        assert_eq!(arena.offsets.len(), 0, "should start without any offsets");
+
+        let mut subs = vec![];
+
+        for i in 0..50 {
+            if i == 0 {
+                let sub = arena.alloc();
+                assert_eq!(
+                    sub.len(arena).expect("should exist"),
+                    0,
+                    "new sub should have len 0"
+                );
+                assert_eq!(
+                    sub.is_empty(arena).expect("should exist"),
+                    true,
+                    "new sub should be empty"
+                );
+                subs.push(sub);
+
+                continue;
+            } else if i % 2 == 0 {
+                test_routine(arena, &mut subs[i - 1]);
+            }
+
+            let sub = arena.alloc();
+            assert_eq!(
+                sub.len(arena).expect("should exist"),
+                0,
+                "new sub should have len 0"
+            );
+            assert_eq!(
+                sub.is_empty(arena).expect("should exist"),
+                true,
+                "new sub should be empty"
+            );
+            subs.push(sub);
+        }
+
+        for mut sub in subs.iter_mut() {
+            test_routine(arena, &mut sub);
+        }
+    }
+}
diff --git a/eth2/utils/cached_tree_hash/src/impls.rs b/eth2/utils/cached_tree_hash/src/impls.rs
index c5bc181205..36210db885 100644
--- a/eth2/utils/cached_tree_hash/src/impls.rs
+++ b/eth2/utils/cached_tree_hash/src/impls.rs
@@ -1,4 +1,4 @@
-use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
+use crate::{CacheArena, CachedTreeHash, Error, Hash256, TreeHashCache};
 use ssz_types::{typenum::Unsigned, FixedVector, VariableList};
 use std::mem::size_of;
 use tree_hash::{mix_in_length, BYTES_PER_CHUNK};
@@ -13,6 +13,17 @@ pub fn int_log(n: usize) -> usize {
     }
 }
 
+pub fn hash256_leaf_count(len: usize) -> usize {
+    len
+}
+
+pub fn u64_leaf_count(len: usize) -> usize {
+    let type_size = size_of::<u64>();
+    let vals_per_chunk = BYTES_PER_CHUNK / type_size;
+
+    (len + vals_per_chunk - 1) / vals_per_chunk
+}
+
 pub fn hash256_iter<'a>(
     values: &'a [Hash256],
 ) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
@@ -36,35 +47,59 @@ pub fn u64_iter<'a>(
 }
 
 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<Hash256, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        TreeHashCache::new(int_log(N::to_usize()))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize()),
+            hash256_leaf_count(self.len()),
+        )
     }
 
-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
-        cache.recalculate_merkle_root(hash256_iter(&self))
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
+        cache.recalculate_merkle_root(arena, hash256_iter(&self))
     }
 }
 
 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<u64, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
-        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        // Size the tree by the rounded-up chunk capacity so a partial final chunk gets a leaf.
+        TreeHashCache::new(
+            arena,
+            int_log(u64_leaf_count(N::to_usize())),
+            u64_leaf_count(self.len()),
+        )
     }
 
-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
-        cache.recalculate_merkle_root(u64_iter(&self))
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
+        cache.recalculate_merkle_root(arena, u64_iter(&self))
     }
 }
 
 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        TreeHashCache::new(int_log(N::to_usize()))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        TreeHashCache::new(
+            arena,
+            int_log(N::to_usize()),
+            hash256_leaf_count(self.len()),
+        )
     }
 
-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
         Ok(Hash256::from_slice(&mix_in_length(
             cache
-                .recalculate_merkle_root(hash256_iter(&self))?
+                .recalculate_merkle_root(arena, hash256_iter(&self))?
                 .as_bytes(),
             self.len(),
         )))
@@ -72,14 +107,24 @@ impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
 }
 
 impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<u64, N> {
-    fn new_tree_hash_cache() -> TreeHashCache {
-        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
-        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> TreeHashCache {
+        // Size the tree by the rounded-up chunk capacity so a partial final chunk gets a leaf.
+        TreeHashCache::new(
+            arena,
+            int_log(u64_leaf_count(N::to_usize())),
+            u64_leaf_count(self.len()),
+        )
     }
 
-    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut TreeHashCache,
+    ) -> Result<Hash256, Error> {
         Ok(Hash256::from_slice(&mix_in_length(
-            cache.recalculate_merkle_root(u64_iter(&self))?.as_bytes(),
+            cache
+                .recalculate_merkle_root(arena, u64_iter(&self))?
+                .as_bytes(),
             self.len(),
         )))
     }
diff --git a/eth2/utils/cached_tree_hash/src/lib.rs b/eth2/utils/cached_tree_hash/src/lib.rs
index 135b07050f..1c483948e3 100644
--- a/eth2/utils/cached_tree_hash/src/lib.rs
+++ b/eth2/utils/cached_tree_hash/src/lib.rs
@@ -1,12 +1,13 @@
 mod cache;
+mod cache_arena;
 mod impls;
-mod multi_cache;
 #[cfg(test)]
 mod test;
 
+pub type CacheArena = cache_arena::CacheArena<Hash256>;
+
 pub use crate::cache::TreeHashCache;
 pub use crate::impls::int_log;
-pub use crate::multi_cache::MultiTreeHashCache;
 use ethereum_types::H256 as Hash256;
 use tree_hash::TreeHash;
 
@@ -19,13 +20,28 @@ pub enum Error {
     CannotShrink,
     /// Cache is inconsistent with the list of dirty indices provided.
     CacheInconsistent,
+    /// An error was returned by the cache arena.
+    CacheArenaError(cache_arena::Error),
+    /// Unable to find left index in Merkle tree.
+    MissingLeftIdx(usize),
+}
+
+/// Allows `?` to convert a `cache_arena::Error` into `Error::CacheArenaError`.
+impl From<cache_arena::Error> for Error {
+    fn from(e: cache_arena::Error) -> Error {
+        Error::CacheArenaError(e)
+    }
 }
 
 /// Trait for types which can make use of a cache to accelerate calculation of their tree hash root.
 pub trait CachedTreeHash<Cache>: TreeHash {
     /// Create a new cache appropriate for use with values of this type.
-    fn new_tree_hash_cache() -> Cache;
+    fn new_tree_hash_cache(&self, arena: &mut CacheArena) -> Cache;
 
     /// Update the cache and use it to compute the tree hash root for `self`.
-    fn recalculate_tree_hash_root(&self, cache: &mut Cache) -> Result<Hash256, Error>;
+    fn recalculate_tree_hash_root(
+        &self,
+        arena: &mut CacheArena,
+        cache: &mut Cache,
+    ) -> Result<Hash256, Error>;
 }
diff --git a/eth2/utils/cached_tree_hash/src/multi_cache.rs b/eth2/utils/cached_tree_hash/src/multi_cache.rs
deleted file mode 100644
index 5ecdd3f4ab..0000000000
--- a/eth2/utils/cached_tree_hash/src/multi_cache.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-use crate::{int_log, CachedTreeHash, Error, Hash256, TreeHashCache};
-use ssz_derive::{Decode, Encode};
-use ssz_types::{typenum::Unsigned, VariableList};
-use tree_hash::mix_in_length;
-
-/// Multi-level tree hash cache.
-///
-/// Suitable for lists/vectors/containers holding values which themselves have caches.
-///
-/// Note: this cache could be made composable by replacing the hardcoded `Vec<TreeHashCache>` with
-/// `Vec<C>`, allowing arbitrary nesting, but for now we stick to 2-level nesting because that's all
-/// we need.
-#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
-pub struct MultiTreeHashCache {
-    list_cache: TreeHashCache,
-    value_caches: Vec<TreeHashCache>,
-}
-
-impl MultiTreeHashCache {
-    /// Returns the approximate size of the cache in bytes.
-    ///
-    /// The size is approximate because we ignore some stack-allocated `u64` and `Vec` pointers.
-    /// We focus instead on the lists of hashes, which should massively outweigh the items that we
-    /// ignore.
-    pub fn approx_mem_size(&self) -> usize {
-        self.list_cache.approx_mem_size()
-            + self
-                .value_caches
-                .iter()
-                .map(TreeHashCache::approx_mem_size)
-                .sum::<usize>()
-    }
-}
-
-impl<T, N> CachedTreeHash<MultiTreeHashCache> for VariableList<T, N>
-where
-    T: CachedTreeHash<TreeHashCache>,
-    N: Unsigned,
-{
-    fn new_tree_hash_cache() -> MultiTreeHashCache {
-        MultiTreeHashCache {
-            list_cache: TreeHashCache::new(int_log(N::to_usize())),
-            value_caches: vec![],
-        }
-    }
-
-    fn recalculate_tree_hash_root(&self, cache: &mut MultiTreeHashCache) -> Result<Hash256, Error> {
-        if self.len() < cache.value_caches.len() {
-            return Err(Error::CannotShrink);
-        }
-
-        // Resize the value caches to the size of the list.
-        cache
-            .value_caches
-            .resize(self.len(), T::new_tree_hash_cache());
-
-        // Update all individual value caches.
-        self.iter()
-            .zip(cache.value_caches.iter_mut())
-            .try_for_each(|(value, cache)| value.recalculate_tree_hash_root(cache).map(|_| ()))?;
-
-        // Pipe the value roots into the list cache, then mix in the length.
-        // Note: it's possible to avoid this 2nd iteration (or an allocation) by using
-        // `itertools::process_results`, but it requires removing the `ExactSizeIterator`
-        // bound from `recalculate_merkle_root`, and only saves about 5% in benchmarks.
-        let list_root = cache.list_cache.recalculate_merkle_root(
-            cache
-                .value_caches
-                .iter()
-                .map(|value_cache| value_cache.root().to_fixed_bytes()),
-        )?;
-
-        Ok(Hash256::from_slice(&mix_in_length(
-            list_root.as_bytes(),
-            self.len(),
-        )))
-    }
-}
diff --git a/eth2/utils/cached_tree_hash/src/test.rs b/eth2/utils/cached_tree_hash/src/test.rs
index 0e3679d9fd..ffd47e2fd9 100644
--- a/eth2/utils/cached_tree_hash/src/test.rs
+++ b/eth2/utils/cached_tree_hash/src/test.rs
@@ -1,5 +1,5 @@
 use crate::impls::hash256_iter;
-use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
+use crate::{CacheArena, CachedTreeHash, Error, Hash256, TreeHashCache};
 use eth2_hashing::ZERO_HASHES;
 use quickcheck_macros::quickcheck;
 use ssz_types::{
@@ -18,46 +18,49 @@ type Vector16u64 = FixedVector<u64, U16>;
 
 #[test]
 fn max_leaves() {
+    let arena = &mut CacheArena::default();
     let depth = 4;
     let max_len = 2u64.pow(depth as u32);
-    let mut cache = TreeHashCache::new(depth);
+    let mut cache = TreeHashCache::new(arena, depth, 2);
     assert!(cache
-        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len - 1)))
+        .recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len - 1)))
         .is_ok());
     assert!(cache
-        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len)))
+        .recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len)))
         .is_ok());
     assert_eq!(
-        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len + 1))),
+        cache.recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len + 1))),
         Err(Error::TooManyLeaves)
     );
     assert_eq!(
-        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len * 2))),
+        cache.recalculate_merkle_root(arena, hash256_iter(&int_hashes(0, max_len * 2))),
         Err(Error::TooManyLeaves)
     );
 }
 
 #[test]
 fn cannot_shrink() {
+    let arena = &mut CacheArena::default();
     let init_len = 12;
     let list1 = List16::new(int_hashes(0, init_len)).unwrap();
     let list2 = List16::new(int_hashes(0, init_len - 1)).unwrap();
 
-    let mut cache = List16::new_tree_hash_cache();
-    assert!(list1.recalculate_tree_hash_root(&mut cache).is_ok());
+    let mut cache = list1.new_tree_hash_cache(arena);
+    assert!(list1.recalculate_tree_hash_root(arena, &mut cache).is_ok());
     assert_eq!(
-        list2.recalculate_tree_hash_root(&mut cache),
+        list2.recalculate_tree_hash_root(arena, &mut cache),
         Err(Error::CannotShrink)
     );
 }
 
 #[test]
 fn empty_leaves() {
+    let arena = &mut CacheArena::default();
     let depth = 20;
-    let mut cache = TreeHashCache::new(depth);
+    let mut cache = TreeHashCache::new(arena, depth, 0);
     assert_eq!(
         cache
-            .recalculate_merkle_root(vec![].into_iter())
+            .recalculate_merkle_root(arena, vec![].into_iter())
             .unwrap()
             .as_bytes(),
         &ZERO_HASHES[depth][..]
@@ -66,40 +69,43 @@ fn empty_leaves() {
 
 #[test]
 fn fixed_vector_hash256() {
+    let arena = &mut CacheArena::default();
     let len = 16;
     let vec = Vector16::new(int_hashes(0, len)).unwrap();
 
-    let mut cache = Vector16::new_tree_hash_cache();
+    let mut cache = vec.new_tree_hash_cache(arena);
 
     assert_eq!(
         Hash256::from_slice(&vec.tree_hash_root()),
-        vec.recalculate_tree_hash_root(&mut cache).unwrap()
+        vec.recalculate_tree_hash_root(arena, &mut cache).unwrap()
     );
 }
 
 #[test]
 fn fixed_vector_u64() {
+    let arena = &mut CacheArena::default();
     let len = 16;
     let vec = Vector16u64::new((0..len).collect()).unwrap();
 
-    let mut cache = Vector16u64::new_tree_hash_cache();
+    let mut cache = vec.new_tree_hash_cache(arena);
 
     assert_eq!(
         Hash256::from_slice(&vec.tree_hash_root()),
-        vec.recalculate_tree_hash_root(&mut cache).unwrap()
+        vec.recalculate_tree_hash_root(arena, &mut cache).unwrap()
     );
 }
 
 #[test]
 fn variable_list_hash256() {
+    let arena = &mut CacheArena::default();
     let len = 13;
     let list = List16::new(int_hashes(0, len)).unwrap();
 
-    let mut cache = List16::new_tree_hash_cache();
+    let mut cache = list.new_tree_hash_cache(arena);
 
     assert_eq!(
         Hash256::from_slice(&list.tree_hash_root()),
-        list.recalculate_tree_hash_root(&mut cache).unwrap()
+        list.recalculate_tree_hash_root(arena, &mut cache).unwrap()
     );
 }
 
@@ -119,6 +125,7 @@ fn quickcheck_variable_list_h256_257(leaves_and_skips: Vec<(u64, bool)>) -> bool
 }
 
 fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) -> bool {
+    let arena = &mut CacheArena::default();
     let leaves: Vec<_> = leaves_and_skips
         .iter()
         .map(|(l, _)| Hash256::from_low_u64_be(*l))
@@ -126,14 +133,15 @@ fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) ->
         .collect();
 
     let mut list: VariableList<Hash256, Len>;
-    let mut cache = VariableList::<Hash256, Len>::new_tree_hash_cache();
+    let init: VariableList<Hash256, Len> = VariableList::new(vec![]).unwrap();
+    let mut cache = init.new_tree_hash_cache(arena);
 
     for (end, (_, update_cache)) in leaves_and_skips.into_iter().enumerate() {
         list = VariableList::new(leaves[..end].to_vec()).unwrap();
 
         if update_cache
             && list
-                .recalculate_tree_hash_root(&mut cache)
+                .recalculate_tree_hash_root(arena, &mut cache)
                 .unwrap()
                 .as_bytes()
                 != &list.tree_hash_root()[..]
diff --git a/eth2/utils/eth2_hashing/src/lib.rs b/eth2/utils/eth2_hashing/src/lib.rs
index 555c5bbe30..1a0b752b18 100644
--- a/eth2/utils/eth2_hashing/src/lib.rs
+++ b/eth2/utils/eth2_hashing/src/lib.rs
@@ -5,7 +5,7 @@
 //! defining it once in this crate makes it easy to replace.
 
 #[cfg(not(target_arch = "wasm32"))]
-use ring::digest::{digest, SHA256};
+use ring::digest::{digest, Context, SHA256};
 
 #[cfg(target_arch = "wasm32")]
 use sha2::{Digest, Sha256};
@@ -27,10 +27,35 @@ pub fn hash(input: &[u8]) -> Vec<u8> {
 }
 
 /// Compute the hash of two slices concatenated.
-pub fn hash_concat(h1: &[u8], h2: &[u8]) -> Vec<u8> {
-    let mut vec1 = h1.to_vec();
-    vec1.extend_from_slice(h2);
-    hash(&vec1)
+///
+/// Both `h1` and `h2` are expected to be 32 bytes long. This `ring`-backed build hashes inputs
+/// of any length without panicking, whereas the `wasm32` build panics on any other length, so
+/// callers must not rely on either behaviour for malformed input.
+#[cfg(not(target_arch = "wasm32"))]
+pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
+    let mut context = Context::new(&SHA256);
+    context.update(h1);
+    context.update(h2);
+
+    let mut output = [0; 32];
+    output[..].copy_from_slice(context.finish().as_ref());
+    output
+}
+
+/// Hashes the 64-byte concatenation of `h1` followed by `h2`.
+///
+/// # Panics
+///
+/// Panics if `h1` or `h2` is not exactly 32 bytes long.
+#[cfg(target_arch = "wasm32")]
+pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
+    let mut buf = [0u8; 64];
+    let (left, right) = buf.split_at_mut(32);
+    left.copy_from_slice(h1);
+    right.copy_from_slice(h2);
+    let mut digest = [0u8; 32];
+    digest.copy_from_slice(&hash(&buf));
+    digest
 }
 
 /// The max index that can be used with `ZERO_HASHES`.
@@ -44,7 +69,7 @@ lazy_static! {
         let mut hashes = vec![vec![0; 32]; ZERO_HASHES_MAX_INDEX + 1];
 
         for i in 0..ZERO_HASHES_MAX_INDEX {
-            hashes[i + 1] = hash_concat(&hashes[i], &hashes[i]);
+            hashes[i + 1] = hash32_concat(&hashes[i], &hashes[i])[..].to_vec();
         }
 
         hashes
diff --git a/eth2/utils/int_to_bytes/src/lib.rs b/eth2/utils/int_to_bytes/src/lib.rs
index 537d7b5c84..7b89e4192e 100644
--- a/eth2/utils/int_to_bytes/src/lib.rs
+++ b/eth2/utils/int_to_bytes/src/lib.rs
@@ -53,6 +53,14 @@ pub fn int_to_bytes32(int: u64) -> Vec<u8> {
     bytes.to_vec()
 }
 
+/// Returns `int` as little-endian bytes, zero-padded to a fixed length of 32.
+pub fn int_to_fixed_bytes32(int: u64) -> [u8; 32] {
+    let le = int.to_le_bytes();
+    let mut padded = [0u8; 32];
+    padded[..le.len()].copy_from_slice(&le);
+    padded
+}
+
 /// Returns `int` as little-endian bytes with a length of 48.
 pub fn int_to_bytes48(int: u64) -> Vec<u8> {
     let mut bytes = BytesMut::with_capacity(48);
@@ -76,6 +84,14 @@ mod tests {
     use std::{fs::File, io::prelude::*, path::PathBuf};
     use yaml_rust::yaml;
 
+    #[test]
+    fn fixed_bytes32() {
+        // `u64::max_value()` makes all eight integer bytes non-zero, not just the low three.
+        for x in &[0, 1, 3, 256, 1024, 2943784, u64::max_value()] {
+            assert_eq!(&int_to_bytes32(*x), &int_to_fixed_bytes32(*x));
+        }
+    }
+
     #[test]
     fn int_to_bytes3_returns_none() {
         assert_eq!(int_to_bytes3(2_u32.pow(24)), None);
diff --git a/eth2/utils/merkle_proof/src/lib.rs b/eth2/utils/merkle_proof/src/lib.rs
index 1ed9d8070a..64f744be80 100644
--- a/eth2/utils/merkle_proof/src/lib.rs
+++ b/eth2/utils/merkle_proof/src/lib.rs
@@ -1,4 +1,4 @@
-use eth2_hashing::{hash, hash_concat, ZERO_HASHES};
+use eth2_hashing::{hash, hash32_concat, ZERO_HASHES};
 use ethereum_types::H256;
 use lazy_static::lazy_static;
 
@@ -65,7 +65,7 @@ impl MerkleTree {
 
                 let left_subtree = MerkleTree::create(left_leaves, depth - 1);
                 let right_subtree = MerkleTree::create(right_leaves, depth - 1);
-                let hash = H256::from_slice(&hash_concat(
+                let hash = H256::from_slice(&hash32_concat(
                     left_subtree.hash().as_bytes(),
                     right_subtree.hash().as_bytes(),
                 ));
@@ -124,7 +124,7 @@ impl MerkleTree {
                     // All other possibilities are invalid MerkleTrees
                     (_, _) => return Err(MerkleTreeError::Invalid),
                 };
-                hash.assign_from_slice(&hash_concat(
+                hash.assign_from_slice(&hash32_concat(
                     left.hash().as_bytes(),
                     right.hash().as_bytes(),
                 ));
@@ -221,7 +221,7 @@ fn merkle_root_from_branch(leaf: H256, branch: &[H256], depth: usize, index: usi
     for (i, leaf) in branch.iter().enumerate().take(depth) {
         let ith_bit = (index >> i) & 0x01;
         if ith_bit == 1 {
-            merkle_root = hash_concat(leaf.as_bytes(), &merkle_root);
+            merkle_root = hash32_concat(leaf.as_bytes(), &merkle_root)[..].to_vec();
         } else {
             let mut input = merkle_root;
             input.extend_from_slice(leaf.as_bytes());
@@ -296,10 +296,10 @@ mod tests {
         let leaf_b10 = H256::from([0xCC; 32]);
         let leaf_b11 = H256::from([0xDD; 32]);
 
-        let node_b0x = H256::from_slice(&hash_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
-        let node_b1x = H256::from_slice(&hash_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
+        let node_b0x = H256::from_slice(&hash32_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
+        let node_b1x = H256::from_slice(&hash32_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
 
-        let root = H256::from_slice(&hash_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
+        let root = H256::from_slice(&hash32_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
 
         let tree = MerkleTree::create(&[leaf_b00, leaf_b01, leaf_b10, leaf_b11], 2);
         assert_eq!(tree.hash(), root);
@@ -313,10 +313,10 @@ mod tests {
         let leaf_b10 = H256::from([0xCC; 32]);
         let leaf_b11 = H256::from([0xDD; 32]);
 
-        let node_b0x = H256::from_slice(&hash_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
-        let node_b1x = H256::from_slice(&hash_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
+        let node_b0x = H256::from_slice(&hash32_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
+        let node_b1x = H256::from_slice(&hash32_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
 
-        let root = H256::from_slice(&hash_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
+        let root = H256::from_slice(&hash32_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
 
         // Run some proofs
         assert!(verify_merkle_proof(
diff --git a/eth2/utils/tree_hash/src/lib.rs b/eth2/utils/tree_hash/src/lib.rs
index 0b3be72c46..a4037a4278 100644
--- a/eth2/utils/tree_hash/src/lib.rs
+++ b/eth2/utils/tree_hash/src/lib.rs
@@ -24,7 +24,7 @@ pub fn mix_in_length(root: &[u8], length: usize) -> Vec<u8> {
     let mut length_bytes = length.to_le_bytes().to_vec();
     length_bytes.resize(BYTES_PER_CHUNK, 0);
 
-    eth2_hashing::hash_concat(root, &length_bytes)
+    eth2_hashing::hash32_concat(root, &length_bytes)[..].to_vec()
 }
 
 #[derive(Debug, PartialEq, Clone)]
diff --git a/eth2/utils/tree_hash/src/merkleize_padded.rs b/eth2/utils/tree_hash/src/merkleize_padded.rs
index 832c0bbd80..095179799a 100644
--- a/eth2/utils/tree_hash/src/merkleize_padded.rs
+++ b/eth2/utils/tree_hash/src/merkleize_padded.rs
@@ -1,5 +1,5 @@
 use super::BYTES_PER_CHUNK;
-use eth2_hashing::{hash, hash_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
+use eth2_hashing::{hash, hash32_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
 
 /// The size of the cache that stores padding nodes for a given height.
 ///
@@ -138,7 +138,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
                 "Both children should be `BYTES_PER_CHUNK` bytes."
             );
 
-            let hash = hash_concat(left, right);
+            let hash = hash32_concat(left, right);
 
             // Store a parent node.
             chunks
diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs
index b15e9ceb24..2f35ff1756 100644
--- a/eth2/utils/tree_hash_derive/src/lib.rs
+++ b/eth2/utils/tree_hash_derive/src/lib.rs
@@ -3,7 +3,6 @@ extern crate proc_macro;
 
 use proc_macro::TokenStream;
 use quote::quote;
-use std::collections::HashMap;
 use syn::{parse_macro_input, Attribute, DeriveInput, Meta};
 
 /// Return a Vec of `syn::Ident` for each named field in the struct, whilst filtering out fields
@@ -69,37 +68,6 @@ fn cached_tree_hash_attr_metas(attrs: &[Attribute]) -> Vec<Meta> {
         .collect()
 }
 
-/// Parse the top-level cached_tree_hash struct attribute.
-///
-/// Return the type from `#[cached_tree_hash(type = "T")]`.
-///
-/// **Panics** if the attribute is missing or the type is malformed.
-fn parse_cached_tree_hash_struct_attrs(attrs: &[Attribute]) -> syn::Type {
-    use syn::{Lit, MetaList, MetaNameValue, NestedMeta};
-
-    let parsed_attrs = cached_tree_hash_attr_metas(attrs);
-    if let [Meta::List(MetaList { nested, .. })] = &parsed_attrs[..] {
-        let eqns = nested
-            .iter()
-            .flat_map(|x| match x {
-                NestedMeta::Meta(Meta::NameValue(MetaNameValue {
-                    ident,
-                    lit: Lit::Str(lit_str),
-                    ..
-                })) => Some((ident.to_string(), lit_str.clone())),
-                _ => None,
-            })
-            .collect::<HashMap<_, _>>();
-
-        eqns["type"]
-            .clone()
-            .parse()
-            .expect("valid type required for cache")
-    } else {
-        panic!("missing attribute `#[cached_tree_hash(type = ...)` on struct");
-    }
-}
-
 /// Returns true if some field has an attribute declaring it should not be hashed.
 ///
 /// The field attribute is: `#[tree_hash(skip_hashing)]`
@@ -185,82 +153,6 @@ pub fn tree_hash_signed_root_derive(input: TokenStream) -> TokenStream {
     output.into()
 }
 
-/// Derive the `CachedTreeHash` trait for a type.
-///
-/// Requires two attributes:
-/// * `#[cached_tree_hash(type = "T")]` on the struct, declaring
-///   that the type `T` should be used as the tree hash cache.
-/// * `#[cached_tree_hash(f)]` on each struct field that makes use
-///   of the cache, which declares that the sub-cache for that field
-///   can be found in the field `cache.f` of the struct's cache.
-#[proc_macro_derive(CachedTreeHash, attributes(cached_tree_hash))]
-pub fn cached_tree_hash_derive(input: TokenStream) -> TokenStream {
-    let item = parse_macro_input!(input as DeriveInput);
-
-    let name = &item.ident;
-
-    let cache_type = parse_cached_tree_hash_struct_attrs(&item.attrs);
-
-    let (impl_generics, ty_generics, where_clause) = &item.generics.split_for_impl();
-
-    let struct_data = match &item.data {
-        syn::Data::Struct(s) => s,
-        _ => panic!("tree_hash_derive only supports structs."),
-    };
-
-    let fields = get_hashable_fields_and_their_caches(&struct_data);
-    let caching_field_ty = fields
-        .iter()
-        .filter(|(_, _, cache_field)| cache_field.is_some())
-        .map(|(_, ty, _)| ty);
-    let caching_field_cache_field = fields
-        .iter()
-        .flat_map(|(_, _, cache_field)| cache_field.as_ref());
-
-    let tree_hash_root_expr = fields
-        .iter()
-        .map(|(field, _, caching_field)| match caching_field {
-            None => quote! {
-                self.#field.tree_hash_root()
-            },
-            Some(caching_field) => quote! {
-                self.#field
-                    .recalculate_tree_hash_root(&mut cache.#caching_field)?
-                    .as_bytes()
-                    .to_vec()
-            },
-        });
-
-    let output = quote! {
-        impl #impl_generics cached_tree_hash::CachedTreeHash<#cache_type> for #name #ty_generics #where_clause {
-            fn new_tree_hash_cache() -> #cache_type {
-                // Call new cache for each sub type
-                #cache_type {
-                    initialized: true,
-                    #(
-                        #caching_field_cache_field: <#caching_field_ty>::new_tree_hash_cache()
-                    ),*
-                }
-            }
-
-            fn recalculate_tree_hash_root(
-                &self,
-                cache: &mut #cache_type)
-            -> std::result::Result<Hash256, cached_tree_hash::Error>
-            {
-                let mut leaves = vec![];
-
-                #(
-                    leaves.append(&mut #tree_hash_root_expr);
-                )*
-
-                Ok(Hash256::from_slice(&tree_hash::merkle_root(&leaves, 0)))
-            }
-        }
-    };
-    output.into()
-}
-
 fn get_signed_root_named_field_idents(struct_data: &syn::DataStruct) -> Vec<&syn::Ident> {
     struct_data
         .fields
diff --git a/tests/ef_tests/src/cases/ssz_static.rs b/tests/ef_tests/src/cases/ssz_static.rs
index e4c216f765..8eee374bb6 100644
--- a/tests/ef_tests/src/cases/ssz_static.rs
+++ b/tests/ef_tests/src/cases/ssz_static.rs
@@ -2,7 +2,7 @@ use super::*;
 use crate::case_result::compare_result;
 use crate::cases::common::SszStaticType;
 use crate::decode::yaml_decode_file;
-use cached_tree_hash::CachedTreeHash;
+use cached_tree_hash::{CacheArena, CachedTreeHash};
 use serde_derive::Deserialize;
 use std::fs;
 use std::marker::PhantomData;
@@ -126,8 +126,12 @@ impl<T: SszStaticType + CachedTreeHash<C>, C: Debug + Sync> Case for SszStaticTH
         check_serialization(&self.value, &self.serialized)?;
         check_tree_hash(&self.roots.root, &self.value.tree_hash_root())?;
 
-        let mut cache = T::new_tree_hash_cache();
-        let cached_tree_hash_root = self.value.recalculate_tree_hash_root(&mut cache).unwrap();
+        let arena = &mut CacheArena::default();
+        let mut cache = self.value.new_tree_hash_cache(arena);
+        let cached_tree_hash_root = self
+            .value
+            .recalculate_tree_hash_root(arena, &mut cache)
+            .unwrap();
         check_tree_hash(&self.roots.root, cached_tree_hash_root.as_bytes())?;
 
         Ok(())