mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Implement tree hash caching (#584)
* Implement basic tree hash caching * Use spaces to indent top-level Cargo.toml * Optimize BLS tree hash by hashing bytes directly * Implement tree hash caching for validator registry * Persist BeaconState tree hash cache to disk * Address Paul's review comments
This commit is contained in:
137
eth2/utils/cached_tree_hash/src/cache.rs
Normal file
137
eth2/utils/cached_tree_hash/src/cache.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
use crate::{Error, Hash256};
|
||||
use eth2_hashing::{hash_concat, ZERO_HASHES};
|
||||
use ssz_derive::{Decode, Encode};
|
||||
use tree_hash::BYTES_PER_CHUNK;
|
||||
|
||||
/// Sparse Merkle tree suitable for tree hashing vectors and lists.
|
||||
#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
|
||||
pub struct TreeHashCache {
|
||||
/// Depth is such that the tree has a capacity for 2^depth leaves
|
||||
depth: usize,
|
||||
/// Sparse layers.
|
||||
///
|
||||
/// The leaves are contained in `self.layers[self.depth]`, and each other layer `i`
|
||||
/// contains the parents of the nodes in layer `i + 1`.
|
||||
layers: Vec<Vec<Hash256>>,
|
||||
}
|
||||
|
||||
impl TreeHashCache {
|
||||
/// Create a new cache with the given `depth`, but no actual content.
|
||||
pub fn new(depth: usize) -> Self {
|
||||
TreeHashCache {
|
||||
depth,
|
||||
layers: vec![vec![]; depth + 1],
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the updated Merkle root for the given `leaves`.
|
||||
pub fn recalculate_merkle_root(
|
||||
&mut self,
|
||||
leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
|
||||
) -> Result<Hash256, Error> {
|
||||
let dirty_indices = self.update_leaves(leaves)?;
|
||||
self.update_merkle_root(dirty_indices)
|
||||
}
|
||||
|
||||
/// Phase 1 of the algorithm: compute the indices of all dirty leaves.
|
||||
pub fn update_leaves(
|
||||
&mut self,
|
||||
mut leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
|
||||
) -> Result<Vec<usize>, Error> {
|
||||
let new_leaf_count = leaves.len();
|
||||
|
||||
if new_leaf_count < self.leaves().len() {
|
||||
return Err(Error::CannotShrink);
|
||||
} else if new_leaf_count > 2usize.pow(self.depth as u32) {
|
||||
return Err(Error::TooManyLeaves);
|
||||
}
|
||||
|
||||
// Update the existing leaves
|
||||
let mut dirty = self
|
||||
.leaves()
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.zip(&mut leaves)
|
||||
.flat_map(|((i, leaf), new_leaf)| {
|
||||
if leaf.as_bytes() != new_leaf {
|
||||
leaf.assign_from_slice(&new_leaf);
|
||||
Some(i)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Push the rest of the new leaves (if any)
|
||||
dirty.extend(self.leaves().len()..new_leaf_count);
|
||||
self.leaves()
|
||||
.extend(leaves.map(|l| Hash256::from_slice(&l)));
|
||||
|
||||
Ok(dirty)
|
||||
}
|
||||
|
||||
/// Phase 2: propagate changes upwards from the leaves of the tree, and compute the root.
|
||||
///
|
||||
/// Returns an error if `dirty_indices` is inconsistent with the cache.
|
||||
pub fn update_merkle_root(&mut self, mut dirty_indices: Vec<usize>) -> Result<Hash256, Error> {
|
||||
if dirty_indices.is_empty() {
|
||||
return Ok(self.root());
|
||||
}
|
||||
|
||||
let mut depth = self.depth;
|
||||
|
||||
while depth > 0 {
|
||||
let new_dirty_indices = lift_dirty(&dirty_indices);
|
||||
|
||||
for &idx in &new_dirty_indices {
|
||||
let left_idx = 2 * idx;
|
||||
let right_idx = left_idx + 1;
|
||||
|
||||
let left = self.layers[depth][left_idx];
|
||||
let right = self.layers[depth]
|
||||
.get(right_idx)
|
||||
.copied()
|
||||
.unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth - depth]));
|
||||
|
||||
let new_hash = hash_concat(left.as_bytes(), right.as_bytes());
|
||||
|
||||
match self.layers[depth - 1].get_mut(idx) {
|
||||
Some(hash) => {
|
||||
hash.assign_from_slice(&new_hash);
|
||||
}
|
||||
None => {
|
||||
// Parent layer should already contain nodes for all non-dirty indices
|
||||
if idx != self.layers[depth - 1].len() {
|
||||
return Err(Error::CacheInconsistent);
|
||||
}
|
||||
self.layers[depth - 1].push(Hash256::from_slice(&new_hash));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dirty_indices = new_dirty_indices;
|
||||
depth -= 1;
|
||||
}
|
||||
|
||||
Ok(self.root())
|
||||
}
|
||||
|
||||
/// Get the root of this cache, without doing any updates/computation.
|
||||
pub fn root(&self) -> Hash256 {
|
||||
self.layers[0]
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth]))
|
||||
}
|
||||
|
||||
pub fn leaves(&mut self) -> &mut Vec<Hash256> {
|
||||
&mut self.layers[self.depth]
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the dirty indices for one layer up.
///
/// Each index is mapped to its parent (`index / 2`); a parent is emitted only when it differs
/// from the previously emitted one, so runs of adjacent duplicates collapse to a single entry.
fn lift_dirty(dirty_indices: &[usize]) -> Vec<usize> {
    let mut parents: Vec<usize> = Vec::with_capacity(dirty_indices.len());

    for &child in dirty_indices {
        let parent = child / 2;
        if parents.last() != Some(&parent) {
            parents.push(parent);
        }
    }

    parents
}
|
||||
99
eth2/utils/cached_tree_hash/src/impls.rs
Normal file
99
eth2/utils/cached_tree_hash/src/impls.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
|
||||
use ssz_types::{typenum::Unsigned, FixedVector, VariableList};
|
||||
use std::mem::size_of;
|
||||
use tree_hash::{mix_in_length, BYTES_PER_CHUNK};
|
||||
|
||||
/// Compute ceil(log2(n)).
///
/// Returns the smallest number of bits `d` such that `n <= 2^d`. When the next power of two
/// does not fit in a `usize`, the bit width of `usize` is returned.
pub fn int_log(n: usize) -> usize {
    n.checked_next_power_of_two()
        .map(|power| power.trailing_zeros() as usize)
        .unwrap_or(8 * std::mem::size_of::<usize>())
}
|
||||
|
||||
pub fn hash256_iter<'a>(
|
||||
values: &'a [Hash256],
|
||||
) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
|
||||
values.iter().copied().map(Hash256::to_fixed_bytes)
|
||||
}
|
||||
|
||||
pub fn u64_iter<'a>(
|
||||
values: &'a [u64],
|
||||
) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
|
||||
let type_size = size_of::<u64>();
|
||||
let vals_per_chunk = BYTES_PER_CHUNK / type_size;
|
||||
values.chunks(vals_per_chunk).map(move |xs| {
|
||||
xs.iter().map(|x| x.to_le_bytes()).enumerate().fold(
|
||||
[0; BYTES_PER_CHUNK],
|
||||
|mut chunk, (i, x_bytes)| {
|
||||
chunk[i * type_size..(i + 1) * type_size].copy_from_slice(&x_bytes);
|
||||
chunk
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<Hash256, N> {
|
||||
fn new_tree_hash_cache() -> TreeHashCache {
|
||||
TreeHashCache::new(int_log(N::to_usize()))
|
||||
}
|
||||
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
|
||||
cache.recalculate_merkle_root(hash256_iter(&self))
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<u64, N> {
|
||||
fn new_tree_hash_cache() -> TreeHashCache {
|
||||
let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
|
||||
TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
|
||||
}
|
||||
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
|
||||
cache.recalculate_merkle_root(u64_iter(&self))
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
|
||||
fn new_tree_hash_cache() -> TreeHashCache {
|
||||
TreeHashCache::new(int_log(N::to_usize()))
|
||||
}
|
||||
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
|
||||
Ok(Hash256::from_slice(&mix_in_length(
|
||||
cache
|
||||
.recalculate_merkle_root(hash256_iter(&self))?
|
||||
.as_bytes(),
|
||||
self.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<u64, N> {
|
||||
fn new_tree_hash_cache() -> TreeHashCache {
|
||||
let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
|
||||
TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
|
||||
}
|
||||
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
|
||||
Ok(Hash256::from_slice(&mix_in_length(
|
||||
cache.recalculate_merkle_root(u64_iter(&self))?.as_bytes(),
|
||||
self.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// `int_log` returns the exponent for exact powers of two and rounds up otherwise.
    #[test]
    fn test_int_log() {
        (0..63).for_each(|exponent| {
            assert_eq!(int_log(2usize.pow(exponent)), exponent as usize);
        });
        assert_eq!(int_log(10), 4);
    }
}
|
||||
31
eth2/utils/cached_tree_hash/src/lib.rs
Normal file
31
eth2/utils/cached_tree_hash/src/lib.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
mod cache;
|
||||
mod impls;
|
||||
mod multi_cache;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub use crate::cache::TreeHashCache;
|
||||
pub use crate::impls::int_log;
|
||||
pub use crate::multi_cache::MultiTreeHashCache;
|
||||
use ethereum_types::H256 as Hash256;
|
||||
use tree_hash::TreeHash;
|
||||
|
||||
/// Errors that can occur while updating a tree hash cache.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// Attempting to provide more than 2^depth leaves to a Merkle tree is disallowed.
    TooManyLeaves,
    /// Shrinking a Merkle tree cache by providing it with fewer leaves than it currently has is
    /// disallowed (for simplicity).
    CannotShrink,
    /// Cache is inconsistent with the list of dirty indices provided.
    CacheInconsistent,
}

impl std::fmt::Display for Error {
    /// Write a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Error::TooManyLeaves => "too many leaves for the depth of the Merkle tree cache",
            Error::CannotShrink => "a Merkle tree cache cannot be shrunk",
            Error::CacheInconsistent => "cache is inconsistent with the provided dirty indices",
        };
        f.write_str(message)
    }
}

// Implementing the standard error trait lets callers use `?` with `Box<dyn Error>` and similar
// generic error handling.
impl std::error::Error for Error {}
|
||||
|
||||
/// Trait for types which can make use of a cache to accelerate calculation of their tree hash root.
|
||||
pub trait CachedTreeHash<Cache>: TreeHash {
|
||||
/// Create a new cache appropriate for use with values of this type.
|
||||
fn new_tree_hash_cache() -> Cache;
|
||||
|
||||
/// Update the cache and use it to compute the tree hash root for `self`.
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut Cache) -> Result<Hash256, Error>;
|
||||
}
|
||||
62
eth2/utils/cached_tree_hash/src/multi_cache.rs
Normal file
62
eth2/utils/cached_tree_hash/src/multi_cache.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use crate::{int_log, CachedTreeHash, Error, Hash256, TreeHashCache};
|
||||
use ssz_derive::{Decode, Encode};
|
||||
use ssz_types::{typenum::Unsigned, VariableList};
|
||||
use tree_hash::mix_in_length;
|
||||
|
||||
/// Multi-level tree hash cache.
|
||||
///
|
||||
/// Suitable for lists/vectors/containers holding values which themselves have caches.
|
||||
///
|
||||
/// Note: this cache could be made composable by replacing the hardcoded `Vec<TreeHashCache>` with
|
||||
/// `Vec<C>`, allowing arbitrary nesting, but for now we stick to 2-level nesting because that's all
|
||||
/// we need.
|
||||
#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
|
||||
pub struct MultiTreeHashCache {
|
||||
list_cache: TreeHashCache,
|
||||
value_caches: Vec<TreeHashCache>,
|
||||
}
|
||||
|
||||
impl<T, N> CachedTreeHash<MultiTreeHashCache> for VariableList<T, N>
|
||||
where
|
||||
T: CachedTreeHash<TreeHashCache>,
|
||||
N: Unsigned,
|
||||
{
|
||||
fn new_tree_hash_cache() -> MultiTreeHashCache {
|
||||
MultiTreeHashCache {
|
||||
list_cache: TreeHashCache::new(int_log(N::to_usize())),
|
||||
value_caches: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn recalculate_tree_hash_root(&self, cache: &mut MultiTreeHashCache) -> Result<Hash256, Error> {
|
||||
if self.len() < cache.value_caches.len() {
|
||||
return Err(Error::CannotShrink);
|
||||
}
|
||||
|
||||
// Resize the value caches to the size of the list.
|
||||
cache
|
||||
.value_caches
|
||||
.resize(self.len(), T::new_tree_hash_cache());
|
||||
|
||||
// Update all individual value caches.
|
||||
self.iter()
|
||||
.zip(cache.value_caches.iter_mut())
|
||||
.try_for_each(|(value, cache)| value.recalculate_tree_hash_root(cache).map(|_| ()))?;
|
||||
|
||||
// Pipe the value roots into the list cache, then mix in the length.
|
||||
// Note: it's possible to avoid this 2nd iteration (or an allocation) by using
|
||||
// `itertools::process_results`, but it requires removing the `ExactSizeIterator`
|
||||
// bound from `recalculate_merkle_root`, and only saves about 5% in benchmarks.
|
||||
let list_root = cache.list_cache.recalculate_merkle_root(
|
||||
cache
|
||||
.value_caches
|
||||
.iter()
|
||||
.map(|value_cache| value_cache.root().to_fixed_bytes()),
|
||||
)?;
|
||||
|
||||
Ok(Hash256::from_slice(&mix_in_length(
|
||||
list_root.as_bytes(),
|
||||
self.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
147
eth2/utils/cached_tree_hash/src/test.rs
Normal file
147
eth2/utils/cached_tree_hash/src/test.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
use crate::impls::hash256_iter;
|
||||
use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
|
||||
use eth2_hashing::ZERO_HASHES;
|
||||
use quickcheck_macros::quickcheck;
|
||||
use ssz_types::{
|
||||
typenum::{Unsigned, U16, U255, U256, U257},
|
||||
FixedVector, VariableList,
|
||||
};
|
||||
use tree_hash::TreeHash;
|
||||
|
||||
fn int_hashes(start: u64, end: u64) -> Vec<Hash256> {
|
||||
(start..end).map(Hash256::from_low_u64_le).collect()
|
||||
}
|
||||
|
||||
type List16 = VariableList<Hash256, U16>;
|
||||
type Vector16 = FixedVector<Hash256, U16>;
|
||||
type Vector16u64 = FixedVector<u64, U16>;
|
||||
|
||||
/// A cache of depth `d` accepts up to 2^d leaves and rejects anything larger.
#[test]
fn max_leaves() {
    let depth: usize = 4;
    let capacity = 1u64 << depth;
    let mut cache = TreeHashCache::new(depth);

    // Leaf counts up to and including the capacity are accepted.
    for &len in &[capacity - 1, capacity] {
        let hashes = int_hashes(0, len);
        assert!(cache.recalculate_merkle_root(hash256_iter(&hashes)).is_ok());
    }

    // Anything beyond the capacity is rejected.
    for &len in &[capacity + 1, capacity * 2] {
        let hashes = int_hashes(0, len);
        assert_eq!(
            cache.recalculate_merkle_root(hash256_iter(&hashes)),
            Err(Error::TooManyLeaves)
        );
    }
}
|
||||
|
||||
/// Updating a cache with a shorter list than it already holds is an error.
#[test]
fn cannot_shrink() {
    let init_len = 12;
    let longer = List16::new(int_hashes(0, init_len)).unwrap();
    let shorter = List16::new(int_hashes(0, init_len - 1)).unwrap();

    let mut cache = List16::new_tree_hash_cache();

    let first_update = longer.recalculate_tree_hash_root(&mut cache);
    assert!(first_update.is_ok());

    let second_update = shorter.recalculate_tree_hash_root(&mut cache);
    assert_eq!(second_update, Err(Error::CannotShrink));
}
|
||||
|
||||
/// With no leaves at all, the root is the zero hash for the cache's depth.
#[test]
fn empty_leaves() {
    let depth = 20;
    let mut cache = TreeHashCache::new(depth);

    let root = cache
        .recalculate_merkle_root(vec![].into_iter())
        .unwrap();

    assert_eq!(root.as_bytes(), &ZERO_HASHES[depth][..]);
}
|
||||
|
||||
/// The cached root of a full hash vector matches its regular tree hash root.
#[test]
fn fixed_vector_hash256() {
    let len = 16;
    let vec = Vector16::new(int_hashes(0, len)).unwrap();
    let mut cache = Vector16::new_tree_hash_cache();

    let expected = Hash256::from_slice(&vec.tree_hash_root());
    let cached = vec.recalculate_tree_hash_root(&mut cache).unwrap();

    assert_eq!(expected, cached);
}
|
||||
|
||||
/// The cached root of a full `u64` vector matches its regular tree hash root.
#[test]
fn fixed_vector_u64() {
    let len = 16;
    let vec = Vector16u64::new((0..len).collect()).unwrap();
    let mut cache = Vector16u64::new_tree_hash_cache();

    let expected = Hash256::from_slice(&vec.tree_hash_root());
    let cached = vec.recalculate_tree_hash_root(&mut cache).unwrap();

    assert_eq!(expected, cached);
}
|
||||
|
||||
/// The cached root of a partially filled hash list matches its regular tree hash root.
#[test]
fn variable_list_hash256() {
    let len = 13;
    let list = List16::new(int_hashes(0, len)).unwrap();
    let mut cache = List16::new_tree_hash_cache();

    let expected = Hash256::from_slice(&list.tree_hash_root());
    let cached = list.recalculate_tree_hash_root(&mut cache).unwrap();

    assert_eq!(expected, cached);
}
|
||||
|
||||
#[quickcheck]
|
||||
fn quickcheck_variable_list_h256_256(leaves_and_skips: Vec<(u64, bool)>) -> bool {
|
||||
variable_list_h256_test::<U256>(leaves_and_skips)
|
||||
}
|
||||
|
||||
#[quickcheck]
|
||||
fn quickcheck_variable_list_h256_255(leaves_and_skips: Vec<(u64, bool)>) -> bool {
|
||||
variable_list_h256_test::<U255>(leaves_and_skips)
|
||||
}
|
||||
|
||||
#[quickcheck]
|
||||
fn quickcheck_variable_list_h256_257(leaves_and_skips: Vec<(u64, bool)>) -> bool {
|
||||
variable_list_h256_test::<U257>(leaves_and_skips)
|
||||
}
|
||||
|
||||
fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) -> bool {
|
||||
let leaves: Vec<_> = leaves_and_skips
|
||||
.iter()
|
||||
.map(|(l, _)| Hash256::from_low_u64_be(*l))
|
||||
.take(Len::to_usize())
|
||||
.collect();
|
||||
|
||||
let mut list: VariableList<Hash256, Len>;
|
||||
let mut cache = VariableList::<Hash256, Len>::new_tree_hash_cache();
|
||||
|
||||
for (end, (_, update_cache)) in leaves_and_skips.into_iter().enumerate() {
|
||||
list = VariableList::new(leaves[..end].to_vec()).unwrap();
|
||||
|
||||
if update_cache {
|
||||
if list
|
||||
.recalculate_tree_hash_root(&mut cache)
|
||||
.unwrap()
|
||||
.as_bytes()
|
||||
!= &list.tree_hash_root()[..]
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
Reference in New Issue
Block a user