From 84d72cfed6bd3e1fe01381894ad217a68734f5f9 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 29 Apr 2019 17:46:01 +1000 Subject: [PATCH] Tidy and add docs for cached tree hash --- eth2/types/src/beacon_state.rs | 4 +- eth2/types/src/test_utils/macros.rs | 4 +- eth2/types/src/tree_hash_vector.rs | 20 ++- eth2/utils/bls/src/public_key.rs | 14 +- eth2/utils/bls/src/signature.rs | 14 +- eth2/utils/boolean-bitfield/src/lib.rs | 14 +- .../examples/8k_hashes_cached.rs | 2 +- .../cached_tree_hash/src/btree_overlay.rs | 53 ++++++- eth2/utils/cached_tree_hash/src/impls/vec.rs | 6 +- eth2/utils/cached_tree_hash/src/lib.rs | 88 +++++----- eth2/utils/cached_tree_hash/src/merkleize.rs | 15 +- .../cached_tree_hash/src/tree_hash_cache.rs | 150 ++++++++++++------ eth2/utils/cached_tree_hash/tests/tests.rs | 55 +++---- eth2/utils/tree_hash_derive/src/lib.rs | 2 +- eth2/utils/tree_hash_derive/tests/tests.rs | 10 +- 15 files changed, 292 insertions(+), 159 deletions(-) diff --git a/eth2/types/src/beacon_state.rs b/eth2/types/src/beacon_state.rs index 6948997c53..e9b052f99e 100644 --- a/eth2/types/src/beacon_state.rs +++ b/eth2/types/src/beacon_state.rs @@ -806,7 +806,7 @@ impl BeaconState { /// canonical root of `self`. pub fn update_tree_hash_cache(&mut self) -> Result { if self.tree_hash_cache.is_empty() { - self.tree_hash_cache = TreeHashCache::new(self, 0)?; + self.tree_hash_cache = TreeHashCache::new(self)?; } else { // Move the cache outside of `self` to satisfy the borrow checker. let mut cache = std::mem::replace(&mut self.tree_hash_cache, TreeHashCache::default()); @@ -828,7 +828,7 @@ impl BeaconState { /// cache update. 
pub fn cached_tree_hash_root(&self) -> Result { self.tree_hash_cache - .root() + .tree_hash_root() .and_then(|b| Ok(Hash256::from_slice(b))) .map_err(|e| e.into()) } diff --git a/eth2/types/src/test_utils/macros.rs b/eth2/types/src/test_utils/macros.rs index d6739ca0bd..71f462c1a6 100644 --- a/eth2/types/src/test_utils/macros.rs +++ b/eth2/types/src/test_utils/macros.rs @@ -46,7 +46,7 @@ macro_rules! cached_tree_hash_tests { // Test the original hash let original = $type::random_for_test(&mut rng); - let mut cache = cached_tree_hash::TreeHashCache::new(&original, 0).unwrap(); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); assert_eq!( cache.tree_hash_root().unwrap().to_vec(), @@ -64,7 +64,7 @@ macro_rules! cached_tree_hash_tests { ); // Produce a new cache for the modified object and compare it to the updated cache. - let mut modified_cache = cached_tree_hash::TreeHashCache::new(&modified, 0).unwrap(); + let mut modified_cache = cached_tree_hash::TreeHashCache::new(&modified).unwrap(); // Reset the caches. 
cache.reset_modifications(); diff --git a/eth2/types/src/tree_hash_vector.rs b/eth2/types/src/tree_hash_vector.rs index 8a7d99a6c7..42a730f250 100644 --- a/eth2/types/src/tree_hash_vector.rs +++ b/eth2/types/src/tree_hash_vector.rs @@ -63,20 +63,20 @@ where &self, depth: usize, ) -> Result { - let (cache, _overlay) = cached_tree_hash::impls::vec::new_tree_hash_cache(self, depth)?; + let (cache, _overlay) = cached_tree_hash::vec::new_tree_hash_cache(self, depth)?; Ok(cache) } fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { - cached_tree_hash::impls::vec::produce_schema(self, depth) + cached_tree_hash::vec::produce_schema(self, depth) } fn update_tree_hash_cache( &self, cache: &mut cached_tree_hash::TreeHashCache, ) -> Result<(), cached_tree_hash::Error> { - cached_tree_hash::impls::vec::update_tree_hash_cache(self, cache)?; + cached_tree_hash::vec::update_tree_hash_cache(self, cache)?; Ok(()) } @@ -122,15 +122,21 @@ mod test { pub fn test_cached_tree_hash() { let original = TreeHashVector::from(vec![1_u64, 2, 3, 4]); - let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap(); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); let modified = TreeHashVector::from(vec![1_u64, 1, 1, 1]); - hasher.update(&modified).unwrap(); + cache.update(&modified).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); } } diff --git a/eth2/utils/bls/src/public_key.rs b/eth2/utils/bls/src/public_key.rs index e6e4d3508f..41b87d383b 100644 --- a/eth2/utils/bls/src/public_key.rs +++ b/eth2/utils/bls/src/public_key.rs @@ -149,15 +149,21 @@ mod tests { let sk = SecretKey::random(); let original = PublicKey::from_secret_key(&sk); - 
let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap(); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); let sk = SecretKey::random(); let modified = PublicKey::from_secret_key(&sk); - hasher.update(&modified).unwrap(); + cache.update(&modified).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); } } diff --git a/eth2/utils/bls/src/signature.rs b/eth2/utils/bls/src/signature.rs index 75f43aaf46..e2dbd9c27d 100644 --- a/eth2/utils/bls/src/signature.rs +++ b/eth2/utils/bls/src/signature.rs @@ -166,15 +166,21 @@ mod tests { let keypair = Keypair::random(); let original = Signature::new(&[42, 42], 0, &keypair.sk); - let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap(); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); let modified = Signature::new(&[99, 99], 0, &keypair.sk); - hasher.update(&modified).unwrap(); + cache.update(&modified).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); } #[test] diff --git a/eth2/utils/boolean-bitfield/src/lib.rs b/eth2/utils/boolean-bitfield/src/lib.rs index c5fa590d6d..d49da0d10c 100644 --- a/eth2/utils/boolean-bitfield/src/lib.rs +++ b/eth2/utils/boolean-bitfield/src/lib.rs @@ -284,15 +284,21 @@ mod tests { pub fn test_cached_tree_hash() { let original = BooleanBitfield::from_bytes(&vec![18; 12][..]); - let mut hasher = 
cached_tree_hash::CachedTreeHasher::new(&original).unwrap(); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); let modified = BooleanBitfield::from_bytes(&vec![2; 1][..]); - hasher.update(&modified).unwrap(); + cache.update(&modified).unwrap(); - assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root()); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); } #[test] diff --git a/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs b/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs index cb8dc42fb0..1e67571d57 100644 --- a/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs +++ b/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs @@ -2,7 +2,7 @@ use cached_tree_hash::TreeHashCache; use ethereum_types::H256 as Hash256; fn run(vec: &Vec, modified_vec: &Vec) { - let mut cache = TreeHashCache::new(vec, 0).unwrap(); + let mut cache = TreeHashCache::new(vec).unwrap(); cache.update(modified_vec).unwrap(); } diff --git a/eth2/utils/cached_tree_hash/src/btree_overlay.rs b/eth2/utils/cached_tree_hash/src/btree_overlay.rs index 4b3c6cc277..a96df769cf 100644 --- a/eth2/utils/cached_tree_hash/src/btree_overlay.rs +++ b/eth2/utils/cached_tree_hash/src/btree_overlay.rs @@ -1,7 +1,19 @@ use super::*; +/// A schema defining a binary tree over a `TreeHashCache`. +/// +/// This structure is used for succinct storage, run-time functionality is gained by converting the +/// schema into a `BTreeOverlay`. #[derive(Debug, PartialEq, Clone)] pub struct BTreeSchema { + /// The depth of a schema defines how far it is nested within other fixed-length items. + /// + /// Each time a new variable-length object is created all items within it are assigned a depth + /// of `depth + 1`. 
+ /// + /// When storing the schemas in a list, the depth parameter allows for removing all schemas + /// belonging to a specific variable-length item without removing schemas related to adjacent + /// variable-length items. pub depth: usize, lengths: Vec, } @@ -25,21 +37,35 @@ impl Into for BTreeOverlay { } } +/// Provides a status for some leaf-node in a binary tree. #[derive(Debug, PartialEq, Clone)] pub enum LeafNode { + /// The leaf node does not exist in this tree. DoesNotExist, + /// The leaf node exists in the tree and has a real value within the given `chunk` range. Exists(Range), + /// The leaf node exists in the tree only as padding. Padding, } +/// Instantiated from a `BTreeSchema`, allows for interpreting some chunks of a `TreeHashCache` as +/// a perfect binary tree. +/// +/// The primary purpose of this struct is to map from binary tree "nodes" to `TreeHashCache` +/// "chunks". Each tree has nodes `0..n` where `n` is the number of nodes and `0` is the root node. +/// Each of these nodes is mapped to a chunk, starting from `self.offset` and increasing in steps +/// of `1` for internal nodes and arbitrary steps for leaf-nodes. #[derive(Debug, PartialEq, Clone)] pub struct BTreeOverlay { offset: usize, + /// See `BTreeSchema.depth` for a description. pub depth: usize, lengths: Vec, } impl BTreeOverlay { + /// Instantiates a new instance for `item`, where its first chunk is `initial_offset` and has + /// the specified `depth`. pub fn new(item: &T, initial_offset: usize, depth: usize) -> Self where T: CachedTreeHash, @@ -47,6 +73,7 @@ impl BTreeOverlay { Self::from_schema(item.tree_hash_cache_schema(depth), initial_offset) } + /// Instantiates a new instance from a schema, where its first chunk is `offset`. pub fn from_schema(schema: BTreeSchema, offset: usize) -> Self { Self { offset, @@ -55,6 +82,10 @@ impl BTreeOverlay { } } + /// Returns a `LeafNode` for each of the `n` leaves of the tree. 
+ /// + /// `LeafNode::DoesNotExist` is returned for each element `i` in `0..n` where `i >= + /// self.num_leaf_nodes()`. pub fn get_leaf_nodes(&self, n: usize) -> Vec { let mut running_offset = self.offset + self.num_internal_nodes(); @@ -74,10 +105,12 @@ impl BTreeOverlay { leaf_nodes } + /// Returns the number of leaf nodes in the tree. pub fn num_leaf_nodes(&self) -> usize { self.lengths.len().next_power_of_two() } + /// Returns the number of leaves in the tree which are padding. pub fn num_padding_leaves(&self) -> usize { self.num_leaf_nodes() - self.lengths.len() } @@ -90,31 +123,39 @@ impl BTreeOverlay { 2 * self.num_leaf_nodes() - 1 } + /// Returns the number of internal (non-leaf) nodes in the tree. pub fn num_internal_nodes(&self) -> usize { self.num_leaf_nodes() - 1 } + /// Returns the chunk of the first node of the tree. fn first_node(&self) -> usize { self.offset } + /// Returns the root chunk of the tree (the zero-th node) pub fn root(&self) -> usize { self.first_node() } + /// Returns the first chunk outside of the boundary of this tree. It is the root node chunk + /// plus the total number of chunks in the tree. pub fn next_node(&self) -> usize { self.first_node() + self.num_internal_nodes() + self.num_leaf_nodes() - self.lengths.len() + self.lengths.iter().sum::() } + /// Returns the height of the tree, where a tree with a single leaf node has a height of 0. pub fn height(&self) -> usize { self.num_leaf_nodes().trailing_zeros() as usize } + /// Returns the range of chunks that belong to the internal nodes of the tree. pub fn internal_chunk_range(&self) -> Range { self.offset..self.offset + self.num_internal_nodes() } + /// Returns all of the chunks that are encompassed by the tree. pub fn chunk_range(&self) -> Range { self.first_node()..self.next_node() } @@ -127,10 +168,14 @@ impl BTreeOverlay { self.next_node() - self.first_node() } + /// Returns the first chunk of the first leaf node in the tree. 
pub fn first_leaf_node(&self) -> usize { self.offset + self.num_internal_nodes() } + /// Returns the chunks for some given parent node. + /// + /// Note: it is a parent _node_ not a parent _chunk_. pub fn child_chunks(&self, parent: usize) -> (usize, usize) { let children = children(parent); @@ -142,7 +187,7 @@ impl BTreeOverlay { } } - /// (parent, (left_child, right_child)) + /// Returns a vec of (parent_chunk, (left_child_chunk, right_child_chunk)). pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> { let mut chunks = Vec::with_capacity(self.num_nodes()); chunks.append(&mut self.internal_node_chunks()); @@ -156,17 +201,17 @@ impl BTreeOverlay { .collect() } - // Returns a `Vec` of chunk indices for each internal node of the tree. + /// Returns a vec of chunk indices for each internal node of the tree. pub fn internal_node_chunks(&self) -> Vec { (self.offset..self.offset + self.num_internal_nodes()).collect() } - // Returns a `Vec` of the first chunk index for each leaf node of the tree. + /// Returns a vec of the first chunk for each leaf node of the tree. pub fn leaf_node_chunks(&self) -> Vec { self.n_leaf_node_chunks(self.num_leaf_nodes()) } - // Returns a `Vec` of the first chunk index for the first `n` leaf nodes of the tree. + /// Returns a vec of the first chunk index for the first `n` leaf nodes of the tree. 
fn n_leaf_node_chunks(&self, n: usize) -> Vec { let mut chunks = Vec::with_capacity(n); diff --git a/eth2/utils/cached_tree_hash/src/impls/vec.rs b/eth2/utils/cached_tree_hash/src/impls/vec.rs index a4ecee3f3d..bdb7eb134f 100644 --- a/eth2/utils/cached_tree_hash/src/impls/vec.rs +++ b/eth2/utils/cached_tree_hash/src/impls/vec.rs @@ -66,10 +66,10 @@ pub fn new_tree_hash_cache( TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { let subtrees = vec .iter() - .map(|item| TreeHashCache::new(item, depth + 1)) + .map(|item| TreeHashCache::new_at_depth(item, depth + 1)) .collect::, _>>()?; - TreeHashCache::from_leaves_and_subtrees(&vec, subtrees, depth) + TreeHashCache::from_subtrees(&vec, subtrees, depth) } }?; @@ -301,7 +301,7 @@ fn splice_in_new_tree( where T: CachedTreeHash, { - let (bytes, mut bools, schemas) = TreeHashCache::new(item, depth)?.into_components(); + let (bytes, mut bools, schemas) = TreeHashCache::new_at_depth(item, depth)?.into_components(); // Record the number of schemas, this will be used later in the fn. let num_schemas = schemas.len(); diff --git a/eth2/utils/cached_tree_hash/src/lib.rs b/eth2/utils/cached_tree_hash/src/lib.rs index b9bb8457b1..21fa786e4c 100644 --- a/eth2/utils/cached_tree_hash/src/lib.rs +++ b/eth2/utils/cached_tree_hash/src/lib.rs @@ -1,16 +1,52 @@ +//! Performs cached merkle-hashing adhering to the Ethereum 2.0 specification defined +//! [here](https://github.com/ethereum/eth2.0-specs/blob/v0.5.1/specs/simple-serialize.md#merkleization). +//! +//! Caching allows for reduced hashing when some object has only been partially modified. This +//! allows for significant CPU-time savings (at the cost of additional storage). For example, +//! determining the root of a list of 1024 items with a single modification has been observed to +//! run in 1/25th of the time of a full merkle hash. +//! +//! +//! # Example: +//! +//! ``` +//! use cached_tree_hash::TreeHashCache; +//! 
use tree_hash_derive::{TreeHash, CachedTreeHash}; +//! +//! #[derive(TreeHash, CachedTreeHash)] +//! struct Foo { +//! bar: u64, +//! baz: Vec +//! } +//! +//! let mut foo = Foo { +//! bar: 1, +//! baz: vec![0, 1, 2] +//! }; +//! +//! let mut cache = TreeHashCache::new(&foo).unwrap(); +//! +//! foo.baz[1] = 0; +//! +//! cache.update(&foo).unwrap(); +//! +//! println!("Root is: {:?}", cache.tree_hash_root().unwrap()); +//! ``` + use hashing::hash; use std::ops::Range; use tree_hash::{TreeHash, TreeHashType, BYTES_PER_CHUNK, HASHSIZE}; mod btree_overlay; mod errors; -pub mod impls; +mod impls; pub mod merkleize; mod resize; mod tree_hash_cache; pub use btree_overlay::{BTreeOverlay, BTreeSchema}; pub use errors::Error; +pub use impls::vec; pub use tree_hash_cache::TreeHashCache; pub trait CachedTreeHash: TreeHash { @@ -25,34 +61,8 @@ pub trait CachedTreeHash: TreeHash { fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error>; } -#[derive(Debug, PartialEq)] -pub struct CachedTreeHasher { - pub cache: TreeHashCache, -} - -impl CachedTreeHasher { - pub fn new(item: &T) -> Result - where - T: CachedTreeHash, - { - Ok(Self { - cache: TreeHashCache::new(item, 0)?, - }) - } - - pub fn update(&mut self, item: &T) -> Result<(), Error> - where - T: CachedTreeHash, - { - self.cache.update(item) - } - - pub fn tree_hash_root(&self) -> Result, Error> { - // Return the root of the cache -- the merkle root. - Ok(self.cache.root()?.to_vec()) - } -} - +/// Implements `CachedTreeHash` on `$type` as a fixed-length tree-hash vector of the ssz encoding +/// of `$type`. #[macro_export] macro_rules! cached_tree_hash_ssz_encoding_as_vector { ($type: ident, $num_bytes: expr) => { @@ -61,10 +71,8 @@ macro_rules! 
cached_tree_hash_ssz_encoding_as_vector { &self, depth: usize, ) -> Result { - let (cache, _schema) = cached_tree_hash::impls::vec::new_tree_hash_cache( - &ssz::ssz_encode(self), - depth, - )?; + let (cache, _schema) = + cached_tree_hash::vec::new_tree_hash_cache(&ssz::ssz_encode(self), depth)?; Ok(cache) } @@ -79,10 +87,7 @@ macro_rules! cached_tree_hash_ssz_encoding_as_vector { &self, cache: &mut cached_tree_hash::TreeHashCache, ) -> Result<(), cached_tree_hash::Error> { - cached_tree_hash::impls::vec::update_tree_hash_cache( - &ssz::ssz_encode(self), - cache, - )?; + cached_tree_hash::vec::update_tree_hash_cache(&ssz::ssz_encode(self), cache)?; Ok(()) } @@ -90,6 +95,8 @@ macro_rules! cached_tree_hash_ssz_encoding_as_vector { }; } +/// Implements `CachedTreeHash` on `$type` as a variable-length tree-hash list of the result of +/// calling `.to_bytes()` on `$type`. #[macro_export] macro_rules! cached_tree_hash_bytes_as_list { ($type: ident) => { @@ -101,7 +108,7 @@ macro_rules! cached_tree_hash_bytes_as_list { let bytes = self.to_bytes(); let (mut cache, schema) = - cached_tree_hash::impls::vec::new_tree_hash_cache(&bytes, depth)?; + cached_tree_hash::vec::new_tree_hash_cache(&bytes, depth)?; cache.add_length_nodes(schema.into_overlay(0).chunk_range(), bytes.len())?; @@ -115,7 +122,7 @@ macro_rules! cached_tree_hash_bytes_as_list { fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { let bytes = self.to_bytes(); - cached_tree_hash::impls::vec::produce_schema(&bytes, depth) + cached_tree_hash::vec::produce_schema(&bytes, depth) } fn update_tree_hash_cache( @@ -128,8 +135,7 @@ macro_rules! cached_tree_hash_bytes_as_list { cache.chunk_index += 1; // Update the cache, returning the new overlay. 
- let new_overlay = - cached_tree_hash::impls::vec::update_tree_hash_cache(&bytes, cache)?; + let new_overlay = cached_tree_hash::vec::update_tree_hash_cache(&bytes, cache)?; // Mix in length cache.mix_in_length(new_overlay.chunk_range(), bytes.len())?; diff --git a/eth2/utils/cached_tree_hash/src/merkleize.rs b/eth2/utils/cached_tree_hash/src/merkleize.rs index e744961b0d..9d8c832002 100644 --- a/eth2/utils/cached_tree_hash/src/merkleize.rs +++ b/eth2/utils/cached_tree_hash/src/merkleize.rs @@ -35,6 +35,7 @@ pub fn merkleize(values: Vec) -> Vec { o } +/// Ensures that the given `bytes` are a power-of-two number of chunks, padding with zeros if not. pub fn sanitise_bytes(mut bytes: Vec) -> Vec { let present_leaves = num_unsanitized_leaves(bytes.len()); let required_leaves = present_leaves.next_power_of_two(); @@ -46,6 +47,7 @@ pub fn sanitise_bytes(mut bytes: Vec) -> Vec { bytes } +/// Pads out `bytes` to ensure it is a clean `num_leaves` chunks. pub fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { let required_leaves = num_leaves.next_power_of_two(); @@ -59,9 +61,10 @@ fn last_leaf_needs_padding(num_bytes: usize) -> bool { num_bytes % HASHSIZE != 0 } -/// Rounds up -pub fn num_unsanitized_leaves(num_bytes: usize) -> usize { - (num_bytes + HASHSIZE - 1) / HASHSIZE +/// Returns the number of leaves for a given `bytes_len` number of bytes, rounding up if +/// `bytes_len` is not a clean multiple of the chunk size. +pub fn num_unsanitized_leaves(bytes_len: usize) -> usize { + (bytes_len + HASHSIZE - 1) / HASHSIZE } fn num_bytes(num_leaves: usize) -> usize { @@ -72,7 +75,9 @@ fn num_nodes(num_leaves: usize) -> usize { 2 * num_leaves - 1 } -pub fn num_sanitized_leaves(num_bytes: usize) -> usize { - let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; +/// Returns the power-of-two number of leaves that would result from the given `bytes_len` number +/// of bytes. 
+pub fn num_sanitized_leaves(bytes_len: usize) -> usize { + let leaves = (bytes_len + HASHSIZE - 1) / HASHSIZE; leaves.next_power_of_two() } diff --git a/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs b/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs index acb96ba241..8f7b9de865 100644 --- a/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs +++ b/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs @@ -4,20 +4,35 @@ use super::*; use crate::merkleize::{merkleize, pad_for_leaf_count}; use int_to_bytes::int_to_bytes32; +/// Provides cached tree hashing for some object implementing `CachedTreeHash`. +/// +/// Caching allows for doing minimal internal-node hashing when an object has only been partially +/// changed. +/// +/// See the crate root for an example. #[derive(Debug, PartialEq, Clone)] pub struct TreeHashCache { - pub cache: Vec, + /// Stores the binary-tree in 32-byte chunks. + pub bytes: Vec, + /// Maps to each chunk of `self.bytes`, indicating if the chunk is dirty. pub chunk_modified: Vec, + /// Contains a schema for each variable-length item stored in the cache. pub schemas: Vec, + /// A counter used during updates. pub chunk_index: usize, + /// A counter used during updates. pub schema_index: usize, } impl Default for TreeHashCache { + /// Create an empty cache. + /// + /// Note: an empty cache is effectively useless, an error will be raised if `self.update` is + /// called. fn default() -> TreeHashCache { TreeHashCache { - cache: vec![], + bytes: vec![], chunk_modified: vec![], schemas: vec![], chunk_index: 0, @@ -26,20 +41,34 @@ impl Default for TreeHashCache { } } -impl Into> for TreeHashCache { - fn into(self) -> Vec { - self.cache - } -} - impl TreeHashCache { - pub fn new(item: &T, depth: usize) -> Result + /// Instantiates a new cache from `item` at a depth of `0`. + /// + /// The returned cache is fully-built and will return an accurate tree-hash root. 
+ pub fn new(item: &T) -> Result + where + T: CachedTreeHash, + { + Self::new_at_depth(item, 0) + } + + /// Instantiates a new cache from `item` at the specified `depth`. + /// + /// The returned cache is fully-built and will return an accurate tree-hash root. + pub fn new_at_depth(item: &T, depth: usize) -> Result where T: CachedTreeHash, { item.new_tree_hash_cache(depth) } + /// Updates the cache with `item`. + /// + /// `item` _must_ be of the same type as the `item` used to build the cache, otherwise an error + /// may be returned. + /// + /// After calling `update`, the cache will return an accurate tree-hash root using + /// `self.tree_hash_root()`. pub fn update(&mut self, item: &T) -> Result<(), Error> where T: CachedTreeHash, @@ -53,11 +82,10 @@ impl TreeHashCache { } } - pub fn from_leaves_and_subtrees( - item: &T, - leaves_and_subtrees: Vec, - depth: usize, - ) -> Result + /// Builds a new cache for `item`, given `subtrees` contains a `Self` for each field/item of `item`. + /// + /// Each `subtree` in `subtrees` will become a leaf-node of the merkle-tree of `item`. + pub fn from_subtrees(item: &T, subtrees: Vec, depth: usize) -> Result where T: CachedTreeHash, { @@ -65,20 +93,18 @@ impl TreeHashCache { // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out // later. - let num_provided_leaf_nodes = leaves_and_subtrees.len(); + let num_provided_leaf_nodes = subtrees.len(); // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill // all the to-be-built internal nodes with zeros and append the leaves and subtrees. 
let internal_node_bytes = overlay.num_internal_nodes() * BYTES_PER_CHUNK; - let leaves_and_subtrees_bytes = leaves_and_subtrees - .iter() - .fold(0, |acc, t| acc + t.bytes_len()); - let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes); - cache.resize(internal_node_bytes, 0); + let subtrees_bytes = subtrees.iter().fold(0, |acc, t| acc + t.bytes.len()); + let mut bytes = Vec::with_capacity(subtrees_bytes + internal_node_bytes); + bytes.resize(internal_node_bytes, 0); // Allocate enough bytes to store all the leaves. let mut leaves = Vec::with_capacity(overlay.num_leaf_nodes() * HASHSIZE); - let mut schemas = Vec::with_capacity(leaves_and_subtrees.len()); + let mut schemas = Vec::with_capacity(subtrees.len()); if T::tree_hash_type() == TreeHashType::List { schemas.push(overlay.into()); @@ -86,32 +112,36 @@ impl TreeHashCache { // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then // concatenating their merkle trees. - for t in leaves_and_subtrees { - leaves.append(&mut t.root()?.to_vec()); + for t in subtrees { + leaves.append(&mut t.tree_hash_root()?.to_vec()); - let (mut bytes, _bools, mut t_schemas) = t.into_components(); - cache.append(&mut bytes); + let (mut t_bytes, _bools, mut t_schemas) = t.into_components(); + bytes.append(&mut t_bytes); schemas.append(&mut t_schemas); } // Pad the leaves to an even power-of-two, using zeros. - pad_for_leaf_count(num_provided_leaf_nodes, &mut cache); + pad_for_leaf_count(num_provided_leaf_nodes, &mut bytes); // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros // internal nodes created earlier with the internal nodes generated by `merkleize`. 
let mut merkleized = merkleize(leaves); merkleized.split_off(internal_node_bytes); - cache.splice(0..internal_node_bytes, merkleized); + bytes.splice(0..internal_node_bytes, merkleized); Ok(Self { - chunk_modified: vec![true; cache.len() / BYTES_PER_CHUNK], - cache, + chunk_modified: vec![true; bytes.len() / BYTES_PER_CHUNK], + bytes, schemas, chunk_index: 0, schema_index: 0, }) } + /// Instantiate a new cache from the pre-built `bytes` where each `self.chunk_modified` will be + /// set to `initial_modified_state`. + /// + /// Note: `bytes.len()` must be a multiple of 32 pub fn from_bytes( bytes: Vec, initial_modified_state: bool, @@ -128,17 +158,22 @@ impl TreeHashCache { Ok(Self { chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK], - cache: bytes, + bytes, schemas, chunk_index: 0, schema_index: 0, }) } + /// Returns `true` if this cache is empty (i.e., it has never been built for some item). + /// + /// Note: an empty cache is effectively useless, an error will be raised if `self.update` is + /// called. pub fn is_empty(&self) -> bool { self.chunk_modified.is_empty() } + /// Return an overlay, built from the schema at `schema_index` with an offset of `chunk_index`. pub fn get_overlay( &self, schema_index: usize, @@ -152,6 +187,9 @@ impl TreeHashCache { .into_overlay(chunk_index)) } + /// Resets the per-update counters, allowing a new update to start. + /// + /// Note: this does _not_ delete the contents of the cache. pub fn reset_modifications(&mut self) { // Reset the per-hash counters. self.chunk_index = 0; @@ -162,9 +200,14 @@ impl TreeHashCache { } } + /// Replace the schema at `schema_index` with the schema derived from `new_overlay`. + /// + /// If the `new_overlay` schema has a different number of internal nodes to the schema at + /// `schema_index`, the cache will be updated to add/remove these new internal nodes. 
pub fn replace_overlay( &mut self, schema_index: usize, + // TODO: remove chunk index (if possible) chunk_index: usize, new_overlay: BTreeOverlay, ) -> Result { @@ -225,6 +268,9 @@ impl TreeHashCache { Ok(old_schema.into_overlay(chunk_index)) } + /// Remove all of the child schemas following `schema_index`. + /// + /// Schema `a` is a child of schema `b` if `a.depth < b.depth`. pub fn remove_proceeding_child_schemas(&mut self, schema_index: usize, depth: usize) { let end = self .schemas @@ -237,6 +283,8 @@ impl TreeHashCache { self.schemas.splice(schema_index..end, vec![]); } + /// Iterate through the internal nodes chunks of `overlay`, updating the chunk with the + /// merkle-root of its children if either of those children is dirty. pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> { for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() { if self.either_modified(children)? { @@ -247,37 +295,34 @@ impl TreeHashCache { Ok(()) } - fn bytes_len(&self) -> usize { - self.cache.len() - } - + /// Returns the tree-hash root of the cache. pub fn tree_hash_root(&self) -> Result<&[u8], Error> { - self.root() - } - - pub fn root(&self) -> Result<&[u8], Error> { if self.is_empty() { Err(Error::CacheNotInitialized) } else { - self.cache + self.bytes .get(0..HASHSIZE) .ok_or_else(|| Error::NoBytesForRoot) } } + /// Splices the given `bytes` over `self.bytes` and `bools` over `self.chunk_modified` at the + /// specified `chunk_range`. pub fn splice(&mut self, chunk_range: Range, bytes: Vec, bools: Vec) { // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. self.chunk_modified.splice(chunk_range.clone(), bools); - self.cache + self.bytes .splice(node_range_to_byte_range(&chunk_range), bytes); } + /// If the bytes at `chunk` are not the same as `to`, `self.bytes` is updated and + /// `self.chunk_modified` is set to `true`. 
pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { let start = chunk * BYTES_PER_CHUNK; let end = start + BYTES_PER_CHUNK; if !self.chunk_equals(chunk, to)? { - self.cache + self.bytes .get_mut(start..end) .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))? .copy_from_slice(to); @@ -287,18 +332,20 @@ impl TreeHashCache { Ok(()) } + /// Returns the slices of `self.bytes` and `self.chunk_modified` at the given `chunk_range`. fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { Some(( - self.cache.get(node_range_to_byte_range(&chunk_range))?, + self.bytes.get(node_range_to_byte_range(&chunk_range))?, self.chunk_modified.get(chunk_range)?, )) } + /// Updates `self.bytes` at `chunk` and sets `self.chunk_modified` for the `chunk` to `true`. pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { let start = chunk * BYTES_PER_CHUNK; let end = start + BYTES_PER_CHUNK; - self.cache + self.bytes .get_mut(start..end) .ok_or_else(|| Error::NoBytesForChunk(chunk))? .copy_from_slice(to); @@ -308,20 +355,23 @@ impl TreeHashCache { Ok(()) } + /// Returns the bytes at `chunk`. fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { let start = chunk * BYTES_PER_CHUNK; let end = start + BYTES_PER_CHUNK; Ok(self - .cache + .bytes .get(start..end) .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) } + /// Returns `true` if the bytes at `chunk` are equal to `other`. fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { Ok(self.get_chunk(chunk)? == other) } + /// Returns `true` if `chunk` is dirty. pub fn changed(&self, chunk: usize) -> Result { self.chunk_modified .get(chunk) @@ -329,10 +379,12 @@ impl TreeHashCache { .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) } + /// Returns `true` if either of the `children` chunks is dirty. fn either_modified(&self, children: (usize, usize)) -> Result { Ok(self.changed(children.0)? | self.changed(children.1)?) 
} + /// Returns the hash of the concatenation of the given `children`. pub fn hash_children(&self, children: (usize, usize)) -> Result, Error> { let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); child_bytes.append(&mut self.get_chunk(children.0)?.to_vec()); @@ -341,6 +393,7 @@ impl TreeHashCache { Ok(hash(&child_bytes)) } + /// Adds a chunk before and after the given `chunk` range and calls `self.mix_in_length()`. pub fn add_length_nodes( &mut self, chunk_range: Range, @@ -351,13 +404,13 @@ impl TreeHashCache { let byte_range = node_range_to_byte_range(&chunk_range); // Add the last node. - self.cache + self.bytes .splice(byte_range.end..byte_range.end, vec![0; HASHSIZE]); self.chunk_modified .splice(chunk_range.end..chunk_range.end, vec![false]); // Add the first node. - self.cache + self.bytes .splice(byte_range.start..byte_range.start, vec![0; HASHSIZE]); self.chunk_modified .splice(chunk_range.start..chunk_range.start, vec![false]); @@ -367,6 +420,8 @@ impl TreeHashCache { Ok(()) } + /// Sets `chunk_range.end + 1` equal to the little-endian serialization of `length`. Sets + /// `chunk_range.start - 1` equal to `self.hash_children(chunk_range.start, chunk_range.end + 1)`. pub fn mix_in_length(&mut self, chunk_range: Range, length: usize) -> Result<(), Error> { // Update the length chunk. self.maybe_update_chunk(chunk_range.end, &int_to_bytes32(length as u64))?; @@ -380,8 +435,9 @@ impl TreeHashCache { Ok(()) } + /// Returns `(self.bytes, self.chunk_modified, self.schemas)`. 
pub fn into_components(self) -> (Vec, Vec, Vec) { - (self.cache, self.chunk_modified, self.schemas) + (self.bytes, self.chunk_modified, self.schemas) } } diff --git a/eth2/utils/cached_tree_hash/tests/tests.rs b/eth2/utils/cached_tree_hash/tests/tests.rs index 4b7a4e830c..3e2598e2b0 100644 --- a/eth2/utils/cached_tree_hash/tests/tests.rs +++ b/eth2/utils/cached_tree_hash/tests/tests.rs @@ -9,7 +9,7 @@ fn modifications() { let vec: Vec = (0..n).map(|_| Hash256::random()).collect(); - let mut cache = TreeHashCache::new(&vec, 0).unwrap(); + let mut cache = TreeHashCache::new(&vec).unwrap(); cache.update(&vec).unwrap(); let modifications = cache.chunk_modified.iter().filter(|b| **b).count(); @@ -36,60 +36,57 @@ fn test_routine(original: T, modified: Vec) where T: CachedTreeHash + std::fmt::Debug, { - let mut hasher = CachedTreeHasher::new(&original).unwrap(); + let mut cache = TreeHashCache::new(&original).unwrap(); let standard_root = original.tree_hash_root(); - let cached_root = hasher.tree_hash_root().unwrap(); + let cached_root = cache.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root, "Initial cache build failed."); for (i, modified) in modified.iter().enumerate() { println!("-- Start of modification {} --", i); // Update the existing hasher. - hasher + cache .update(modified) .expect(&format!("Modification {}", i)); // Create a new hasher from the "modified" struct. 
- let modified_hasher = CachedTreeHasher::new(modified).unwrap(); + let modified_cache = TreeHashCache::new(modified).unwrap(); assert_eq!( - hasher.cache.chunk_modified.len(), - modified_hasher.cache.chunk_modified.len(), + cache.chunk_modified.len(), + modified_cache.chunk_modified.len(), "Number of chunks is different" ); assert_eq!( - hasher.cache.cache.len(), - modified_hasher.cache.cache.len(), + cache.bytes.len(), + modified_cache.bytes.len(), "Number of bytes is different" ); - assert_eq!( - hasher.cache.cache, modified_hasher.cache.cache, - "Bytes are different" - ); + assert_eq!(cache.bytes, modified_cache.bytes, "Bytes are different"); assert_eq!( - hasher.cache.schemas.len(), - modified_hasher.cache.schemas.len(), + cache.schemas.len(), + modified_cache.schemas.len(), "Number of schemas is different" ); assert_eq!( - hasher.cache.schemas, modified_hasher.cache.schemas, + cache.schemas, modified_cache.schemas, "Schemas are different" ); // Test the root generated by the updated hasher matches a non-cached tree hash root. let standard_root = modified.tree_hash_root(); - let cached_root = hasher + let cached_root = cache .tree_hash_root() .expect(&format!("Modification {}", i)); assert_eq!( standard_root, cached_root, "Modification {} failed. 
\n Cache: {:?}", - i, hasher + i, cache ); } } @@ -194,20 +191,20 @@ fn test_shrinking_vec_of_vec() { let original: Vec> = vec![vec![1], vec![2], vec![3], vec![4], vec![5]]; let modified: Vec> = original[0..3].to_vec(); - let new_hasher = CachedTreeHasher::new(&modified).unwrap(); + let new_cache = TreeHashCache::new(&modified).unwrap(); - let mut modified_hasher = CachedTreeHasher::new(&original).unwrap(); - modified_hasher.update(&modified).unwrap(); + let mut modified_cache = TreeHashCache::new(&original).unwrap(); + modified_cache.update(&modified).unwrap(); assert_eq!( - new_hasher.cache.schemas.len(), - modified_hasher.cache.schemas.len(), + new_cache.schemas.len(), + modified_cache.schemas.len(), "Schema count is different" ); assert_eq!( - new_hasher.cache.chunk_modified.len(), - modified_hasher.cache.chunk_modified.len(), + new_cache.chunk_modified.len(), + modified_cache.chunk_modified.len(), "Chunk count is different" ); } @@ -601,7 +598,7 @@ fn generic_test(index: usize) { d: 4, }; - let mut cache = TreeHashCache::new(&inner, 0).unwrap(); + let mut cache = TreeHashCache::new(&inner).unwrap(); let changed_inner = match index { 0 => Inner { @@ -636,7 +633,7 @@ fn generic_test(index: usize) { let expected = merkleize(join(data)); - let cache_bytes: Vec = cache.into(); + let (cache_bytes, _, _) = cache.into_components(); assert_eq!(expected, cache_bytes); } @@ -666,9 +663,9 @@ fn inner_builds() { d: 4, }; - let cache: Vec = TreeHashCache::new(&inner, 0).unwrap().into(); + let (cache_bytes, _, _) = TreeHashCache::new(&inner).unwrap().into_components(); - assert_eq!(expected, cache); + assert_eq!(expected, cache_bytes); } fn join(many: Vec>) -> Vec { diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs index b111ae7c48..50727a89fc 100644 --- a/eth2/utils/tree_hash_derive/src/lib.rs +++ b/eth2/utils/tree_hash_derive/src/lib.rs @@ -58,7 +58,7 @@ pub fn subtree_derive(input: TokenStream) -> TokenStream { let output = 
quote! { impl cached_tree_hash::CachedTreeHash for #name { fn new_tree_hash_cache(&self, depth: usize) -> Result { - let tree = cached_tree_hash::TreeHashCache::from_leaves_and_subtrees( + let tree = cached_tree_hash::TreeHashCache::from_subtrees( self, vec![ #( diff --git a/eth2/utils/tree_hash_derive/tests/tests.rs b/eth2/utils/tree_hash_derive/tests/tests.rs index 11eae4e021..d4fd55165e 100644 --- a/eth2/utils/tree_hash_derive/tests/tests.rs +++ b/eth2/utils/tree_hash_derive/tests/tests.rs @@ -1,4 +1,4 @@ -use cached_tree_hash::{CachedTreeHash, CachedTreeHasher}; +use cached_tree_hash::{CachedTreeHash, TreeHashCache}; use tree_hash::{merkleize::merkle_root, SignedRoot, TreeHash}; use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; @@ -12,16 +12,16 @@ pub struct Inner { fn test_standard_and_cached(original: &T, modified: &T) { // let mut cache = original.new_tree_hash_cache().unwrap(); - let mut hasher = CachedTreeHasher::new(original).unwrap(); + let mut cache = TreeHashCache::new(original).unwrap(); let standard_root = original.tree_hash_root(); - let cached_root = hasher.tree_hash_root().unwrap(); + let cached_root = cache.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root); // Test after a modification - hasher.update(modified).unwrap(); + cache.update(modified).unwrap(); let standard_root = modified.tree_hash_root(); - let cached_root = hasher.tree_hash_root().unwrap(); + let cached_root = cache.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root); }