From 88c6d15c32a83e029362daec6920ff4b82d33b51 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 16 Jul 2019 14:40:56 +1000 Subject: [PATCH] Padding efficent merkle root algo (#436) * Add initial work on padding efficent merkle roots * Improve merklize_padded * Improve tree_hash crate -- fix bugs, docs * Update codebase for tree_hash API change * Remove dbg statements, fix import error * Fix clippy lints, doc error * Tidy tree hash comments * Increase tree_hash max tree height * Fix PR review comments * Fix typos * Fix cache access off-by-one in tree hash * Set max tree depth to 48 (from 64) --- eth2/utils/tree_hash/Cargo.toml | 2 + eth2/utils/tree_hash/src/impls.rs | 2 +- eth2/utils/tree_hash/src/lib.rs | 16 +- eth2/utils/tree_hash/src/merkleize_padded.rs | 366 ++++++++++++++++++ .../{merkleize.rs => merkleize_standard.rs} | 23 +- eth2/utils/tree_hash_derive/src/lib.rs | 4 +- eth2/utils/tree_hash_derive/tests/tests.rs | 2 +- 7 files changed, 403 insertions(+), 12 deletions(-) create mode 100644 eth2/utils/tree_hash/src/merkleize_padded.rs rename eth2/utils/tree_hash/src/{merkleize.rs => merkleize_standard.rs} (72%) diff --git a/eth2/utils/tree_hash/Cargo.toml b/eth2/utils/tree_hash/Cargo.toml index 7e23d21652..948e0fe4fc 100644 --- a/eth2/utils/tree_hash/Cargo.toml +++ b/eth2/utils/tree_hash/Cargo.toml @@ -5,9 +5,11 @@ authors = ["Paul Hauner "] edition = "2018" [dev-dependencies] +rand = "0.7" tree_hash_derive = { path = "../tree_hash_derive" } [dependencies] ethereum-types = "0.5" hashing = { path = "../hashing" } int_to_bytes = { path = "../int_to_bytes" } +lazy_static = "0.1" diff --git a/eth2/utils/tree_hash/src/impls.rs b/eth2/utils/tree_hash/src/impls.rs index 212aba3c8b..b911478302 100644 --- a/eth2/utils/tree_hash/src/impls.rs +++ b/eth2/utils/tree_hash/src/impls.rs @@ -1,5 +1,5 @@ use super::*; -use crate::merkleize::merkle_root; +use crate::merkle_root; use ethereum_types::H256; use hashing::hash; use int_to_bytes::int_to_bytes32; diff --git a/eth2/utils/tree_hash/src/lib.rs b/eth2/utils/tree_hash/src/lib.rs index 2554e70c32..a1d7a048e5 100644 --- a/eth2/utils/tree_hash/src/lib.rs +++ b/eth2/utils/tree_hash/src/lib.rs @@ -1,5 +1,17 @@ +#[macro_use] +extern crate lazy_static; + pub mod impls; -pub mod merkleize; +mod merkleize_padded; +mod merkleize_standard; + +pub use merkleize_padded::merkleize_padded; +pub use merkleize_standard::merkleize_standard; + +/// Alias to `merkleize_padded(&bytes, 0)` +pub fn merkle_root(bytes: &[u8]) -> Vec { + merkleize_padded(&bytes, 0) +} pub const BYTES_PER_CHUNK: usize = 32; pub const HASHSIZE: usize = 32; @@ -44,7 +56,7 @@ macro_rules! tree_hash_ssz_encoding_as_vector { } fn tree_hash_root(&self) -> Vec { - tree_hash::merkleize::merkle_root(&ssz::ssz_encode(self)) + tree_hash::merkle_root(&ssz::ssz_encode(self)) } } }; diff --git a/eth2/utils/tree_hash/src/merkleize_padded.rs b/eth2/utils/tree_hash/src/merkleize_padded.rs new file mode 100644 index 0000000000..43bd247d81 --- /dev/null +++ b/eth2/utils/tree_hash/src/merkleize_padded.rs @@ -0,0 +1,366 @@ +use super::BYTES_PER_CHUNK; +use hashing::hash; + +/// The size of the cache that stores padding nodes for a given height. +/// +/// Currently, we panic if we encounter a tree with a height larger than `MAX_TREE_DEPTH`. +/// +/// It is set to 48 as we expect it to be sufficiently high that we won't exceed it. +pub const MAX_TREE_DEPTH: usize = 48; + +lazy_static! { + /// Cached zero hashes where `ZERO_HASHES[i]` is the hash of a Merkle tree with 2^i zero leaves. + static ref ZERO_HASHES: Vec> = { + let mut hashes = vec![vec![0; 32]; MAX_TREE_DEPTH + 1]; + + for i in 0..MAX_TREE_DEPTH { + hashes[i + 1] = hash_concat(&hashes[i], &hashes[i]); + } + + hashes + }; +} + +/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of +/// leaves. +/// +/// First all nodes are extracted from `bytes` and then a padding node is added until the number of +/// leaf chunks is greater than or equal to `min_leaves`. Callers may set `min_leaves` to `0` if no +/// adding additional chunks should be added to the given `bytes`. +/// +/// If `bytes.len() <= BYTES_PER_CHUNK`, no hashing is done and `bytes` is returned, potentially +/// padded out to `BYTES_PER_CHUNK` length with `0`. +/// +/// ## CPU Performance +/// +/// A cache of `MAX_TREE_DEPTH` hashes are stored to avoid re-computing the hashes of padding nodes +/// (or their parents). Therefore, adding padding nodes only incurs one more hash per additional +/// height of the tree. +/// +/// ## Memory Performance +/// +/// This algorithm has two interesting memory usage properties: +/// +/// 1. The maximum memory footprint is roughly `O(V / 2)` memory, where `V` is the number of leaf +/// chunks with values (i.e., leaves that are not padding). The means adding padding nodes to +/// the tree does not increase the memory footprint. +/// 2. At each height of the tree half of the memory is freed until only a single chunk is stored. +/// 3. The input `bytes` are not copied into another list before processing. +/// +/// _Note: there are some minor memory overheads, including a handful of usizes and a list of +/// `MAX_TREE_DEPTH` hashes as `lazy_static` constants._ +pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec { + // If the bytes are just one chunk or less, pad to one chunk and return without hashing. + if bytes.len() <= BYTES_PER_CHUNK && min_leaves <= 1 { + let mut o = bytes.to_vec(); + o.resize(BYTES_PER_CHUNK, 0); + return o; + } + + assert!( + bytes.len() > BYTES_PER_CHUNK || min_leaves > 1, + "Merkle hashing only needs to happen if there is more than one chunk" + ); + + // The number of leaves that can be made directly from `bytes`. + let leaves_with_values = (bytes.len() + (BYTES_PER_CHUNK - 1)) / BYTES_PER_CHUNK; + + // The number of parents that have at least one non-padding leaf. + // + // Since there is more than one node in this tree (see prior assertion), there should always be + // one or more initial parent nodes. + let initial_parents_with_values = std::cmp::max(1, next_even_number(leaves_with_values) / 2); + + // The number of leaves in the full tree (including padding nodes). + let num_leaves = std::cmp::max(leaves_with_values, min_leaves).next_power_of_two(); + + // The number of levels in the tree. + // + // A tree with a single node has `height == 1`. + let height = num_leaves.trailing_zeros() as usize + 1; + + assert!(height >= 2, "The tree should have two or more heights"); + + // A buffer/scratch-space used for storing each round of hashes at each height. + // + // This buffer is kept as small as possible; it will shrink so it never stores a padding node. + let mut chunks = ChunkStore::with_capacity(initial_parents_with_values); + + // Create a parent in the `chunks` buffer for every two chunks in `bytes`. + // + // I.e., do the first round of hashing, hashing from the `bytes` slice and filling the `chunks` + // struct. + for i in 0..initial_parents_with_values { + let start = i * BYTES_PER_CHUNK * 2; + + // Hash two chunks, creating a parent chunk. + let hash = match bytes.get(start..start + BYTES_PER_CHUNK * 2) { + // All bytes are available, hash as usual. + Some(slice) => hash(slice), + // Unable to get all the bytes, get a small slice and pad it out. + None => { + let mut preimage = bytes + .get(start..) + .expect("`i` can only be larger than zero if there are bytes to read") + .to_vec(); + preimage.resize(BYTES_PER_CHUNK * 2, 0); + hash(&preimage) + } + }; + + assert_eq!( + hash.len(), + BYTES_PER_CHUNK, + "Hashes should be exactly one chunk" + ); + + // Store the parent node. + chunks + .set(i, &hash) + .expect("Buffer should always have capacity for parent nodes") + } + + // Iterate through all heights above the leaf nodes and either (a) hash two children or, (b) + // hash a left child and a right padding node. + // + // Skip the 0'th height because the leaves have already been processed. Skip the highest-height + // in the tree as it is the root does not require hashing. + // + // The padding nodes for each height are cached via `lazy static` to simulate non-adjacent + // padding nodes (i.e., avoid doing unnecessary hashing). + for height in 1..height - 1 { + let child_nodes = chunks.len(); + let parent_nodes = next_even_number(child_nodes) / 2; + + // For each pair of nodes stored in `chunks`: + // + // - If two nodes are available, hash them to form a parent. + // - If one node is available, hash it and a cached padding node to form a parent. + for i in 0..parent_nodes { + let (left, right) = match (chunks.get(i * 2), chunks.get(i * 2 + 1)) { + (Ok(left), Ok(right)) => (left, right), + (Ok(left), Err(_)) => (left, get_zero_hash(height)), + // Deriving `parent_nodes` from `chunks.len()` has ensured that we never encounter the + // scenario where we expect two nodes but there are none. + (Err(_), Err(_)) => unreachable!("Parent must have one child"), + // `chunks` is a contiguous array so it is impossible for an index to be missing + // when a higher index is present. + (Err(_), Ok(_)) => unreachable!("Parent must have a left child"), + }; + + assert!( + left.len() == right.len() && right.len() == BYTES_PER_CHUNK, + "Both children should be `BYTES_PER_CHUNK` bytes." + ); + + let hash = hash_concat(left, right); + + // Store a parent node. + chunks + .set(i, &hash) + .expect("Buf is adequate size for parent"); + } + + // Shrink the buffer so it neatly fits the number of new nodes created in this round. + // + // The number of `parent_nodes` is either decreasing or stable. It never increases. + chunks.truncate(parent_nodes); + } + + // There should be a single chunk left in the buffer and it is the Merkle root. + let root = chunks.into_vec(); + + assert_eq!(root.len(), BYTES_PER_CHUNK, "Only one chunk should remain"); + + root +} + +/// A helper struct for storing words of `BYTES_PER_CHUNK` size in a flat byte array. +#[derive(Debug)] +struct ChunkStore(Vec); + +impl ChunkStore { + /// Creates a new instance with `chunks` padding nodes. + fn with_capacity(chunks: usize) -> Self { + Self(vec![0; chunks * BYTES_PER_CHUNK]) + } + + /// Set the `i`th chunk to `value`. + /// + /// Returns `Err` if `value.len() != BYTES_PER_CHUNK` or `i` is out-of-bounds. + fn set(&mut self, i: usize, value: &[u8]) -> Result<(), ()> { + if i < self.len() && value.len() == BYTES_PER_CHUNK { + let slice = &mut self.0[i * BYTES_PER_CHUNK..i * BYTES_PER_CHUNK + BYTES_PER_CHUNK]; + slice.copy_from_slice(value); + Ok(()) + } else { + Err(()) + } + } + + /// Gets the `i`th chunk. + /// + /// Returns `Err` if `i` is out-of-bounds. + fn get(&self, i: usize) -> Result<&[u8], ()> { + if i < self.len() { + Ok(&self.0[i * BYTES_PER_CHUNK..i * BYTES_PER_CHUNK + BYTES_PER_CHUNK]) + } else { + Err(()) + } + } + + /// Returns the number of chunks presently stored in `self`. + fn len(&self) -> usize { + self.0.len() / BYTES_PER_CHUNK + } + + /// Truncates 'self' to `num_chunks` chunks. + /// + /// Functionally identical to `Vec::truncate`. + fn truncate(&mut self, num_chunks: usize) { + self.0.truncate(num_chunks * BYTES_PER_CHUNK) + } + + /// Consumes `self`, returning the underlying byte array. + fn into_vec(self) -> Vec { + self.0 + } +} + +/// Returns a cached padding node for a given height. +fn get_zero_hash(height: usize) -> &'static [u8] { + if height <= MAX_TREE_DEPTH { + &ZERO_HASHES[height] + } else { + panic!("Tree exceeds MAX_TREE_DEPTH of {}", MAX_TREE_DEPTH) + } +} + +/// Concatenate two vectors. +fn concat(mut vec1: Vec, mut vec2: Vec) -> Vec { + vec1.append(&mut vec2); + vec1 +} + +/// Compute the hash of two other hashes concatenated. +fn hash_concat(h1: &[u8], h2: &[u8]) -> Vec { + hash(&concat(h1.to_vec(), h2.to_vec())) +} + +/// Returns the next even number following `n`. If `n` is even, `n` is returned. +fn next_even_number(n: usize) -> usize { + n + n % 2 +} + +#[cfg(test)] +mod test { + use super::*; + + pub fn reference_root(bytes: &[u8]) -> Vec { + crate::merkleize_standard(&bytes)[0..32].to_vec() + } + + macro_rules! common_tests { + ($get_bytes: ident) => { + #[test] + fn zero_value_0_nodes() { + test_against_reference(&$get_bytes(0 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_1_nodes() { + test_against_reference(&$get_bytes(1 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_2_nodes() { + test_against_reference(&$get_bytes(2 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_3_nodes() { + test_against_reference(&$get_bytes(3 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_4_nodes() { + test_against_reference(&$get_bytes(4 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_8_nodes() { + test_against_reference(&$get_bytes(8 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_9_nodes() { + test_against_reference(&$get_bytes(9 * BYTES_PER_CHUNK), 0); + } + + #[test] + fn zero_value_8_nodes_varying_min_length() { + for i in 0..64 { + test_against_reference(&$get_bytes(8 * BYTES_PER_CHUNK), i); + } + } + + #[test] + fn zero_value_range_of_nodes() { + for i in 0..32 * BYTES_PER_CHUNK { + test_against_reference(&$get_bytes(i), 0); + } + } + + #[test] + fn max_tree_depth_min_nodes() { + let input = vec![0; 10 * BYTES_PER_CHUNK]; + let min_nodes = 2usize.pow(MAX_TREE_DEPTH as u32); + assert_eq!( + merkleize_padded(&input, min_nodes), + get_zero_hash(MAX_TREE_DEPTH) + ); + } + }; + } + + mod zero_value { + use super::*; + + fn zero_bytes(bytes: usize) -> Vec { + vec![0; bytes] + } + + common_tests!(zero_bytes); + } + + mod random_value { + use super::*; + use rand::RngCore; + + fn random_bytes(bytes: usize) -> Vec { + let mut bytes = Vec::with_capacity(bytes); + rand::thread_rng().fill_bytes(&mut bytes); + bytes + } + + common_tests!(random_bytes); + } + + fn test_against_reference(input: &[u8], min_nodes: usize) { + let mut reference_input = input.to_vec(); + reference_input.resize( + std::cmp::max( + reference_input.len(), + min_nodes.next_power_of_two() * BYTES_PER_CHUNK, + ), + 0, + ); + + assert_eq!( + reference_root(&reference_input), + merkleize_padded(&input, min_nodes), + "input.len(): {:?}", + input.len() + ); + } +} diff --git a/eth2/utils/tree_hash/src/merkleize.rs b/eth2/utils/tree_hash/src/merkleize_standard.rs similarity index 72% rename from eth2/utils/tree_hash/src/merkleize.rs rename to eth2/utils/tree_hash/src/merkleize_standard.rs index 9482895ec5..c47d342ddb 100644 --- a/eth2/utils/tree_hash/src/merkleize.rs +++ b/eth2/utils/tree_hash/src/merkleize_standard.rs @@ -1,12 +1,23 @@ use super::*; use hashing::hash; -pub fn merkle_root(bytes: &[u8]) -> Vec { - // TODO: replace this with a more memory efficient method. - efficient_merkleize(&bytes)[0..32].to_vec() -} - -pub fn efficient_merkleize(bytes: &[u8]) -> Vec { +/// Merkleizes bytes and returns the root, using a simple algorithm that does not optimize to avoid +/// processing or storing padding bytes. +/// +/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two. +/// +/// It is likely a better choice to use [merkleize_padded](fn.merkleize_padded.html) instead. +/// +/// ## CPU Performance +/// +/// Will hash all nodes in the tree, even if they are padding and pre-determined. +/// +/// ## Memory Performance +/// +/// - Duplicates the input `bytes`. +/// - Stores all internal nodes, even if they are padding. +/// - Does not free up unused memory during operation. +pub fn merkleize_standard(bytes: &[u8]) -> Vec { // If the bytes are just one chunk (or less than one chunk) just return them. if bytes.len() <= HASHSIZE { let mut o = bytes.to_vec(); diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs index fe94af181a..f815dd5298 100644 --- a/eth2/utils/tree_hash_derive/src/lib.rs +++ b/eth2/utils/tree_hash_derive/src/lib.rs @@ -150,7 +150,7 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream { leaves.append(&mut self.#idents.tree_hash_root()); )* - tree_hash::merkleize::merkle_root(&leaves) + tree_hash::merkle_root(&leaves) } } }; @@ -180,7 +180,7 @@ pub fn tree_hash_signed_root_derive(input: TokenStream) -> TokenStream { leaves.append(&mut self.#idents.tree_hash_root()); )* - tree_hash::merkleize::merkle_root(&leaves) + tree_hash::merkle_root(&leaves) } } }; diff --git a/eth2/utils/tree_hash_derive/tests/tests.rs b/eth2/utils/tree_hash_derive/tests/tests.rs index d4fd55165e..ab11730ff9 100644 --- a/eth2/utils/tree_hash_derive/tests/tests.rs +++ b/eth2/utils/tree_hash_derive/tests/tests.rs @@ -1,5 +1,5 @@ use cached_tree_hash::{CachedTreeHash, TreeHashCache}; -use tree_hash::{merkleize::merkle_root, SignedRoot, TreeHash}; +use tree_hash::{merkle_root, SignedRoot, TreeHash}; use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; #[derive(Clone, Debug, TreeHash, CachedTreeHash)]