Refactor tree hashing (#861)

* Pre-allocated tree hash caches

* Add SmallVec to tree hash cache

* Avoid allocation for validator.pubkey

* Avoid iterator which seems to be doing heap alloc

* Add more smallvecs

* MOAR SMALLVEC

* Move non-test code to Hash256 tree hash

* Fix byte ordering error

* Add incomplete but working merkle stream impl

* Fix zero hash error

* Add zero hash fn

* Add MerkleStream comments

* Add smallvec, tidy

* Integrate into tree hash derive

* Update ssz_types tree hash

* Don't heap alloc for mix in length

* Add byte-level streaming to MerkleStream

* Avoid recursion in write method

* Update BLS to MerkleStream

* Fix some not-compiling tests

* Remove debug profiling

* Remove code duplication

* Move beacon state tree hash to new hasher

* Fix failing tests

* Update comments

* Add some fast-paths to tree_hash::merkle_root

* Remove unncessary test

* Rename MerkleStream -> MerkleHasher

* Rename new_with_leaf_count -> with_leaves

* Tidy

* Remove NonZeroUsize

* Remove todo

* Update smallvec
This commit is contained in:
Paul Hauner
2020-03-05 08:07:27 +11:00
committed by GitHub
parent 12999fb06c
commit 7f6ae4c2f5
43 changed files with 1076 additions and 292 deletions

View File

@@ -1,6 +1,12 @@
use super::*;
use ethereum_types::{H256, U128, U256};
fn int_to_hash256(int: u64) -> Hash256 {
let mut bytes = [0; HASHSIZE];
bytes[0..8].copy_from_slice(&int.to_le_bytes());
Hash256::from_slice(&bytes)
}
macro_rules! impl_for_bitsize {
($type: ident, $bit_size: expr) => {
impl TreeHash for $type {
@@ -17,8 +23,8 @@ macro_rules! impl_for_bitsize {
}
#[allow(clippy::cast_lossless)]
fn tree_hash_root(&self) -> Vec<u8> {
int_to_bytes32(*self as u64)
fn tree_hash_root(&self) -> Hash256 {
int_to_hash256(*self as u64)
}
}
};
@@ -43,12 +49,13 @@ impl TreeHash for bool {
u8::tree_hash_packing_factor()
}
fn tree_hash_root(&self) -> Vec<u8> {
int_to_bytes32(*self as u64)
fn tree_hash_root(&self) -> Hash256 {
int_to_hash256(*self as u64)
}
}
macro_rules! impl_for_u8_array {
/// Only valid for byte types less than 32 bytes.
macro_rules! impl_for_lt_32byte_u8_array {
($len: expr) => {
impl TreeHash for [u8; $len] {
fn tree_hash_type() -> TreeHashType {
@@ -63,15 +70,17 @@ macro_rules! impl_for_u8_array {
unreachable!("bytesN should never be packed.")
}
fn tree_hash_root(&self) -> Vec<u8> {
merkle_root(&self[..], 0)
fn tree_hash_root(&self) -> Hash256 {
let mut result = [0; 32];
result[0..$len].copy_from_slice(&self[..]);
Hash256::from_slice(&result)
}
}
};
}
impl_for_u8_array!(4);
impl_for_u8_array!(32);
impl_for_lt_32byte_u8_array!(4);
impl_for_lt_32byte_u8_array!(32);
impl TreeHash for U128 {
fn tree_hash_type() -> TreeHashType {
@@ -88,8 +97,10 @@ impl TreeHash for U128 {
2
}
fn tree_hash_root(&self) -> Vec<u8> {
merkle_root(&self.tree_hash_packed_encoding(), 0)
fn tree_hash_root(&self) -> Hash256 {
let mut result = [0; HASHSIZE];
self.to_little_endian(&mut result[0..16]);
Hash256::from_slice(&result)
}
}
@@ -108,8 +119,10 @@ impl TreeHash for U256 {
1
}
fn tree_hash_root(&self) -> Vec<u8> {
merkle_root(&self.tree_hash_packed_encoding(), 0)
fn tree_hash_root(&self) -> Hash256 {
let mut result = [0; 32];
self.to_little_endian(&mut result[..]);
Hash256::from_slice(&result)
}
}
@@ -126,18 +139,11 @@ impl TreeHash for H256 {
1
}
fn tree_hash_root(&self) -> Vec<u8> {
merkle_root(&self.as_bytes().to_vec(), 0)
fn tree_hash_root(&self) -> Hash256 {
*self
}
}
/// Returns `int` as little-endian bytes with a length of 32.
fn int_to_bytes32(int: u64) -> Vec<u8> {
let mut vec = int.to_le_bytes().to_vec();
vec.resize(32, 0);
vec
}
#[cfg(test)]
mod test {
use super::*;
@@ -149,22 +155,22 @@ mod test {
let false_bytes: Vec<u8> = vec![0; 32];
assert_eq!(true.tree_hash_root(), true_bytes);
assert_eq!(false.tree_hash_root(), false_bytes);
assert_eq!(true.tree_hash_root().as_bytes(), true_bytes.as_slice());
assert_eq!(false.tree_hash_root().as_bytes(), false_bytes.as_slice());
}
#[test]
fn int_to_bytes() {
assert_eq!(&int_to_bytes32(0), &[0; 32]);
assert_eq!(int_to_hash256(0).as_bytes(), &[0; 32]);
assert_eq!(
&int_to_bytes32(1),
int_to_hash256(1).as_bytes(),
&[
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
]
);
assert_eq!(
&int_to_bytes32(u64::max_value()),
int_to_hash256(u64::max_value()).as_bytes(),
&[
255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0

View File

@@ -1,30 +1,80 @@
pub mod impls;
mod merkle_hasher;
mod merkleize_padded;
mod merkleize_standard;
pub use merkle_hasher::{Error, MerkleHasher};
pub use merkleize_padded::merkleize_padded;
pub use merkleize_standard::merkleize_standard;
use eth2_hashing::{Context, SHA256};
use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
pub const BYTES_PER_CHUNK: usize = 32;
pub const HASHSIZE: usize = 32;
pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK;
/// Alias to `merkleize_padded(&bytes, minimum_chunk_count)`
pub type Hash256 = ethereum_types::H256;
/// Convenience method for `MerkleHasher` which also provides some fast-paths for small trees.
///
/// If `minimum_chunk_count < bytes / BYTES_PER_CHUNK`, padding will be added for the difference
/// between the two.
pub fn merkle_root(bytes: &[u8], minimum_chunk_count: usize) -> Vec<u8> {
merkleize_padded(&bytes, minimum_chunk_count)
/// `minimum_leaf_count` will only be used if it is greater than or equal to the minimum number of leaves that can be created from `bytes`.
pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 {
let leaves = std::cmp::max(
(bytes.len() + (HASHSIZE - 1)) / HASHSIZE,
minimum_leaf_count,
);
if leaves == 0 {
// If there are no bytes then the hash is always zero.
Hash256::zero()
} else if leaves == 1 {
// If there is only one leaf, the hash is always those leaf bytes padded out to 32-bytes.
let mut hash = [0; HASHSIZE];
hash[0..bytes.len()].copy_from_slice(bytes);
Hash256::from_slice(&hash)
} else if leaves == 2 {
// If there are only two leaves (this is common with BLS pubkeys), we can avoid some
// overhead with `MerkleHasher` and just do a simple 3-node tree here.
let mut leaves = [0; HASHSIZE * 2];
leaves[0..bytes.len()].copy_from_slice(bytes);
let mut context = Context::new(&SHA256);
context.update(&leaves);
let digest = context.finish();
Hash256::from_slice(digest.as_ref())
} else {
// If there are 3 or more leaves, use `MerkleHasher`.
let mut hasher = MerkleHasher::with_leaves(leaves);
hasher
.write(bytes)
.expect("the number of leaves is adequate for the number of bytes");
hasher
.finish()
.expect("the number of leaves is adequate for the number of bytes")
}
}
/// Returns the node created by hashing `root` and `length`.
///
/// Used in `TreeHash` for inserting the length of a list above it's root.
pub fn mix_in_length(root: &[u8], length: usize) -> Vec<u8> {
let mut length_bytes = length.to_le_bytes().to_vec();
length_bytes.resize(BYTES_PER_CHUNK, 0);
pub fn mix_in_length(root: &Hash256, length: usize) -> Hash256 {
let usize_len = std::mem::size_of::<usize>();
eth2_hashing::hash32_concat(root, &length_bytes)[..].to_vec()
let mut length_bytes = [0; BYTES_PER_CHUNK];
length_bytes[0..usize_len].copy_from_slice(&length.to_le_bytes());
Hash256::from_slice(&eth2_hashing::hash32_concat(root.as_bytes(), &length_bytes)[..])
}
/// Returns a cached padding node for a given height.
fn get_zero_hash(height: usize) -> &'static [u8] {
if height <= ZERO_HASHES_MAX_INDEX {
&ZERO_HASHES[height]
} else {
panic!("Tree exceeds MAX_TREE_DEPTH of {}", ZERO_HASHES_MAX_INDEX)
}
}
#[derive(Debug, PartialEq, Clone)]
@@ -42,7 +92,7 @@ pub trait TreeHash {
fn tree_hash_packing_factor() -> usize;
fn tree_hash_root(&self) -> Vec<u8>;
fn tree_hash_root(&self) -> Hash256;
}
#[macro_export]
@@ -104,6 +154,9 @@ mod test {
eth2_hashing::hash(&preimage)
};
assert_eq!(mix_in_length(&[42; BYTES_PER_CHUNK], 42), hash);
assert_eq!(
mix_in_length(&Hash256::from_slice(&[42; BYTES_PER_CHUNK]), 42).as_bytes(),
&hash[..]
);
}
}

View File

@@ -0,0 +1,575 @@
use crate::{get_zero_hash, Hash256, HASHSIZE};
use eth2_hashing::{Context, Digest, SHA256};
use smallvec::{smallvec, SmallVec};
use std::mem;
type SmallVec8<T> = SmallVec<[T; 8]>;
#[derive(Clone, Debug, PartialEq)]
pub enum Error {
/// The maximum number of leaves defined by the initialization `depth` has been exceed.
MaximumLeavesExceeded { max_leaves: usize },
}
/// Helper struct to store either a hash digest or a slice.
///
/// Should be used as a left or right value for some node.
enum Preimage<'a> {
Digest(Digest),
Slice(&'a [u8]),
}
impl<'a> Preimage<'a> {
/// Returns a 32-byte slice.
fn as_bytes(&self) -> &[u8] {
match self {
Preimage::Digest(digest) => digest.as_ref(),
Preimage::Slice(slice) => slice,
}
}
}
/// A node that has had a left child supplied, but not a right child.
struct HalfNode {
/// The hasher context.
context: Context,
/// The tree id of the node. The root node has in id of `1` and ids increase moving down the
/// tree from left to right.
id: usize,
}
impl HalfNode {
/// Create a new half-node from the given `left` value.
fn new(id: usize, left: Preimage) -> Self {
let mut context = Context::new(&SHA256);
context.update(left.as_bytes());
Self { context, id }
}
/// Complete the half-node by providing a `right` value. Returns a digest of the left and right
/// nodes.
fn finish(mut self, right: Preimage) -> Digest {
self.context.update(right.as_bytes());
self.context.finish()
}
}
/// Provides a Merkle-root hasher that allows for streaming bytes (i.e., providing any-length byte
/// slices without need to separate into leaves). Efficiently handles cases where not all leaves
/// have been provided by assuming all non-provided leaves are `[0; 32]` and pre-computing the
/// zero-value hashes at all depths of the tree.
///
/// This algorithm aims to allocate as little memory as possible and it does this by "folding" up
/// the tree has each leaf is provided. Consider this step-by-step functional diagram of hashing a
/// tree with depth three:
///
/// ## Functional Diagram
///
/// Nodes that are `-` have not been defined and do not occupy memory. Nodes that are `L` are
/// leaves that are provided but are not stored. Nodes that have integers (`1`, `2`) are stored in
/// our struct. Finally, nodes that are `X` were stored, but are now removed.
///
/// ### Start
///
/// ```ignore
/// -
/// / \
/// - -
/// / \ / \
/// - - - -
/// ```
///
/// ### Provide first leaf
///
/// ```ignore
/// -
/// / \
/// 2 -
/// / \ / \
/// L - - -
/// ```
///
/// ### Provide second leaf
///
/// ```ignore
/// 1
/// / \
/// X -
/// / \ / \
/// L L - -
/// ```
///
/// ### Provide third leaf
///
/// ```ignore
/// 1
/// / \
/// X 3
/// / \ / \
/// L L L -
/// ```
///
/// ### Provide fourth and final leaf
///
/// ```ignore
/// 1
/// / \
/// X X
/// / \ / \
/// L L L L
/// ```
///
pub struct MerkleHasher {
/// Stores the nodes that are half-complete and awaiting a right node.
///
/// A smallvec of size 8 means we can hash a tree with 256 leaves without allocating on the
/// heap. Each half-node is 224 bytes, so this smallvec may store 1,792 bytes on the stack.
half_nodes: SmallVec8<HalfNode>,
/// The depth of the tree that will be produced.
///
/// Depth is counted top-down (i.e., the root node is at depth 0). A tree with 1 leaf has a
/// depth of 1, a tree with 4 leaves has a depth of 3.
depth: usize,
/// The next leaf that we are expecting to process.
next_leaf: usize,
/// A buffer of bytes that are waiting to be written to a leaf.
buffer: SmallVec<[u8; 32]>,
/// Set to Some(root) when the root of the tree is known.
root: Option<Hash256>,
}
/// Returns the parent of node with id `i`.
fn get_parent(i: usize) -> usize {
i / 2
}
/// Gets the depth of a node with an id of `i`.
///
/// It is a logic error to provide `i == 0`.
///
/// E.g., if `i` is 1, depth is 0. If `i` is is 1, depth is 1.
fn get_depth(i: usize) -> usize {
let total_bits = mem::size_of::<usize>() * 8;
total_bits - i.leading_zeros() as usize - 1
}
impl MerkleHasher {
/// Instantiate a hasher for a tree with a given number of leaves.
///
/// `num_leaves` will be rounded to the next power of two. E.g., if `num_leaves == 6`, then the
/// tree will _actually_ be able to accomodate 8 leaves and the resulting hasher is exactly the
/// same as one that was instantiated with `Self::with_leaves(8)`.
///
/// ## Notes
///
/// If `num_leaves == 0`, a tree of depth 1 will be created. If no leaves are provided it will
/// return a root of `[0; 32]`.
pub fn with_leaves(num_leaves: usize) -> Self {
let depth = get_depth(num_leaves.next_power_of_two()) + 1;
Self::with_depth(depth)
}
/// Instantiates a new, empty hasher for a tree with `depth` layers which will have capacity
/// for `1 << (depth - 1)` leaf nodes.
///
/// It is not possible to grow the depth of the tree after instantiation.
///
/// ## Panics
///
/// Panics if `depth == 0`.
fn with_depth(depth: usize) -> Self {
assert!(depth > 0, "merkle tree cannot have a depth of zero");
Self {
half_nodes: SmallVec::with_capacity(depth - 1),
depth,
next_leaf: 1 << (depth - 1),
buffer: SmallVec::with_capacity(32),
root: None,
}
}
/// Write some bytes to the hasher.
///
/// ## Errors
///
/// Returns an error if the given bytes would create a leaf that would exceed the maximum
/// permissible number of leaves defined by the initialization `depth`. E.g., a tree of `depth
/// == 2` can only accept 2 leaves. A tree of `depth == 14` can only accept 8,192 leaves.
pub fn write(&mut self, bytes: &[u8]) -> Result<(), Error> {
let mut ptr = 0;
while ptr <= bytes.len() {
let slice = &bytes[ptr..std::cmp::min(bytes.len(), ptr + HASHSIZE)];
if self.buffer.is_empty() && slice.len() == HASHSIZE {
self.process_leaf(slice)?;
ptr += HASHSIZE
} else if self.buffer.len() + slice.len() < HASHSIZE {
self.buffer.extend_from_slice(slice);
ptr += HASHSIZE
} else {
let buf_len = self.buffer.len();
let required = HASHSIZE - buf_len;
let mut leaf = [0; HASHSIZE];
leaf[..buf_len].copy_from_slice(&self.buffer);
leaf[buf_len..].copy_from_slice(&slice[0..required]);
self.process_leaf(&leaf)?;
self.buffer = smallvec![];
ptr += required
}
}
Ok(())
}
/// Process the next leaf in the tree.
///
/// ## Errors
///
/// Returns an error if the given leaf would exceed the maximum permissible number of leaves
/// defined by the initialization `depth`. E.g., a tree of `depth == 2` can only accept 2
/// leaves. A tree of `depth == 14` can only accept 8,192 leaves.
fn process_leaf(&mut self, leaf: &[u8]) -> Result<(), Error> {
assert_eq!(leaf.len(), HASHSIZE, "a leaf must be 32 bytes");
let max_leaves = 1 << (self.depth + 1);
if self.next_leaf > max_leaves {
return Err(Error::MaximumLeavesExceeded { max_leaves });
} else if self.next_leaf == 1 {
// A tree of depth one has a root that is equal to the first given leaf.
self.root = Some(Hash256::from_slice(leaf))
} else if self.next_leaf % 2 == 0 {
self.process_left_node(self.next_leaf, Preimage::Slice(leaf))
} else {
self.process_right_node(self.next_leaf, Preimage::Slice(leaf))
}
self.next_leaf += 1;
Ok(())
}
/// Returns the root of the Merkle tree.
///
/// If not all leaves have been provided, the tree will be efficiently completed under the
/// assumption that all not-yet-provided leaves are equal to `[0; 32]`.
///
/// ## Errors
///
/// Returns an error if the bytes remaining in the buffer would create a leaf that would exceed
/// the maximum permissible number of leaves defined by the initialization `depth`.
pub fn finish(mut self) -> Result<Hash256, Error> {
if !self.buffer.is_empty() {
let mut leaf = [0; HASHSIZE];
leaf[..self.buffer.len()].copy_from_slice(&self.buffer);
self.process_leaf(&leaf)?
}
// If the tree is incomplete, we must complete it by providing zero-hashes.
loop {
if let Some(root) = self.root {
break Ok(root);
} else {
if let Some(node) = self.half_nodes.last() {
let right_child = node.id * 2 + 1;
self.process_right_node(right_child, self.zero_hash(right_child));
} else if self.next_leaf == 1 {
// The next_leaf can only be 1 if the tree has a depth of one. If have been no
// leaves supplied, assume a root of zero.
break Ok(Hash256::zero());
} else {
// The only scenario where there are (a) no half nodes and (b) a tree of depth
// two or more is where no leaves have been supplied at all.
//
// Once we supply this first zero-hash leaf then all future operations will be
// triggered via the `process_right_node` branch.
self.process_left_node(self.next_leaf, self.zero_hash(self.next_leaf))
}
}
}
}
/// Process a node that will become the left-hand node of some parent. The supplied `id` is
/// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this
/// is a leaf node it will be the value of that leaf).
///
/// In this scenario, the only option is to push a new half-node.
fn process_left_node(&mut self, id: usize, preimage: Preimage) {
self.half_nodes
.push(HalfNode::new(get_parent(id), preimage))
}
/// Process a node that will become the right-hand node of some parent. The supplied `id` is
/// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this
/// is a leaf node it will be the value of that leaf).
///
/// This operation will always complete one node, then it will attempt to crawl up the tree and
/// collapse and other viable nodes. For example, consider a tree of depth 3 (see diagram
/// below). When providing the node with id `7`, the node with id `3` will be completed which
/// will also provide the right-node for the `1` node. This function will complete both of
/// those nodes and ultimately find the root of the tree.
///
/// ```ignore
/// 1 <-- completed
/// / \
/// 2 3 <-- completed
/// / \ / \
/// 4 5 6 7 <-- supplied right node
/// ```
fn process_right_node(&mut self, id: usize, mut preimage: Preimage) {
let mut parent = get_parent(id);
loop {
match self.half_nodes.last() {
Some(node) if node.id == parent => {
preimage = Preimage::Digest(
self.half_nodes
.pop()
.expect("if .last() is Some then .pop() must succeed")
.finish(preimage),
);
if parent == 1 {
self.root = Some(Hash256::from_slice(preimage.as_bytes()));
break;
} else {
parent = get_parent(parent);
}
}
_ => {
self.half_nodes.push(HalfNode::new(parent, preimage));
break;
}
}
}
}
/// Returns a "zero hash" from a pre-computed set for the given node.
///
/// Note: this node is not always zero, instead it is the result of hashing up a tree where the
/// leaves are all zeros. E.g., in a tree of depth 2, the `zero_hash` of a node at depth 1
/// will be `[0; 32]`. However, the `zero_hash` for a node at depth 0 will be
/// `hash(concat([0; 32], [0; 32])))`.
fn zero_hash(&self, id: usize) -> Preimage<'static> {
Preimage::Slice(get_zero_hash(self.depth - (get_depth(id) + 1)))
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::merkleize_padded;
/// This test is just to ensure that the stack size of the `Context` remains the same. We choose
/// our smallvec size based upon this, so it's good to know if it suddenly changes in size.
#[test]
fn context_size() {
assert_eq!(
mem::size_of::<HalfNode>(),
216 + 8,
"Halfnode size should be as expected"
);
}
fn compare_with_reference(leaves: &[Hash256], depth: usize) {
let reference_bytes = leaves
.iter()
.map(|hash| hash.as_bytes().to_vec())
.flatten()
.collect::<Vec<_>>();
let reference_root = merkleize_padded(&reference_bytes, 1 << (depth - 1));
let merklizer_root_32_bytes = {
let mut m = MerkleHasher::with_depth(depth);
for leaf in leaves.iter() {
m.write(leaf.as_bytes()).expect("should process leaf");
}
m.finish().expect("should finish")
};
assert_eq!(
reference_root, merklizer_root_32_bytes,
"32 bytes should match reference root"
);
let merklizer_root_individual_3_bytes = {
let mut m = MerkleHasher::with_depth(depth);
for bytes in reference_bytes.clone().chunks(3) {
m.write(bytes).expect("should process byte");
}
m.finish().expect("should finish")
};
assert_eq!(
reference_root, merklizer_root_individual_3_bytes,
"3 bytes should match reference root"
);
let merklizer_root_individual_single_bytes = {
let mut m = MerkleHasher::with_depth(depth);
for byte in reference_bytes.iter() {
m.write(&[*byte]).expect("should process byte");
}
m.finish().expect("should finish")
};
assert_eq!(
reference_root, merklizer_root_individual_single_bytes,
"single bytes should match reference root"
);
}
/// A simple wrapper to compare MerkleHasher to the reference function by just giving a number
/// of leaves and a depth.
fn compare_reference_with_len(leaves: u64, depth: usize) {
let leaves = (0..leaves)
.map(|i| Hash256::from_low_u64_be(i))
.collect::<Vec<_>>();
compare_with_reference(&leaves, depth)
}
/// Compares the `MerkleHasher::with_depth` and `MerkleHasher::with_leaves` generate consistent
/// results.
fn compare_new_with_leaf_count(num_leaves: u64, depth: usize) {
let leaves = (0..num_leaves)
.map(|i| Hash256::from_low_u64_be(i))
.collect::<Vec<_>>();
let from_depth = {
let mut m = MerkleHasher::with_depth(depth);
for leaf in leaves.iter() {
m.write(leaf.as_bytes()).expect("should process leaf");
}
m.finish()
};
let from_num_leaves = {
let mut m = MerkleHasher::with_leaves(num_leaves as usize);
for leaf in leaves.iter() {
m.process_leaf(leaf.as_bytes())
.expect("should process leaf");
}
m.finish()
};
assert_eq!(
from_depth, from_num_leaves,
"hash generated by depth should match that from num leaves"
);
}
#[test]
fn with_leaves() {
compare_new_with_leaf_count(1, 1);
compare_new_with_leaf_count(2, 2);
compare_new_with_leaf_count(3, 3);
compare_new_with_leaf_count(4, 3);
compare_new_with_leaf_count(5, 4);
compare_new_with_leaf_count(6, 4);
compare_new_with_leaf_count(7, 4);
compare_new_with_leaf_count(8, 4);
compare_new_with_leaf_count(9, 5);
compare_new_with_leaf_count(10, 5);
compare_new_with_leaf_count(11, 5);
compare_new_with_leaf_count(12, 5);
compare_new_with_leaf_count(13, 5);
compare_new_with_leaf_count(14, 5);
compare_new_with_leaf_count(15, 5);
}
#[test]
fn depth() {
assert_eq!(get_depth(1), 0);
assert_eq!(get_depth(2), 1);
assert_eq!(get_depth(3), 1);
assert_eq!(get_depth(4), 2);
assert_eq!(get_depth(5), 2);
assert_eq!(get_depth(6), 2);
assert_eq!(get_depth(7), 2);
assert_eq!(get_depth(8), 3);
}
#[test]
fn with_0_leaves() {
let hasher = MerkleHasher::with_leaves(0);
assert_eq!(hasher.finish().unwrap(), Hash256::zero());
}
#[test]
#[should_panic]
fn too_many_leaves() {
compare_reference_with_len(2, 1);
}
#[test]
fn full_trees() {
compare_reference_with_len(1, 1);
compare_reference_with_len(2, 2);
compare_reference_with_len(4, 3);
compare_reference_with_len(8, 4);
compare_reference_with_len(16, 5);
compare_reference_with_len(32, 6);
compare_reference_with_len(64, 7);
compare_reference_with_len(128, 8);
compare_reference_with_len(256, 9);
compare_reference_with_len(256, 9);
compare_reference_with_len(8192, 14);
}
#[test]
fn incomplete_trees() {
compare_reference_with_len(0, 1);
compare_reference_with_len(0, 2);
compare_reference_with_len(1, 2);
for i in 0..=4 {
compare_reference_with_len(i, 3);
}
for i in 0..=7 {
compare_reference_with_len(i, 4);
}
for i in 0..=15 {
compare_reference_with_len(i, 5);
}
for i in 0..=32 {
compare_reference_with_len(i, 6);
}
for i in 0..=64 {
compare_reference_with_len(i, 7);
}
compare_reference_with_len(0, 14);
compare_reference_with_len(13, 14);
compare_reference_with_len(8191, 14);
}
#[test]
fn remaining_buffer() {
let a = {
let mut m = MerkleHasher::with_leaves(2);
m.write(&[1]).expect("should write");
m.finish().expect("should finish")
};
let b = {
let mut m = MerkleHasher::with_leaves(2);
let mut leaf = vec![1];
leaf.extend_from_slice(&[0; 31]);
m.write(&leaf).expect("should write");
m.write(&[0; 32]).expect("should write");
m.finish().expect("should finish")
};
assert_eq!(a, b, "should complete buffer");
}
}

View File

@@ -1,14 +1,12 @@
use super::BYTES_PER_CHUNK;
use eth2_hashing::{hash, hash32_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
/// The size of the cache that stores padding nodes for a given height.
///
/// Currently, we panic if we encounter a tree with a height larger than `MAX_TREE_DEPTH`.
pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX;
use super::{get_zero_hash, Hash256, BYTES_PER_CHUNK};
use eth2_hashing::{hash, hash32_concat};
/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
/// leaves.
///
/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses
/// `MerkleHasher`. We only keep this function around for reference testing.
///
/// First all nodes are extracted from `bytes` and then a padding node is added until the number of
/// leaf chunks is greater than or equal to `min_leaves`. Callers may set `min_leaves` to `0` if no
/// adding additional chunks should be added to the given `bytes`.
@@ -34,12 +32,12 @@ pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX;
///
/// _Note: there are some minor memory overheads, including a handful of usizes and a list of
/// `MAX_TREE_DEPTH` hashes as `lazy_static` constants._
pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
// If the bytes are just one chunk or less, pad to one chunk and return without hashing.
if bytes.len() <= BYTES_PER_CHUNK && min_leaves <= 1 {
let mut o = bytes.to_vec();
o.resize(BYTES_PER_CHUNK, 0);
return o;
return Hash256::from_slice(&o);
}
assert!(
@@ -157,7 +155,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
assert_eq!(root.len(), BYTES_PER_CHUNK, "Only one chunk should remain");
root
Hash256::from_slice(&root)
}
/// A helper struct for storing words of `BYTES_PER_CHUNK` size in a flat byte array.
@@ -212,15 +210,6 @@ impl ChunkStore {
}
}
/// Returns a cached padding node for a given height.
fn get_zero_hash(height: usize) -> &'static [u8] {
if height <= MAX_TREE_DEPTH {
&ZERO_HASHES[height]
} else {
panic!("Tree exceeds MAX_TREE_DEPTH of {}", MAX_TREE_DEPTH)
}
}
/// Returns the next even number following `n`. If `n` is even, `n` is returned.
fn next_even_number(n: usize) -> usize {
n + n % 2
@@ -229,9 +218,10 @@ fn next_even_number(n: usize) -> usize {
#[cfg(test)]
mod test {
use super::*;
use crate::ZERO_HASHES_MAX_INDEX;
pub fn reference_root(bytes: &[u8]) -> Vec<u8> {
crate::merkleize_standard(&bytes)[0..32].to_vec()
pub fn reference_root(bytes: &[u8]) -> Hash256 {
crate::merkleize_standard(&bytes)
}
macro_rules! common_tests {
@@ -288,10 +278,10 @@ mod test {
#[test]
fn max_tree_depth_min_nodes() {
let input = vec![0; 10 * BYTES_PER_CHUNK];
let min_nodes = 2usize.pow(MAX_TREE_DEPTH as u32);
let min_nodes = 2usize.pow(ZERO_HASHES_MAX_INDEX as u32);
assert_eq!(
merkleize_padded(&input, min_nodes),
get_zero_hash(MAX_TREE_DEPTH)
merkleize_padded(&input, min_nodes).as_bytes(),
get_zero_hash(ZERO_HASHES_MAX_INDEX)
);
}
};

View File

@@ -4,9 +4,10 @@ use eth2_hashing::hash;
/// Merkleizes bytes and returns the root, using a simple algorithm that does not optimize to avoid
/// processing or storing padding bytes.
///
/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two.
/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses
/// `MerkleHasher`. We only keep this function around for reference testing.
///
/// It is likely a better choice to use [merkleize_padded](fn.merkleize_padded.html) instead.
/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two.
///
/// ## CPU Performance
///
@@ -17,12 +18,12 @@ use eth2_hashing::hash;
/// - Duplicates the input `bytes`.
/// - Stores all internal nodes, even if they are padding.
/// - Does not free up unused memory during operation.
pub fn merkleize_standard(bytes: &[u8]) -> Vec<u8> {
pub fn merkleize_standard(bytes: &[u8]) -> Hash256 {
// If the bytes are just one chunk (or less than one chunk) just return them.
if bytes.len() <= HASHSIZE {
let mut o = bytes.to_vec();
o.resize(HASHSIZE, 0);
return o;
return Hash256::from_slice(&o[0..HASHSIZE]);
}
let leaves = num_sanitized_leaves(bytes.len());
@@ -67,7 +68,7 @@ pub fn merkleize_standard(bytes: &[u8]) -> Vec<u8> {
o[j..j + HASHSIZE].copy_from_slice(&hash);
}
o
Hash256::from_slice(&o[0..HASHSIZE])
}
fn num_sanitized_leaves(num_bytes: usize) -> usize {