mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-14 18:32:42 +00:00
Refactor tree hashing (#861)
* Pre-allocated tree hash caches * Add SmallVec to tree hash cache * Avoid allocation for validator.pubkey * Avoid iterator which seems to be doing heap alloc * Add more smallvecs * MOAR SMALLVEC * Move non-test code to Hash256 tree hash * Fix byte ordering error * Add incomplete but working merkle stream impl * Fix zero hash error * Add zero hash fn * Add MerkleStream comments * Add smallvec, tidy * Integrate into tree hash derive * Update ssz_types tree hash * Don't heap alloc for mix in length * Add byte-level streaming to MerkleStream * Avoid recursion in write method * Update BLS to MerkleStream * Fix some not-compiling tests * Remove debug profiling * Remove code duplication * Move beacon state tree hash to new hasher * Fix failing tests * Update comments * Add some fast-paths to tree_hash::merkle_root * Remove unncessary test * Rename MerkleStream -> MerkleHasher * Rename new_with_leaf_count -> with_leaves * Tidy * Remove NonZeroUsize * Remove todo * Update smallvec
This commit is contained in:
@@ -1,6 +1,12 @@
|
||||
use super::*;
|
||||
use ethereum_types::{H256, U128, U256};
|
||||
|
||||
fn int_to_hash256(int: u64) -> Hash256 {
|
||||
let mut bytes = [0; HASHSIZE];
|
||||
bytes[0..8].copy_from_slice(&int.to_le_bytes());
|
||||
Hash256::from_slice(&bytes)
|
||||
}
|
||||
|
||||
macro_rules! impl_for_bitsize {
|
||||
($type: ident, $bit_size: expr) => {
|
||||
impl TreeHash for $type {
|
||||
@@ -17,8 +23,8 @@ macro_rules! impl_for_bitsize {
|
||||
}
|
||||
|
||||
#[allow(clippy::cast_lossless)]
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
int_to_bytes32(*self as u64)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
int_to_hash256(*self as u64)
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -43,12 +49,13 @@ impl TreeHash for bool {
|
||||
u8::tree_hash_packing_factor()
|
||||
}
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
int_to_bytes32(*self as u64)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
int_to_hash256(*self as u64)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_for_u8_array {
|
||||
/// Only valid for byte types less than 32 bytes.
|
||||
macro_rules! impl_for_lt_32byte_u8_array {
|
||||
($len: expr) => {
|
||||
impl TreeHash for [u8; $len] {
|
||||
fn tree_hash_type() -> TreeHashType {
|
||||
@@ -63,15 +70,17 @@ macro_rules! impl_for_u8_array {
|
||||
unreachable!("bytesN should never be packed.")
|
||||
}
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
merkle_root(&self[..], 0)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
let mut result = [0; 32];
|
||||
result[0..$len].copy_from_slice(&self[..]);
|
||||
Hash256::from_slice(&result)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_for_u8_array!(4);
|
||||
impl_for_u8_array!(32);
|
||||
impl_for_lt_32byte_u8_array!(4);
|
||||
impl_for_lt_32byte_u8_array!(32);
|
||||
|
||||
impl TreeHash for U128 {
|
||||
fn tree_hash_type() -> TreeHashType {
|
||||
@@ -88,8 +97,10 @@ impl TreeHash for U128 {
|
||||
2
|
||||
}
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
merkle_root(&self.tree_hash_packed_encoding(), 0)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
let mut result = [0; HASHSIZE];
|
||||
self.to_little_endian(&mut result[0..16]);
|
||||
Hash256::from_slice(&result)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,8 +119,10 @@ impl TreeHash for U256 {
|
||||
1
|
||||
}
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
merkle_root(&self.tree_hash_packed_encoding(), 0)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
let mut result = [0; 32];
|
||||
self.to_little_endian(&mut result[..]);
|
||||
Hash256::from_slice(&result)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -126,18 +139,11 @@ impl TreeHash for H256 {
|
||||
1
|
||||
}
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8> {
|
||||
merkle_root(&self.as_bytes().to_vec(), 0)
|
||||
fn tree_hash_root(&self) -> Hash256 {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `int` as little-endian bytes with a length of 32.
|
||||
fn int_to_bytes32(int: u64) -> Vec<u8> {
|
||||
let mut vec = int.to_le_bytes().to_vec();
|
||||
vec.resize(32, 0);
|
||||
vec
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
@@ -149,22 +155,22 @@ mod test {
|
||||
|
||||
let false_bytes: Vec<u8> = vec![0; 32];
|
||||
|
||||
assert_eq!(true.tree_hash_root(), true_bytes);
|
||||
assert_eq!(false.tree_hash_root(), false_bytes);
|
||||
assert_eq!(true.tree_hash_root().as_bytes(), true_bytes.as_slice());
|
||||
assert_eq!(false.tree_hash_root().as_bytes(), false_bytes.as_slice());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn int_to_bytes() {
|
||||
assert_eq!(&int_to_bytes32(0), &[0; 32]);
|
||||
assert_eq!(int_to_hash256(0).as_bytes(), &[0; 32]);
|
||||
assert_eq!(
|
||||
&int_to_bytes32(1),
|
||||
int_to_hash256(1).as_bytes(),
|
||||
&[
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
&int_to_bytes32(u64::max_value()),
|
||||
int_to_hash256(u64::max_value()).as_bytes(),
|
||||
&[
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
@@ -1,30 +1,80 @@
|
||||
pub mod impls;
|
||||
mod merkle_hasher;
|
||||
mod merkleize_padded;
|
||||
mod merkleize_standard;
|
||||
|
||||
pub use merkle_hasher::{Error, MerkleHasher};
|
||||
pub use merkleize_padded::merkleize_padded;
|
||||
pub use merkleize_standard::merkleize_standard;
|
||||
|
||||
use eth2_hashing::{Context, SHA256};
|
||||
use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
|
||||
|
||||
pub const BYTES_PER_CHUNK: usize = 32;
|
||||
pub const HASHSIZE: usize = 32;
|
||||
pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK;
|
||||
|
||||
/// Alias to `merkleize_padded(&bytes, minimum_chunk_count)`
|
||||
pub type Hash256 = ethereum_types::H256;
|
||||
|
||||
/// Convenience method for `MerkleHasher` which also provides some fast-paths for small trees.
|
||||
///
|
||||
/// If `minimum_chunk_count < bytes / BYTES_PER_CHUNK`, padding will be added for the difference
|
||||
/// between the two.
|
||||
pub fn merkle_root(bytes: &[u8], minimum_chunk_count: usize) -> Vec<u8> {
|
||||
merkleize_padded(&bytes, minimum_chunk_count)
|
||||
/// `minimum_leaf_count` will only be used if it is greater than or equal to the minimum number of leaves that can be created from `bytes`.
|
||||
pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 {
|
||||
let leaves = std::cmp::max(
|
||||
(bytes.len() + (HASHSIZE - 1)) / HASHSIZE,
|
||||
minimum_leaf_count,
|
||||
);
|
||||
|
||||
if leaves == 0 {
|
||||
// If there are no bytes then the hash is always zero.
|
||||
Hash256::zero()
|
||||
} else if leaves == 1 {
|
||||
// If there is only one leaf, the hash is always those leaf bytes padded out to 32-bytes.
|
||||
let mut hash = [0; HASHSIZE];
|
||||
hash[0..bytes.len()].copy_from_slice(bytes);
|
||||
Hash256::from_slice(&hash)
|
||||
} else if leaves == 2 {
|
||||
// If there are only two leaves (this is common with BLS pubkeys), we can avoid some
|
||||
// overhead with `MerkleHasher` and just do a simple 3-node tree here.
|
||||
let mut leaves = [0; HASHSIZE * 2];
|
||||
leaves[0..bytes.len()].copy_from_slice(bytes);
|
||||
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(&leaves);
|
||||
let digest = context.finish();
|
||||
|
||||
Hash256::from_slice(digest.as_ref())
|
||||
} else {
|
||||
// If there are 3 or more leaves, use `MerkleHasher`.
|
||||
let mut hasher = MerkleHasher::with_leaves(leaves);
|
||||
hasher
|
||||
.write(bytes)
|
||||
.expect("the number of leaves is adequate for the number of bytes");
|
||||
hasher
|
||||
.finish()
|
||||
.expect("the number of leaves is adequate for the number of bytes")
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the node created by hashing `root` and `length`.
|
||||
///
|
||||
/// Used in `TreeHash` for inserting the length of a list above its root.
|
||||
pub fn mix_in_length(root: &[u8], length: usize) -> Vec<u8> {
|
||||
let mut length_bytes = length.to_le_bytes().to_vec();
|
||||
length_bytes.resize(BYTES_PER_CHUNK, 0);
|
||||
pub fn mix_in_length(root: &Hash256, length: usize) -> Hash256 {
|
||||
let usize_len = std::mem::size_of::<usize>();
|
||||
|
||||
eth2_hashing::hash32_concat(root, &length_bytes)[..].to_vec()
|
||||
let mut length_bytes = [0; BYTES_PER_CHUNK];
|
||||
length_bytes[0..usize_len].copy_from_slice(&length.to_le_bytes());
|
||||
|
||||
Hash256::from_slice(&eth2_hashing::hash32_concat(root.as_bytes(), &length_bytes)[..])
|
||||
}
|
||||
|
||||
/// Returns a cached padding node for a given height.
|
||||
fn get_zero_hash(height: usize) -> &'static [u8] {
|
||||
if height <= ZERO_HASHES_MAX_INDEX {
|
||||
&ZERO_HASHES[height]
|
||||
} else {
|
||||
panic!("Tree exceeds MAX_TREE_DEPTH of {}", ZERO_HASHES_MAX_INDEX)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
@@ -42,7 +92,7 @@ pub trait TreeHash {
|
||||
|
||||
fn tree_hash_packing_factor() -> usize;
|
||||
|
||||
fn tree_hash_root(&self) -> Vec<u8>;
|
||||
fn tree_hash_root(&self) -> Hash256;
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
@@ -104,6 +154,9 @@ mod test {
|
||||
eth2_hashing::hash(&preimage)
|
||||
};
|
||||
|
||||
assert_eq!(mix_in_length(&[42; BYTES_PER_CHUNK], 42), hash);
|
||||
assert_eq!(
|
||||
mix_in_length(&Hash256::from_slice(&[42; BYTES_PER_CHUNK]), 42).as_bytes(),
|
||||
&hash[..]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
575
eth2/utils/tree_hash/src/merkle_hasher.rs
Normal file
575
eth2/utils/tree_hash/src/merkle_hasher.rs
Normal file
@@ -0,0 +1,575 @@
|
||||
use crate::{get_zero_hash, Hash256, HASHSIZE};
|
||||
use eth2_hashing::{Context, Digest, SHA256};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::mem;
|
||||
|
||||
type SmallVec8<T> = SmallVec<[T; 8]>;
|
||||
|
||||
/// Errors returned by `MerkleHasher`.
#[derive(Debug, Clone, PartialEq)]
pub enum Error {
    /// The maximum number of leaves defined by the initialization `depth` has been exceeded.
    MaximumLeavesExceeded { max_leaves: usize },
}
|
||||
|
||||
/// Helper struct to store either a hash digest or a slice.
|
||||
///
|
||||
/// Should be used as a left or right value for some node.
|
||||
enum Preimage<'a> {
|
||||
Digest(Digest),
|
||||
Slice(&'a [u8]),
|
||||
}
|
||||
|
||||
impl<'a> Preimage<'a> {
|
||||
/// Returns a 32-byte slice.
|
||||
fn as_bytes(&self) -> &[u8] {
|
||||
match self {
|
||||
Preimage::Digest(digest) => digest.as_ref(),
|
||||
Preimage::Slice(slice) => slice,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A node that has had a left child supplied, but not a right child.
|
||||
struct HalfNode {
|
||||
/// The hasher context.
|
||||
context: Context,
|
||||
/// The tree id of the node. The root node has in id of `1` and ids increase moving down the
|
||||
/// tree from left to right.
|
||||
id: usize,
|
||||
}
|
||||
|
||||
impl HalfNode {
|
||||
/// Create a new half-node from the given `left` value.
|
||||
fn new(id: usize, left: Preimage) -> Self {
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(left.as_bytes());
|
||||
|
||||
Self { context, id }
|
||||
}
|
||||
|
||||
/// Complete the half-node by providing a `right` value. Returns a digest of the left and right
|
||||
/// nodes.
|
||||
fn finish(mut self, right: Preimage) -> Digest {
|
||||
self.context.update(right.as_bytes());
|
||||
self.context.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides a Merkle-root hasher that allows for streaming bytes (i.e., providing any-length byte
|
||||
/// slices without need to separate into leaves). Efficiently handles cases where not all leaves
|
||||
/// have been provided by assuming all non-provided leaves are `[0; 32]` and pre-computing the
|
||||
/// zero-value hashes at all depths of the tree.
|
||||
///
|
||||
/// This algorithm aims to allocate as little memory as possible and it does this by "folding" up
|
||||
/// the tree has each leaf is provided. Consider this step-by-step functional diagram of hashing a
|
||||
/// tree with depth three:
|
||||
///
|
||||
/// ## Functional Diagram
|
||||
///
|
||||
/// Nodes that are `-` have not been defined and do not occupy memory. Nodes that are `L` are
|
||||
/// leaves that are provided but are not stored. Nodes that have integers (`1`, `2`) are stored in
|
||||
/// our struct. Finally, nodes that are `X` were stored, but are now removed.
|
||||
///
|
||||
/// ### Start
|
||||
///
|
||||
/// ```ignore
|
||||
/// -
|
||||
/// / \
|
||||
/// - -
|
||||
/// / \ / \
|
||||
/// - - - -
|
||||
/// ```
|
||||
///
|
||||
/// ### Provide first leaf
|
||||
///
|
||||
/// ```ignore
|
||||
/// -
|
||||
/// / \
|
||||
/// 2 -
|
||||
/// / \ / \
|
||||
/// L - - -
|
||||
/// ```
|
||||
///
|
||||
/// ### Provide second leaf
|
||||
///
|
||||
/// ```ignore
|
||||
/// 1
|
||||
/// / \
|
||||
/// X -
|
||||
/// / \ / \
|
||||
/// L L - -
|
||||
/// ```
|
||||
///
|
||||
/// ### Provide third leaf
|
||||
///
|
||||
/// ```ignore
|
||||
/// 1
|
||||
/// / \
|
||||
/// X 3
|
||||
/// / \ / \
|
||||
/// L L L -
|
||||
/// ```
|
||||
///
|
||||
/// ### Provide fourth and final leaf
|
||||
///
|
||||
/// ```ignore
|
||||
/// 1
|
||||
/// / \
|
||||
/// X X
|
||||
/// / \ / \
|
||||
/// L L L L
|
||||
/// ```
|
||||
///
|
||||
pub struct MerkleHasher {
|
||||
/// Stores the nodes that are half-complete and awaiting a right node.
|
||||
///
|
||||
/// A smallvec of size 8 means we can hash a tree with 256 leaves without allocating on the
|
||||
/// heap. Each half-node is 224 bytes, so this smallvec may store 1,792 bytes on the stack.
|
||||
half_nodes: SmallVec8<HalfNode>,
|
||||
/// The depth of the tree that will be produced.
|
||||
///
|
||||
/// Depth is counted top-down (i.e., the root node is at depth 0). A tree with 1 leaf has a
|
||||
/// depth of 1, a tree with 4 leaves has a depth of 3.
|
||||
depth: usize,
|
||||
/// The next leaf that we are expecting to process.
|
||||
next_leaf: usize,
|
||||
/// A buffer of bytes that are waiting to be written to a leaf.
|
||||
buffer: SmallVec<[u8; 32]>,
|
||||
/// Set to Some(root) when the root of the tree is known.
|
||||
root: Option<Hash256>,
|
||||
}
|
||||
|
||||
/// Returns the id of the parent of the node with id `i`.
///
/// Ids start at `1` for the root, so the parent id is `i` halved (rounding down).
fn get_parent(i: usize) -> usize {
    i >> 1
}
|
||||
|
||||
/// Returns the depth of the node with id `i`, where the root (id `1`) is at depth `0`.
///
/// It is a logic error to provide `i == 0`.
///
/// E.g., if `i` is 1, depth is 0. If `i` is 2 or 3, depth is 1.
fn get_depth(i: usize) -> usize {
    // The depth is the index of the highest set bit of `i`.
    let highest_bit_index = mem::size_of::<usize>() * 8 - 1;
    highest_bit_index - i.leading_zeros() as usize
}
|
||||
|
||||
impl MerkleHasher {
|
||||
/// Instantiate a hasher for a tree with a given number of leaves.
|
||||
///
|
||||
/// `num_leaves` will be rounded to the next power of two. E.g., if `num_leaves == 6`, then the
|
||||
/// tree will _actually_ be able to accomodate 8 leaves and the resulting hasher is exactly the
|
||||
/// same as one that was instantiated with `Self::with_leaves(8)`.
|
||||
///
|
||||
/// ## Notes
|
||||
///
|
||||
/// If `num_leaves == 0`, a tree of depth 1 will be created. If no leaves are provided it will
|
||||
/// return a root of `[0; 32]`.
|
||||
pub fn with_leaves(num_leaves: usize) -> Self {
|
||||
let depth = get_depth(num_leaves.next_power_of_two()) + 1;
|
||||
Self::with_depth(depth)
|
||||
}
|
||||
|
||||
/// Instantiates a new, empty hasher for a tree with `depth` layers which will have capacity
|
||||
/// for `1 << (depth - 1)` leaf nodes.
|
||||
///
|
||||
/// It is not possible to grow the depth of the tree after instantiation.
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// Panics if `depth == 0`.
|
||||
fn with_depth(depth: usize) -> Self {
|
||||
assert!(depth > 0, "merkle tree cannot have a depth of zero");
|
||||
|
||||
Self {
|
||||
half_nodes: SmallVec::with_capacity(depth - 1),
|
||||
depth,
|
||||
next_leaf: 1 << (depth - 1),
|
||||
buffer: SmallVec::with_capacity(32),
|
||||
root: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write some bytes to the hasher.
|
||||
///
|
||||
/// ## Errors
|
||||
///
|
||||
/// Returns an error if the given bytes would create a leaf that would exceed the maximum
|
||||
/// permissible number of leaves defined by the initialization `depth`. E.g., a tree of `depth
|
||||
/// == 2` can only accept 2 leaves. A tree of `depth == 14` can only accept 8,192 leaves.
|
||||
pub fn write(&mut self, bytes: &[u8]) -> Result<(), Error> {
|
||||
let mut ptr = 0;
|
||||
while ptr <= bytes.len() {
|
||||
let slice = &bytes[ptr..std::cmp::min(bytes.len(), ptr + HASHSIZE)];
|
||||
|
||||
if self.buffer.is_empty() && slice.len() == HASHSIZE {
|
||||
self.process_leaf(slice)?;
|
||||
ptr += HASHSIZE
|
||||
} else if self.buffer.len() + slice.len() < HASHSIZE {
|
||||
self.buffer.extend_from_slice(slice);
|
||||
ptr += HASHSIZE
|
||||
} else {
|
||||
let buf_len = self.buffer.len();
|
||||
let required = HASHSIZE - buf_len;
|
||||
|
||||
let mut leaf = [0; HASHSIZE];
|
||||
leaf[..buf_len].copy_from_slice(&self.buffer);
|
||||
leaf[buf_len..].copy_from_slice(&slice[0..required]);
|
||||
|
||||
self.process_leaf(&leaf)?;
|
||||
self.buffer = smallvec![];
|
||||
|
||||
ptr += required
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Process the next leaf in the tree.
|
||||
///
|
||||
/// ## Errors
|
||||
///
|
||||
/// Returns an error if the given leaf would exceed the maximum permissible number of leaves
|
||||
/// defined by the initialization `depth`. E.g., a tree of `depth == 2` can only accept 2
|
||||
/// leaves. A tree of `depth == 14` can only accept 8,192 leaves.
|
||||
fn process_leaf(&mut self, leaf: &[u8]) -> Result<(), Error> {
|
||||
assert_eq!(leaf.len(), HASHSIZE, "a leaf must be 32 bytes");
|
||||
|
||||
let max_leaves = 1 << (self.depth + 1);
|
||||
|
||||
if self.next_leaf > max_leaves {
|
||||
return Err(Error::MaximumLeavesExceeded { max_leaves });
|
||||
} else if self.next_leaf == 1 {
|
||||
// A tree of depth one has a root that is equal to the first given leaf.
|
||||
self.root = Some(Hash256::from_slice(leaf))
|
||||
} else if self.next_leaf % 2 == 0 {
|
||||
self.process_left_node(self.next_leaf, Preimage::Slice(leaf))
|
||||
} else {
|
||||
self.process_right_node(self.next_leaf, Preimage::Slice(leaf))
|
||||
}
|
||||
|
||||
self.next_leaf += 1;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the root of the Merkle tree.
|
||||
///
|
||||
/// If not all leaves have been provided, the tree will be efficiently completed under the
|
||||
/// assumption that all not-yet-provided leaves are equal to `[0; 32]`.
|
||||
///
|
||||
/// ## Errors
|
||||
///
|
||||
/// Returns an error if the bytes remaining in the buffer would create a leaf that would exceed
|
||||
/// the maximum permissible number of leaves defined by the initialization `depth`.
|
||||
pub fn finish(mut self) -> Result<Hash256, Error> {
|
||||
if !self.buffer.is_empty() {
|
||||
let mut leaf = [0; HASHSIZE];
|
||||
leaf[..self.buffer.len()].copy_from_slice(&self.buffer);
|
||||
self.process_leaf(&leaf)?
|
||||
}
|
||||
|
||||
// If the tree is incomplete, we must complete it by providing zero-hashes.
|
||||
loop {
|
||||
if let Some(root) = self.root {
|
||||
break Ok(root);
|
||||
} else {
|
||||
if let Some(node) = self.half_nodes.last() {
|
||||
let right_child = node.id * 2 + 1;
|
||||
self.process_right_node(right_child, self.zero_hash(right_child));
|
||||
} else if self.next_leaf == 1 {
|
||||
// The next_leaf can only be 1 if the tree has a depth of one. If have been no
|
||||
// leaves supplied, assume a root of zero.
|
||||
break Ok(Hash256::zero());
|
||||
} else {
|
||||
// The only scenario where there are (a) no half nodes and (b) a tree of depth
|
||||
// two or more is where no leaves have been supplied at all.
|
||||
//
|
||||
// Once we supply this first zero-hash leaf then all future operations will be
|
||||
// triggered via the `process_right_node` branch.
|
||||
self.process_left_node(self.next_leaf, self.zero_hash(self.next_leaf))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process a node that will become the left-hand node of some parent. The supplied `id` is
|
||||
/// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this
|
||||
/// is a leaf node it will be the value of that leaf).
|
||||
///
|
||||
/// In this scenario, the only option is to push a new half-node.
|
||||
fn process_left_node(&mut self, id: usize, preimage: Preimage) {
|
||||
self.half_nodes
|
||||
.push(HalfNode::new(get_parent(id), preimage))
|
||||
}
|
||||
|
||||
/// Process a node that will become the right-hand node of some parent. The supplied `id` is
|
||||
/// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this
|
||||
/// is a leaf node it will be the value of that leaf).
|
||||
///
|
||||
/// This operation will always complete one node, then it will attempt to crawl up the tree and
|
||||
/// collapse and other viable nodes. For example, consider a tree of depth 3 (see diagram
|
||||
/// below). When providing the node with id `7`, the node with id `3` will be completed which
|
||||
/// will also provide the right-node for the `1` node. This function will complete both of
|
||||
/// those nodes and ultimately find the root of the tree.
|
||||
///
|
||||
/// ```ignore
|
||||
/// 1 <-- completed
|
||||
/// / \
|
||||
/// 2 3 <-- completed
|
||||
/// / \ / \
|
||||
/// 4 5 6 7 <-- supplied right node
|
||||
/// ```
|
||||
fn process_right_node(&mut self, id: usize, mut preimage: Preimage) {
|
||||
let mut parent = get_parent(id);
|
||||
|
||||
loop {
|
||||
match self.half_nodes.last() {
|
||||
Some(node) if node.id == parent => {
|
||||
preimage = Preimage::Digest(
|
||||
self.half_nodes
|
||||
.pop()
|
||||
.expect("if .last() is Some then .pop() must succeed")
|
||||
.finish(preimage),
|
||||
);
|
||||
if parent == 1 {
|
||||
self.root = Some(Hash256::from_slice(preimage.as_bytes()));
|
||||
break;
|
||||
} else {
|
||||
parent = get_parent(parent);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
self.half_nodes.push(HalfNode::new(parent, preimage));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a "zero hash" from a pre-computed set for the given node.
|
||||
///
|
||||
/// Note: this node is not always zero, instead it is the result of hashing up a tree where the
|
||||
/// leaves are all zeros. E.g., in a tree of depth 2, the `zero_hash` of a node at depth 1
|
||||
/// will be `[0; 32]`. However, the `zero_hash` for a node at depth 0 will be
|
||||
/// `hash(concat([0; 32], [0; 32])))`.
|
||||
fn zero_hash(&self, id: usize) -> Preimage<'static> {
|
||||
Preimage::Slice(get_zero_hash(self.depth - (get_depth(id) + 1)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::merkleize_padded;

    /// Guards the stack size of `HalfNode` (and therefore `Context`). The smallvec size was
    /// chosen based upon this, so it's good to know if it suddenly changes.
    #[test]
    fn context_size() {
        assert_eq!(
            mem::size_of::<HalfNode>(),
            216 + 8,
            "Halfnode size should be as expected"
        );
    }

    /// Returns `count` leaves, where leaf `i` is `Hash256::from_low_u64_be(i)`.
    fn sequential_leaves(count: u64) -> Vec<Hash256> {
        (0..count).map(Hash256::from_low_u64_be).collect()
    }

    /// Hashes `bytes` with a hasher of the given `depth`, writing `chunk_len` bytes at a time.
    fn streamed_root(bytes: &[u8], chunk_len: usize, depth: usize, write_msg: &str) -> Hash256 {
        let mut hasher = MerkleHasher::with_depth(depth);
        for chunk in bytes.chunks(chunk_len) {
            hasher.write(chunk).expect(write_msg);
        }
        hasher.finish().expect("should finish")
    }

    /// Checks that `MerkleHasher` agrees with `merkleize_padded` when the leaves are written
    /// one leaf at a time, three bytes at a time, and one byte at a time.
    fn compare_with_reference(leaves: &[Hash256], depth: usize) {
        let reference_bytes: Vec<u8> = leaves
            .iter()
            .flat_map(|hash| hash.as_bytes().iter().copied())
            .collect();

        let reference_root = merkleize_padded(&reference_bytes, 1 << (depth - 1));

        let merklizer_root_32_bytes =
            streamed_root(&reference_bytes, HASHSIZE, depth, "should process leaf");
        assert_eq!(
            reference_root, merklizer_root_32_bytes,
            "32 bytes should match reference root"
        );

        let merklizer_root_individual_3_bytes =
            streamed_root(&reference_bytes, 3, depth, "should process byte");
        assert_eq!(
            reference_root, merklizer_root_individual_3_bytes,
            "3 bytes should match reference root"
        );

        let merklizer_root_individual_single_bytes =
            streamed_root(&reference_bytes, 1, depth, "should process byte");
        assert_eq!(
            reference_root, merklizer_root_individual_single_bytes,
            "single bytes should match reference root"
        );
    }

    /// A simple wrapper to compare `MerkleHasher` to the reference function by just giving a
    /// number of leaves and a depth.
    fn compare_reference_with_len(leaves: u64, depth: usize) {
        compare_with_reference(&sequential_leaves(leaves), depth)
    }

    /// Checks that `MerkleHasher::with_depth` and `MerkleHasher::with_leaves` generate
    /// consistent results.
    fn compare_new_with_leaf_count(num_leaves: u64, depth: usize) {
        let leaves = sequential_leaves(num_leaves);

        let mut by_depth = MerkleHasher::with_depth(depth);
        for leaf in &leaves {
            by_depth.write(leaf.as_bytes()).expect("should process leaf");
        }
        let from_depth = by_depth.finish();

        let mut by_count = MerkleHasher::with_leaves(num_leaves as usize);
        for leaf in &leaves {
            by_count
                .process_leaf(leaf.as_bytes())
                .expect("should process leaf");
        }
        let from_num_leaves = by_count.finish();

        assert_eq!(
            from_depth, from_num_leaves,
            "hash generated by depth should match that from num leaves"
        );
    }

    #[test]
    fn with_leaves() {
        // (number of leaves, expected tree depth)
        let cases: [(u64, usize); 15] = [
            (1, 1),
            (2, 2),
            (3, 3),
            (4, 3),
            (5, 4),
            (6, 4),
            (7, 4),
            (8, 4),
            (9, 5),
            (10, 5),
            (11, 5),
            (12, 5),
            (13, 5),
            (14, 5),
            (15, 5),
        ];

        for &(num_leaves, depth) in cases.iter() {
            compare_new_with_leaf_count(num_leaves, depth);
        }
    }

    #[test]
    fn depth() {
        // (node id, expected depth)
        let cases: [(usize, usize); 8] = [
            (1, 0),
            (2, 1),
            (3, 1),
            (4, 2),
            (5, 2),
            (6, 2),
            (7, 2),
            (8, 3),
        ];

        for &(id, expected) in cases.iter() {
            assert_eq!(get_depth(id), expected);
        }
    }

    #[test]
    fn with_0_leaves() {
        let hasher = MerkleHasher::with_leaves(0);
        assert_eq!(hasher.finish().unwrap(), Hash256::zero());
    }

    #[test]
    #[should_panic]
    fn too_many_leaves() {
        compare_reference_with_len(2, 1);
    }

    #[test]
    fn full_trees() {
        // (number of leaves, tree depth); every tree here is completely filled.
        let cases: [(u64, usize); 11] = [
            (1, 1),
            (2, 2),
            (4, 3),
            (8, 4),
            (16, 5),
            (32, 6),
            (64, 7),
            (128, 8),
            (256, 9),
            (256, 9),
            (8192, 14),
        ];

        for &(leaves, depth) in cases.iter() {
            compare_reference_with_len(leaves, depth);
        }
    }

    #[test]
    fn incomplete_trees() {
        compare_reference_with_len(0, 1);

        compare_reference_with_len(0, 2);
        compare_reference_with_len(1, 2);

        // (largest leaf count to try, tree depth)
        let sweeps: [(u64, usize); 5] = [(4, 3), (7, 4), (15, 5), (32, 6), (64, 7)];
        for &(max_leaves, depth) in sweeps.iter() {
            for i in 0..=max_leaves {
                compare_reference_with_len(i, depth);
            }
        }

        compare_reference_with_len(0, 14);
        compare_reference_with_len(13, 14);
        compare_reference_with_len(8191, 14);
    }

    #[test]
    fn remaining_buffer() {
        // A single buffered byte, never explicitly padded.
        let a = {
            let mut m = MerkleHasher::with_leaves(2);
            m.write(&[1]).expect("should write");
            m.finish().expect("should finish")
        };

        // The same data with all padding written out explicitly.
        let b = {
            let mut m = MerkleHasher::with_leaves(2);
            let mut leaf = vec![1];
            leaf.extend_from_slice(&[0; 31]);
            m.write(&leaf).expect("should write");
            m.write(&[0; 32]).expect("should write");
            m.finish().expect("should finish")
        };

        assert_eq!(a, b, "should complete buffer");
    }
}
|
||||
@@ -1,14 +1,12 @@
|
||||
use super::BYTES_PER_CHUNK;
|
||||
use eth2_hashing::{hash, hash32_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
|
||||
|
||||
/// The size of the cache that stores padding nodes for a given height.
|
||||
///
|
||||
/// Currently, we panic if we encounter a tree with a height larger than `MAX_TREE_DEPTH`.
|
||||
pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX;
|
||||
use super::{get_zero_hash, Hash256, BYTES_PER_CHUNK};
|
||||
use eth2_hashing::{hash, hash32_concat};
|
||||
|
||||
/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
|
||||
/// leaves.
|
||||
///
|
||||
/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses
|
||||
/// `MerkleHasher`. We only keep this function around for reference testing.
|
||||
///
|
||||
/// First all nodes are extracted from `bytes` and then a padding node is added until the number of
|
||||
/// leaf chunks is greater than or equal to `min_leaves`. Callers may set `min_leaves` to `0` if no
|
||||
/// additional chunks should be added to the given `bytes`.
|
||||
@@ -34,12 +32,12 @@ pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX;
|
||||
///
|
||||
/// _Note: there are some minor memory overheads, including a handful of usizes and a list of
|
||||
/// `MAX_TREE_DEPTH` hashes as `lazy_static` constants._
|
||||
pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
|
||||
pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
|
||||
// If the bytes are just one chunk or less, pad to one chunk and return without hashing.
|
||||
if bytes.len() <= BYTES_PER_CHUNK && min_leaves <= 1 {
|
||||
let mut o = bytes.to_vec();
|
||||
o.resize(BYTES_PER_CHUNK, 0);
|
||||
return o;
|
||||
return Hash256::from_slice(&o);
|
||||
}
|
||||
|
||||
assert!(
|
||||
@@ -157,7 +155,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
|
||||
|
||||
assert_eq!(root.len(), BYTES_PER_CHUNK, "Only one chunk should remain");
|
||||
|
||||
root
|
||||
Hash256::from_slice(&root)
|
||||
}
|
||||
|
||||
/// A helper struct for storing words of `BYTES_PER_CHUNK` size in a flat byte array.
|
||||
@@ -212,15 +210,6 @@ impl ChunkStore {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a cached padding node for a given height.
|
||||
fn get_zero_hash(height: usize) -> &'static [u8] {
|
||||
if height <= MAX_TREE_DEPTH {
|
||||
&ZERO_HASHES[height]
|
||||
} else {
|
||||
panic!("Tree exceeds MAX_TREE_DEPTH of {}", MAX_TREE_DEPTH)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next even number following `n`. If `n` is even, `n` is returned.
|
||||
fn next_even_number(n: usize) -> usize {
|
||||
n + n % 2
|
||||
@@ -229,9 +218,10 @@ fn next_even_number(n: usize) -> usize {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::ZERO_HASHES_MAX_INDEX;
|
||||
|
||||
pub fn reference_root(bytes: &[u8]) -> Vec<u8> {
|
||||
crate::merkleize_standard(&bytes)[0..32].to_vec()
|
||||
pub fn reference_root(bytes: &[u8]) -> Hash256 {
|
||||
crate::merkleize_standard(&bytes)
|
||||
}
|
||||
|
||||
macro_rules! common_tests {
|
||||
@@ -288,10 +278,10 @@ mod test {
|
||||
#[test]
|
||||
fn max_tree_depth_min_nodes() {
|
||||
let input = vec![0; 10 * BYTES_PER_CHUNK];
|
||||
let min_nodes = 2usize.pow(MAX_TREE_DEPTH as u32);
|
||||
let min_nodes = 2usize.pow(ZERO_HASHES_MAX_INDEX as u32);
|
||||
assert_eq!(
|
||||
merkleize_padded(&input, min_nodes),
|
||||
get_zero_hash(MAX_TREE_DEPTH)
|
||||
merkleize_padded(&input, min_nodes).as_bytes(),
|
||||
get_zero_hash(ZERO_HASHES_MAX_INDEX)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -4,9 +4,10 @@ use eth2_hashing::hash;
|
||||
/// Merkleizes bytes and returns the root, using a simple algorithm that does not optimize to avoid
|
||||
/// processing or storing padding bytes.
|
||||
///
|
||||
/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two.
|
||||
/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses
|
||||
/// `MerkleHasher`. We only keep this function around for reference testing.
|
||||
///
|
||||
/// It is likely a better choice to use [merkleize_padded](fn.merkleize_padded.html) instead.
|
||||
/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two.
|
||||
///
|
||||
/// ## CPU Performance
|
||||
///
|
||||
@@ -17,12 +18,12 @@ use eth2_hashing::hash;
|
||||
/// - Duplicates the input `bytes`.
|
||||
/// - Stores all internal nodes, even if they are padding.
|
||||
/// - Does not free up unused memory during operation.
|
||||
pub fn merkleize_standard(bytes: &[u8]) -> Vec<u8> {
|
||||
pub fn merkleize_standard(bytes: &[u8]) -> Hash256 {
|
||||
// If the bytes are just one chunk (or less than one chunk) just return them.
|
||||
if bytes.len() <= HASHSIZE {
|
||||
let mut o = bytes.to_vec();
|
||||
o.resize(HASHSIZE, 0);
|
||||
return o;
|
||||
return Hash256::from_slice(&o[0..HASHSIZE]);
|
||||
}
|
||||
|
||||
let leaves = num_sanitized_leaves(bytes.len());
|
||||
@@ -67,7 +68,7 @@ pub fn merkleize_standard(bytes: &[u8]) -> Vec<u8> {
|
||||
o[j..j + HASHSIZE].copy_from_slice(&hash);
|
||||
}
|
||||
|
||||
o
|
||||
Hash256::from_slice(&o[0..HASHSIZE])
|
||||
}
|
||||
|
||||
fn num_sanitized_leaves(num_bytes: usize) -> usize {
|
||||
|
||||
Reference in New Issue
Block a user