Refactor tree hashing (#861)

* Pre-allocated tree hash caches

* Add SmallVec to tree hash cache

* Avoid allocation for validator.pubkey

* Avoid iterator which seems to be doing heap alloc

* Add more smallvecs

* MOAR SMALLVEC

* Move non-test code to Hash256 tree hash

* Fix byte ordering error

* Add incomplete but working merkle stream impl

* Fix zero hash error

* Add zero hash fn

* Add MerkleStream comments

* Add smallvec, tidy

* Integrate into tree hash derive

* Update ssz_types tree hash

* Don't heap alloc for mix in length

* Add byte-level streaming to MerkleStream

* Avoid recursion in write method

* Update BLS to MerkleStream

* Fix some not-compiling tests

* Remove debug profiling

* Remove code duplication

* Move beacon state tree hash to new hasher

* Fix failing tests

* Update comments

* Add some fast-paths to tree_hash::merkle_root

* Remove unnecessary test

* Rename MerkleStream -> MerkleHasher

* Rename new_with_leaf_count -> with_leaves

* Tidy

* Remove NonZeroUsize

* Remove todo

* Update smallvec
This commit is contained in:
Paul Hauner
2020-03-05 08:07:27 +11:00
committed by GitHub
parent 12999fb06c
commit 7f6ae4c2f5
43 changed files with 1076 additions and 292 deletions

View File

@@ -1,30 +1,80 @@
pub mod impls;
mod merkle_hasher;
mod merkleize_padded;
mod merkleize_standard;
pub use merkle_hasher::{Error, MerkleHasher};
pub use merkleize_padded::merkleize_padded;
pub use merkleize_standard::merkleize_standard;
use eth2_hashing::{Context, SHA256};
use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
/// Size, in bytes, of one chunk; `mix_in_length` pads the encoded length out to this size.
pub const BYTES_PER_CHUNK: usize = 32;
/// Size, in bytes, of one leaf/hash; `merkle_root` divides its input into leaves of this size.
pub const HASHSIZE: usize = 32;
/// Size, in bytes, of two chunks side by side (`2 * BYTES_PER_CHUNK`).
pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK;
/// Alias to `merkleize_padded(&bytes, minimum_chunk_count)`
pub type Hash256 = ethereum_types::H256;
/// Convenience method for `MerkleHasher` which also provides some fast-paths for small trees.
///
/// If `minimum_chunk_count < bytes / BYTES_PER_CHUNK`, padding will be added for the difference
/// between the two.
pub fn merkle_root(bytes: &[u8], minimum_chunk_count: usize) -> Vec<u8> {
merkleize_padded(&bytes, minimum_chunk_count)
/// The tree is built over `max(ceil(bytes.len() / HASHSIZE), minimum_leaf_count)` leaves, so
/// `minimum_leaf_count` only has an effect when it is at least the number of leaves needed to
/// hold `bytes`.
pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 {
    // Round up so that a trailing partial chunk still occupies a whole leaf.
    let required_leaves = (bytes.len() + (HASHSIZE - 1)) / HASHSIZE;
    let leaf_count = std::cmp::max(required_leaves, minimum_leaf_count);

    match leaf_count {
        // No bytes and no requested leaves: the root is the all-zero hash.
        0 => Hash256::zero(),
        // A single leaf is its own root: the input bytes, right-padded with zeros to 32 bytes.
        1 => {
            let mut padded = [0; HASHSIZE];
            padded[0..bytes.len()].copy_from_slice(bytes);
            Hash256::from_slice(&padded)
        }
        // Two leaves (common with BLS pubkeys) form a 3-node tree, so a single SHA-256 over the
        // zero-padded 64-byte preimage avoids the overhead of `MerkleHasher`.
        2 => {
            let mut preimage = [0; HASHSIZE * 2];
            preimage[0..bytes.len()].copy_from_slice(bytes);

            let mut sha = Context::new(&SHA256);
            sha.update(&preimage);
            let digest = sha.finish();

            Hash256::from_slice(digest.as_ref())
        }
        // Three or more leaves: defer to the general-purpose `MerkleHasher`.
        _ => {
            let mut hasher = MerkleHasher::with_leaves(leaf_count);

            hasher
                .write(bytes)
                .expect("the number of leaves is adequate for the number of bytes");

            hasher
                .finish()
                .expect("the number of leaves is adequate for the number of bytes")
        }
    }
}
/// Returns the node created by hashing `root` and `length`.
///
/// Used in `TreeHash` for inserting the length of a list above its root.
// NOTE(review): this span looks like a rendered diff in which the removed (`Vec<u8>`) and the
// added (`Hash256`) versions of `mix_in_length` are interleaved; the notes below mark which
// lines appear to belong to which version — confirm against the actual file.
// Pre-refactor signature: takes the root as a byte slice and returns a `Vec<u8>`.
pub fn mix_in_length(root: &[u8], length: usize) -> Vec<u8> {
// Pre-refactor: little-endian bytes of `length` in a `Vec`, zero-extended to one chunk.
let mut length_bytes = length.to_le_bytes().to_vec();
length_bytes.resize(BYTES_PER_CHUNK, 0);
// Post-refactor signature: both the input root and the result are `Hash256`.
pub fn mix_in_length(root: &Hash256, length: usize) -> Hash256 {
// Number of bytes that `length.to_le_bytes()` produces on this target.
let usize_len = std::mem::size_of::<usize>();
// Pre-refactor: hashes `root` with the padded length and copies the result into a `Vec`.
eth2_hashing::hash32_concat(root, &length_bytes)[..].to_vec()
// Post-refactor: a fixed-size, zero-initialised chunk replaces the `Vec`.
let mut length_bytes = [0; BYTES_PER_CHUNK];
// The little-endian length fills the low bytes; the remainder of the chunk stays zero.
length_bytes[0..usize_len].copy_from_slice(&length.to_le_bytes());
// Hash `root` together with the length chunk and wrap the output as a `Hash256`.
Hash256::from_slice(&eth2_hashing::hash32_concat(root.as_bytes(), &length_bytes)[..])
}
/// Looks up the cached padding node for the given `height` in `ZERO_HASHES`.
///
/// # Panics
///
/// Panics if `height` is greater than `ZERO_HASHES_MAX_INDEX`.
fn get_zero_hash(height: usize) -> &'static [u8] {
    // Reject heights beyond the cached table before indexing into it.
    if height > ZERO_HASHES_MAX_INDEX {
        panic!("Tree exceeds MAX_TREE_DEPTH of {}", ZERO_HASHES_MAX_INDEX);
    }

    &ZERO_HASHES[height]
}
#[derive(Debug, PartialEq, Clone)]
@@ -42,7 +92,7 @@ pub trait TreeHash {
fn tree_hash_packing_factor() -> usize;
fn tree_hash_root(&self) -> Vec<u8>;
fn tree_hash_root(&self) -> Hash256;
}
#[macro_export]
@@ -104,6 +154,9 @@ mod test {
eth2_hashing::hash(&preimage)
};
assert_eq!(mix_in_length(&[42; BYTES_PER_CHUNK], 42), hash);
assert_eq!(
mix_in_length(&Hash256::from_slice(&[42; BYTES_PER_CHUNK]), 42).as_bytes(),
&hash[..]
);
}
}