Implement tree hash caching (#584)

* Implement basic tree hash caching

* Use spaces to indent top-level Cargo.toml

* Optimize BLS tree hash by hashing bytes directly

* Implement tree hash caching for validator registry

* Persist BeaconState tree hash cache to disk

* Address Paul's review comments
This commit is contained in:
Michael Sproul
2019-11-05 15:46:52 +11:00
committed by GitHub
parent 4ef66a544a
commit c1a2238f1a
38 changed files with 1112 additions and 248 deletions

View File

@@ -1,19 +0,0 @@
// This build script is symlinked from each project that requires BLS's "fake crypto",
// so that the `fake_crypto` feature of every sub-crate can be turned on by running
// with FAKE_CRYPTO=1 from the top-level workspace.
// At some point in the future it might be possible to do:
// $ cargo test --all --release --features fake_crypto
// but at the present time this doesn't work.
// Related: https://github.com/rust-lang/cargo/issues/5364
fn main() {
    // "Fake crypto" is opted into by running with FAKE_CRYPTO=1 from the
    // top-level workspace (see the header comment above).
    let fake_crypto_enabled = match std::env::var("FAKE_CRYPTO") {
        Ok(value) => value == "1",
        Err(_) => false,
    };

    if fake_crypto_enabled {
        // Turn on the `fake_crypto` cfg/feature for this crate and make sure
        // the build script re-runs when the env var changes.
        println!("cargo:rustc-cfg=feature=\"fake_crypto\"");
        println!("cargo:rerun-if-env-changed=FAKE_CRYPTO");
        println!(
            "cargo:warning=[{}]: Compiled with fake BLS cryptography. DO NOT USE, TESTING ONLY",
            std::env::var("CARGO_PKG_NAME").unwrap()
        );
    }
}

View File

@@ -155,7 +155,7 @@ impl_ssz!(
"AggregateSignature"
);
impl_tree_hash!(AggregateSignature, U96);
impl_tree_hash!(AggregateSignature, BLS_AGG_SIG_BYTE_SIZE);
impl Serialize for AggregateSignature {
/// Serde serialization is compliant with the Ethereum YAML test format.

View File

@@ -93,7 +93,7 @@ impl_ssz!(
"FakeAggregateSignature"
);
impl_tree_hash!(FakeAggregateSignature, U96);
impl_tree_hash!(FakeAggregateSignature, BLS_AGG_SIG_BYTE_SIZE);
impl Serialize for FakeAggregateSignature {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

View File

@@ -102,7 +102,7 @@ impl default::Default for FakePublicKey {
impl_ssz!(FakePublicKey, BLS_PUBLIC_KEY_BYTE_SIZE, "FakePublicKey");
impl_tree_hash!(FakePublicKey, U48);
impl_tree_hash!(FakePublicKey, BLS_PUBLIC_KEY_BYTE_SIZE);
impl Serialize for FakePublicKey {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

View File

@@ -91,7 +91,7 @@ impl FakeSignature {
impl_ssz!(FakeSignature, BLS_SIG_BYTE_SIZE, "FakeSignature");
impl_tree_hash!(FakeSignature, U96);
impl_tree_hash!(FakeSignature, BLS_SIG_BYTE_SIZE);
impl Serialize for FakeSignature {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

View File

@@ -42,7 +42,7 @@ macro_rules! impl_ssz {
}
macro_rules! impl_tree_hash {
($type: ty, $byte_size: ident) => {
($type: ty, $byte_size: expr) => {
impl tree_hash::TreeHash for $type {
fn tree_hash_type() -> tree_hash::TreeHashType {
tree_hash::TreeHashType::Vector
@@ -57,16 +57,19 @@ macro_rules! impl_tree_hash {
}
fn tree_hash_root(&self) -> Vec<u8> {
let vector: ssz_types::FixedVector<u8, ssz_types::typenum::$byte_size> =
ssz_types::FixedVector::from(self.as_ssz_bytes());
vector.tree_hash_root()
// We could use the tree hash implementation for `FixedVec<u8, $byte_size>`,
// but benchmarks have shown that to be at least 15% slower because of the
// unnecessary copying and allocation (one Vec per byte)
let values_per_chunk = tree_hash::BYTES_PER_CHUNK;
let minimum_chunk_count = ($byte_size + values_per_chunk - 1) / values_per_chunk;
tree_hash::merkle_root(&self.as_ssz_bytes(), minimum_chunk_count)
}
}
};
}
macro_rules! bytes_struct {
($name: ident, $type: ty, $byte_size: expr, $small_name: expr, $ssz_type_size: ident,
($name: ident, $type: ty, $byte_size: expr, $small_name: expr,
$type_str: expr, $byte_size_str: expr) => {
#[doc = "Stores `"]
#[doc = $byte_size_str]
@@ -82,9 +85,9 @@ macro_rules! bytes_struct {
#[derive(Clone)]
pub struct $name([u8; $byte_size]);
};
($name: ident, $type: ty, $byte_size: expr, $small_name: expr, $ssz_type_size: ident) => {
bytes_struct!($name, $type, $byte_size, $small_name, $ssz_type_size, stringify!($type),
stringify!($byte_size));
($name: ident, $type: ty, $byte_size: expr, $small_name: expr) => {
bytes_struct!($name, $type, $byte_size, $small_name, stringify!($type),
stringify!($byte_size));
impl $name {
pub fn from_bytes(bytes: &[u8]) -> Result<Self, ssz::DecodeError> {
@@ -144,7 +147,7 @@ macro_rules! bytes_struct {
impl_ssz!($name, $byte_size, "$type");
impl_tree_hash!($name, $ssz_type_size);
impl_tree_hash!($name, $byte_size);
impl serde::ser::Serialize for $name {
/// Serde serialization is compliant with the Ethereum YAML test format.

View File

@@ -94,7 +94,7 @@ impl default::Default for PublicKey {
impl_ssz!(PublicKey, BLS_PUBLIC_KEY_BYTE_SIZE, "PublicKey");
impl_tree_hash!(PublicKey, U48);
impl_tree_hash!(PublicKey, BLS_PUBLIC_KEY_BYTE_SIZE);
impl Serialize for PublicKey {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

View File

@@ -6,8 +6,7 @@ bytes_struct!(
PublicKeyBytes,
PublicKey,
BLS_PUBLIC_KEY_BYTE_SIZE,
"public key",
U48
"public key"
);
#[cfg(test)]

View File

@@ -49,7 +49,7 @@ impl SecretKey {
impl_ssz!(SecretKey, BLS_SECRET_KEY_BYTE_SIZE, "SecretKey");
impl_tree_hash!(SecretKey, U48);
impl_tree_hash!(SecretKey, BLS_SECRET_KEY_BYTE_SIZE);
impl Serialize for SecretKey {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

View File

@@ -108,7 +108,7 @@ impl Signature {
impl_ssz!(Signature, BLS_SIG_BYTE_SIZE, "Signature");
impl_tree_hash!(Signature, U96);
impl_tree_hash!(Signature, BLS_SIG_BYTE_SIZE);
impl Serialize for Signature {
/// Serde serialization is compliant with the Ethereum YAML test format.

View File

@@ -2,13 +2,7 @@ use ssz::{Decode, DecodeError, Encode};
use super::{Signature, BLS_SIG_BYTE_SIZE};
bytes_struct!(
SignatureBytes,
Signature,
BLS_SIG_BYTE_SIZE,
"signature",
U96
);
bytes_struct!(SignatureBytes, Signature, BLS_SIG_BYTE_SIZE, "signature");
#[cfg(test)]
mod tests {

View File

@@ -0,0 +1,17 @@
[package]
name = "cached_tree_hash"
version = "0.1.0"
authors = ["Michael Sproul <michael@sigmaprime.io>"]
edition = "2018"
[dependencies]
ethereum-types = "0.8"
eth2_ssz_types = { path = "../ssz_types" }
eth2_hashing = "0.1"
eth2_ssz_derive = "0.1.0"
eth2_ssz = "0.1.2"
tree_hash = "0.1"
[dev-dependencies]
quickcheck = "0.9"
quickcheck_macros = "0.8"

View File

@@ -0,0 +1,137 @@
use crate::{Error, Hash256};
use eth2_hashing::{hash_concat, ZERO_HASHES};
use ssz_derive::{Decode, Encode};
use tree_hash::BYTES_PER_CHUNK;
/// Sparse Merkle tree suitable for tree hashing vectors and lists.
///
/// Only populated nodes are stored; absent right-hand nodes are treated as
/// all-zero subtrees (the `impl` substitutes precomputed zero hashes).
#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
pub struct TreeHashCache {
    /// Depth is such that the tree has a capacity for 2^depth leaves
    depth: usize,
    /// Sparse layers.
    ///
    /// The leaves are contained in `self.layers[self.depth]`, and each other layer `i`
    /// contains the parents of the nodes in layer `i + 1`.
    layers: Vec<Vec<Hash256>>,
}
impl TreeHashCache {
    /// Create a new cache with the given `depth`, but no actual content.
    ///
    /// The cache can hold up to `2^depth` leaves. All layers start empty, so
    /// the initial root is the zero hash for this depth (see `Self::root`).
    pub fn new(depth: usize) -> Self {
        TreeHashCache {
            depth,
            // One layer per level: layers[0] holds the root, layers[depth] the leaves.
            layers: vec![vec![]; depth + 1],
        }
    }

    /// Compute the updated Merkle root for the given `leaves`.
    pub fn recalculate_merkle_root(
        &mut self,
        leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
    ) -> Result<Hash256, Error> {
        let dirty_indices = self.update_leaves(leaves)?;
        self.update_merkle_root(dirty_indices)
    }

    /// Phase 1 of the algorithm: compute the indices of all dirty leaves.
    ///
    /// Overwrites changed leaves in place and appends any new ones. Returns the
    /// ascending indices of leaves that changed or were added.
    ///
    /// Errors with `CannotShrink` if fewer leaves are supplied than are already
    /// stored, or `TooManyLeaves` if more than `2^depth` are supplied.
    pub fn update_leaves(
        &mut self,
        mut leaves: impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator,
    ) -> Result<Vec<usize>, Error> {
        let new_leaf_count = leaves.len();

        if new_leaf_count < self.leaves().len() {
            return Err(Error::CannotShrink);
        } else if new_leaf_count > 2usize.pow(self.depth as u32) {
            return Err(Error::TooManyLeaves);
        }

        // Update the existing leaves
        let mut dirty = self
            .leaves()
            .iter_mut()
            .enumerate()
            .zip(&mut leaves)
            .flat_map(|((i, leaf), new_leaf)| {
                // Only record an index as dirty if the leaf's bytes actually changed.
                if leaf.as_bytes() != new_leaf {
                    leaf.assign_from_slice(&new_leaf);
                    Some(i)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        // Push the rest of the new leaves (if any)
        // `zip` above consumed exactly `self.leaves().len()` items, so `leaves`
        // now yields only the brand-new ones; all of them are dirty.
        dirty.extend(self.leaves().len()..new_leaf_count);
        self.leaves()
            .extend(leaves.map(|l| Hash256::from_slice(&l)));

        Ok(dirty)
    }

    /// Phase 2: propagate changes upwards from the leaves of the tree, and compute the root.
    ///
    /// Returns an error if `dirty_indices` is inconsistent with the cache.
    pub fn update_merkle_root(&mut self, mut dirty_indices: Vec<usize>) -> Result<Hash256, Error> {
        if dirty_indices.is_empty() {
            return Ok(self.root());
        }

        let mut depth = self.depth;

        while depth > 0 {
            // A node is dirty iff at least one of its children is dirty.
            let new_dirty_indices = lift_dirty(&dirty_indices);

            for &idx in &new_dirty_indices {
                let left_idx = 2 * idx;
                let right_idx = left_idx + 1;

                let left = self.layers[depth][left_idx];
                // A missing right sibling stands for an all-zero subtree; use
                // the precomputed zero hash for this height.
                let right = self.layers[depth]
                    .get(right_idx)
                    .copied()
                    .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth - depth]));

                let new_hash = hash_concat(left.as_bytes(), right.as_bytes());

                match self.layers[depth - 1].get_mut(idx) {
                    Some(hash) => {
                        hash.assign_from_slice(&new_hash);
                    }
                    None => {
                        // Parent layer should already contain nodes for all non-dirty indices
                        if idx != self.layers[depth - 1].len() {
                            return Err(Error::CacheInconsistent);
                        }
                        self.layers[depth - 1].push(Hash256::from_slice(&new_hash));
                    }
                }
            }

            dirty_indices = new_dirty_indices;
            depth -= 1;
        }

        Ok(self.root())
    }

    /// Get the root of this cache, without doing any updates/computation.
    pub fn root(&self) -> Hash256 {
        // An empty root layer means nothing has ever been hashed into the
        // cache; fall back to the zero hash of a fully-zero tree of this depth.
        self.layers[0]
            .get(0)
            .copied()
            .unwrap_or_else(|| Hash256::from_slice(&ZERO_HASHES[self.depth]))
    }

    /// Mutable access to the leaf layer (`self.layers[self.depth]`).
    pub fn leaves(&mut self) -> &mut Vec<Hash256> {
        &mut self.layers[self.depth]
    }
}
/// Compute the dirty indices for one layer up.
/// Compute the dirty indices for one layer up.
fn lift_dirty(dirty_indices: &[usize]) -> Vec<usize> {
    let mut parents = Vec::with_capacity(dirty_indices.len());
    for &index in dirty_indices {
        let parent = index / 2;
        // Sibling nodes share a parent, so consecutive duplicates are skipped.
        if parents.last() != Some(&parent) {
            parents.push(parent);
        }
    }
    parents
}

View File

@@ -0,0 +1,99 @@
use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
use ssz_types::{typenum::Unsigned, FixedVector, VariableList};
use std::mem::size_of;
use tree_hash::{mix_in_length, BYTES_PER_CHUNK};
/// Compute ceil(log(n))
///
/// Smallest number of bits d so that n <= 2^d
/// Compute ceil(log(n))
///
/// Smallest number of bits d so that n <= 2^d
pub fn int_log(n: usize) -> usize {
    if let Some(power) = n.checked_next_power_of_two() {
        power.trailing_zeros() as usize
    } else {
        // `n` is larger than the greatest representable power of two, so
        // every bit of a `usize` is required.
        8 * std::mem::size_of::<usize>()
    }
}
/// Adapt a slice of `Hash256` values into an exact-size iterator of
/// `BYTES_PER_CHUNK`-byte leaf chunks.
pub fn hash256_iter<'a>(
    values: &'a [Hash256],
) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
    values.iter().map(|value| value.to_fixed_bytes())
}
/// Pack a slice of `u64` values into an exact-size iterator of
/// `BYTES_PER_CHUNK`-byte leaf chunks, little-endian, several values per chunk.
pub fn u64_iter<'a>(
    values: &'a [u64],
) -> impl Iterator<Item = [u8; BYTES_PER_CHUNK]> + ExactSizeIterator + 'a {
    let type_size = size_of::<u64>();
    let vals_per_chunk = BYTES_PER_CHUNK / type_size;
    values.chunks(vals_per_chunk).map(move |xs| {
        let mut chunk = [0; BYTES_PER_CHUNK];
        // Lay each value's little-endian bytes into its slot; a short final
        // chunk leaves the remaining bytes zeroed.
        for (i, value) in xs.iter().enumerate() {
            chunk[i * type_size..(i + 1) * type_size].copy_from_slice(&value.to_le_bytes());
        }
        chunk
    })
}
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<Hash256, N> {
    fn new_tree_hash_cache() -> TreeHashCache {
        // One `Hash256` per leaf chunk, so the tree depth is ceil(log2(N)).
        TreeHashCache::new(int_log(N::to_usize()))
    }

    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
        // Fixed-length vectors have no length mix-in: the Merkle root is the root.
        cache.recalculate_merkle_root(hash256_iter(&self))
    }
}
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for FixedVector<u64, N> {
    fn new_tree_hash_cache() -> TreeHashCache {
        // Multiple `u64`s are packed per chunk, reducing the leaf count.
        // NOTE(review): `N / vals_per_chunk` rounds down, under-sizing the tree
        // when `N` is not a multiple of `vals_per_chunk` — confirm callers only
        // instantiate with multiples of 4.
        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
    }

    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
        cache.recalculate_merkle_root(u64_iter(&self))
    }
}
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<Hash256, N> {
    fn new_tree_hash_cache() -> TreeHashCache {
        // The cache is sized for the list's maximum length `N`.
        TreeHashCache::new(int_log(N::to_usize()))
    }

    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
        // Lists mix their current length into the Merkle root (see `mix_in_length`).
        Ok(Hash256::from_slice(&mix_in_length(
            cache
                .recalculate_merkle_root(hash256_iter(&self))?
                .as_bytes(),
            self.len(),
        )))
    }
}
impl<N: Unsigned> CachedTreeHash<TreeHashCache> for VariableList<u64, N> {
    fn new_tree_hash_cache() -> TreeHashCache {
        // As for `FixedVector<u64, N>`: several `u64`s packed per chunk.
        // NOTE(review): `N / vals_per_chunk` rounds down for `N` not a multiple
        // of `vals_per_chunk` — confirm intended instantiations.
        let vals_per_chunk = BYTES_PER_CHUNK / size_of::<u64>();
        TreeHashCache::new(int_log(N::to_usize() / vals_per_chunk))
    }

    fn recalculate_tree_hash_root(&self, cache: &mut TreeHashCache) -> Result<Hash256, Error> {
        // Merkle root of the packed chunks, with the list length mixed in.
        Ok(Hash256::from_slice(&mix_in_length(
            cache.recalculate_merkle_root(u64_iter(&self))?.as_bytes(),
            self.len(),
        )))
    }
}
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_int_log() {
        // Exact powers of two map to their exponent...
        for i in 0..63 {
            assert_eq!(int_log(2usize.pow(i)), i as usize);
        }
        // ...and other values round up to the exponent of the next power of two.
        assert_eq!(int_log(10), 4);
    }
}

View File

@@ -0,0 +1,31 @@
mod cache;
mod impls;
mod multi_cache;
#[cfg(test)]
mod test;
pub use crate::cache::TreeHashCache;
pub use crate::impls::int_log;
pub use crate::multi_cache::MultiTreeHashCache;
use ethereum_types::H256 as Hash256;
use tree_hash::TreeHash;
/// Errors that may arise while updating a tree hash cache.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// Attempting to provide more than 2^depth leaves to a Merkle tree is disallowed.
    TooManyLeaves,
    /// Shrinking a Merkle tree cache by providing it with fewer leaves than it currently has is
    /// disallowed (for simplicity).
    CannotShrink,
    /// Cache is inconsistent with the list of dirty indices provided.
    CacheInconsistent,
}
/// Trait for types which can make use of a cache to accelerate calculation of their tree hash root.
pub trait CachedTreeHash<Cache>: TreeHash {
    /// Create a new cache appropriate for use with values of this type.
    fn new_tree_hash_cache() -> Cache;

    /// Update the cache and use it to compute the tree hash root for `self`.
    ///
    /// Returns an error if `self` is incompatible with the cache's current
    /// state (e.g. it would shrink the cache).
    fn recalculate_tree_hash_root(&self, cache: &mut Cache) -> Result<Hash256, Error>;
}

View File

@@ -0,0 +1,62 @@
use crate::{int_log, CachedTreeHash, Error, Hash256, TreeHashCache};
use ssz_derive::{Decode, Encode};
use ssz_types::{typenum::Unsigned, VariableList};
use tree_hash::mix_in_length;
/// Multi-level tree hash cache.
///
/// Suitable for lists/vectors/containers holding values which themselves have caches.
///
/// Note: this cache could be made composable by replacing the hardcoded `Vec<TreeHashCache>` with
/// `Vec<C>`, allowing arbitrary nesting, but for now we stick to 2-level nesting because that's all
/// we need.
#[derive(Debug, PartialEq, Clone, Default, Encode, Decode)]
pub struct MultiTreeHashCache {
    // Cache for the outer list, whose leaves are the roots of the value caches.
    list_cache: TreeHashCache,
    // One sub-cache per element of the list.
    value_caches: Vec<TreeHashCache>,
}
impl<T, N> CachedTreeHash<MultiTreeHashCache> for VariableList<T, N>
where
    T: CachedTreeHash<TreeHashCache>,
    N: Unsigned,
{
    fn new_tree_hash_cache() -> MultiTreeHashCache {
        MultiTreeHashCache {
            list_cache: TreeHashCache::new(int_log(N::to_usize())),
            // Per-value caches are created on demand as the list grows.
            value_caches: vec![],
        }
    }

    fn recalculate_tree_hash_root(&self, cache: &mut MultiTreeHashCache) -> Result<Hash256, Error> {
        // Mirror the `TreeHashCache` rule: the list may grow, never shrink.
        if self.len() < cache.value_caches.len() {
            return Err(Error::CannotShrink);
        }

        // Resize the value caches to the size of the list.
        cache
            .value_caches
            .resize(self.len(), T::new_tree_hash_cache());

        // Update all individual value caches.
        self.iter()
            .zip(cache.value_caches.iter_mut())
            .try_for_each(|(value, cache)| value.recalculate_tree_hash_root(cache).map(|_| ()))?;

        // Pipe the value roots into the list cache, then mix in the length.
        // Note: it's possible to avoid this 2nd iteration (or an allocation) by using
        // `itertools::process_results`, but it requires removing the `ExactSizeIterator`
        // bound from `recalculate_merkle_root`, and only saves about 5% in benchmarks.
        let list_root = cache.list_cache.recalculate_merkle_root(
            cache
                .value_caches
                .iter()
                .map(|value_cache| value_cache.root().to_fixed_bytes()),
        )?;
        Ok(Hash256::from_slice(&mix_in_length(
            list_root.as_bytes(),
            self.len(),
        )))
    }
}

View File

@@ -0,0 +1,147 @@
use crate::impls::hash256_iter;
use crate::{CachedTreeHash, Error, Hash256, TreeHashCache};
use eth2_hashing::ZERO_HASHES;
use quickcheck_macros::quickcheck;
use ssz_types::{
typenum::{Unsigned, U16, U255, U256, U257},
FixedVector, VariableList,
};
use tree_hash::TreeHash;
/// Build `Hash256` leaves from the integers `start..end` (little-endian).
fn int_hashes(start: u64, end: u64) -> Vec<Hash256> {
    let mut hashes = Vec::with_capacity((end - start) as usize);
    for i in start..end {
        hashes.push(Hash256::from_low_u64_le(i));
    }
    hashes
}
type List16 = VariableList<Hash256, U16>;
type Vector16 = FixedVector<Hash256, U16>;
type Vector16u64 = FixedVector<u64, U16>;
#[test]
fn max_leaves() {
    let depth = 4;
    let max_len = 2u64.pow(depth as u32);
    let mut cache = TreeHashCache::new(depth);
    // Up to 2^depth leaves are accepted (the cache grows between calls)...
    assert!(cache
        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len - 1)))
        .is_ok());
    assert!(cache
        .recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len)))
        .is_ok());
    // ...but anything beyond the capacity is rejected.
    assert_eq!(
        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len + 1))),
        Err(Error::TooManyLeaves)
    );
    assert_eq!(
        cache.recalculate_merkle_root(hash256_iter(&int_hashes(0, max_len * 2))),
        Err(Error::TooManyLeaves)
    );
}

#[test]
fn cannot_shrink() {
    let init_len = 12;
    let list1 = List16::new(int_hashes(0, init_len)).unwrap();
    let list2 = List16::new(int_hashes(0, init_len - 1)).unwrap();
    let mut cache = List16::new_tree_hash_cache();
    // Fill the cache with `init_len` leaves, then supply one fewer.
    assert!(list1.recalculate_tree_hash_root(&mut cache).is_ok());
    assert_eq!(
        list2.recalculate_tree_hash_root(&mut cache),
        Err(Error::CannotShrink)
    );
}

#[test]
fn empty_leaves() {
    let depth = 20;
    let mut cache = TreeHashCache::new(depth);
    // With no leaves, the root must be the precomputed zero hash for this depth.
    assert_eq!(
        cache
            .recalculate_merkle_root(vec![].into_iter())
            .unwrap()
            .as_bytes(),
        &ZERO_HASHES[depth][..]
    );
}

#[test]
fn fixed_vector_hash256() {
    let len = 16;
    let vec = Vector16::new(int_hashes(0, len)).unwrap();
    let mut cache = Vector16::new_tree_hash_cache();
    // The cached root must agree with the non-cached `TreeHash` implementation.
    assert_eq!(
        Hash256::from_slice(&vec.tree_hash_root()),
        vec.recalculate_tree_hash_root(&mut cache).unwrap()
    );
}

#[test]
fn fixed_vector_u64() {
    let len = 16;
    let vec = Vector16u64::new((0..len).collect()).unwrap();
    let mut cache = Vector16u64::new_tree_hash_cache();
    // Packed-u64 vectors must also match the non-cached implementation.
    assert_eq!(
        Hash256::from_slice(&vec.tree_hash_root()),
        vec.recalculate_tree_hash_root(&mut cache).unwrap()
    );
}

#[test]
fn variable_list_hash256() {
    // A partially-full list (13 of 16) exercises the zero-padding path.
    let len = 13;
    let list = List16::new(int_hashes(0, len)).unwrap();
    let mut cache = List16::new_tree_hash_cache();
    assert_eq!(
        Hash256::from_slice(&list.tree_hash_root()),
        list.recalculate_tree_hash_root(&mut cache).unwrap()
    );
}
// Quickcheck the list cache at a power-of-two max length (256) and the
// off-by-one lengths either side of it (255, 257).
#[quickcheck]
fn quickcheck_variable_list_h256_256(leaves_and_skips: Vec<(u64, bool)>) -> bool {
    variable_list_h256_test::<U256>(leaves_and_skips)
}

#[quickcheck]
fn quickcheck_variable_list_h256_255(leaves_and_skips: Vec<(u64, bool)>) -> bool {
    variable_list_h256_test::<U255>(leaves_and_skips)
}

#[quickcheck]
fn quickcheck_variable_list_h256_257(leaves_and_skips: Vec<(u64, bool)>) -> bool {
    variable_list_h256_test::<U257>(leaves_and_skips)
}

/// Grow a `VariableList` one element at a time, optionally updating the cache
/// at each step, and check every cached root equals the non-cached root.
///
/// NOTE(review): `leaves` is truncated to `Len` by `take`, but `end` ranges up
/// to `leaves_and_skips.len() - 1`, so `leaves[..end]` would panic whenever
/// `leaves_and_skips.len() >= Len + 2`. Quickcheck's default sizes don't reach
/// 255+ elements — confirm this is intentional.
fn variable_list_h256_test<Len: Unsigned>(leaves_and_skips: Vec<(u64, bool)>) -> bool {
    let leaves: Vec<_> = leaves_and_skips
        .iter()
        .map(|(l, _)| Hash256::from_low_u64_be(*l))
        .take(Len::to_usize())
        .collect();

    let mut list: VariableList<Hash256, Len>;
    let mut cache = VariableList::<Hash256, Len>::new_tree_hash_cache();

    for (end, (_, update_cache)) in leaves_and_skips.into_iter().enumerate() {
        list = VariableList::new(leaves[..end].to_vec()).unwrap();

        if update_cache {
            // Skipped steps (update_cache == false) deliberately leave the cache
            // stale so later updates must catch up over several new leaves.
            if list
                .recalculate_tree_hash_root(&mut cache)
                .unwrap()
                .as_bytes()
                != &list.tree_hash_root()[..]
            {
                return false;
            }
        }
    }
    true
}

View File

@@ -1,11 +1,14 @@
[package]
name = "eth2_hashing"
version = "0.1.0"
version = "0.1.1"
authors = ["Paul Hauner <paul@paulhauner.com>"]
edition = "2018"
license = "Apache-2.0"
description = "Hashing primitives used in Ethereum 2.0"
[dependencies]
lazy_static = { version = "1.4.0", optional = true }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
ring = "0.16.9"
@@ -17,3 +20,7 @@ rustc-hex = "2.0.1"
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen-test = "0.3.2"
[features]
default = ["zero_hash_cache"]
zero_hash_cache = ["lazy_static"]

View File

@@ -10,6 +10,9 @@ use ring::digest::{digest, SHA256};
#[cfg(target_arch = "wasm32")]
use sha2::{Digest, Sha256};
#[cfg(feature = "zero_hash_cache")]
use lazy_static::lazy_static;
/// Returns the digest of `input`.
///
/// Uses `ring::digest::SHA256`.
@@ -23,6 +26,31 @@ pub fn hash(input: &[u8]) -> Vec<u8> {
h
}
/// Compute the hash of two slices concatenated.
///
/// Equivalent to `hash(h1 || h2)`.
pub fn hash_concat(h1: &[u8], h2: &[u8]) -> Vec<u8> {
    // Pre-allocate the exact preimage size so appending `h2` never reallocates
    // (the previous `h1.to_vec()` only reserved `h1.len()` bytes).
    let mut preimage = Vec::with_capacity(h1.len() + h2.len());
    preimage.extend_from_slice(h1);
    preimage.extend_from_slice(h2);
    hash(&preimage)
}
/// The max index that can be used with `ZERO_HASHES`.
#[cfg(feature = "zero_hash_cache")]
pub const ZERO_HASHES_MAX_INDEX: usize = 48;

#[cfg(feature = "zero_hash_cache")]
lazy_static! {
    /// Cached zero hashes where `ZERO_HASHES[i]` is the hash of a Merkle tree with 2^i zero leaves.
    pub static ref ZERO_HASHES: Vec<Vec<u8>> = {
        // `ZERO_HASHES[0]` is a single 32-byte zero chunk; each subsequent
        // entry hashes two copies of the previous entry together.
        let mut hashes = vec![vec![0; 32]; ZERO_HASHES_MAX_INDEX + 1];

        for i in 0..ZERO_HASHES_MAX_INDEX {
            hashes[i + 1] = hash_concat(&hashes[i], &hashes[i]);
        }

        hashes
    };
}
#[cfg(test)]
mod tests {
use super::*;
@@ -41,4 +69,14 @@ mod tests {
let expected: Vec<u8> = expected_hex.from_hex().unwrap();
assert_eq!(expected, output);
}
#[cfg(feature = "zero_hash_cache")]
mod zero_hash {
use super::*;
#[test]
fn zero_hash_zero() {
assert_eq!(ZERO_HASHES[0], vec![0; 32]);
}
}
}

View File

@@ -1,24 +1,11 @@
#[macro_use]
extern crate lazy_static;
use eth2_hashing::hash;
use eth2_hashing::{hash, hash_concat, ZERO_HASHES};
use ethereum_types::H256;
use lazy_static::lazy_static;
const MAX_TREE_DEPTH: usize = 32;
const EMPTY_SLICE: &[H256] = &[];
lazy_static! {
/// Cached zero hashes where `ZERO_HASHES[i]` is the hash of a Merkle tree with 2^i zero leaves.
static ref ZERO_HASHES: Vec<H256> = {
let mut hashes = vec![H256::from([0; 32]); MAX_TREE_DEPTH + 1];
for i in 0..MAX_TREE_DEPTH {
hashes[i + 1] = hash_concat(hashes[i], hashes[i]);
}
hashes
};
/// Zero nodes to act as "synthetic" left and right subtrees of other zero nodes.
static ref ZERO_NODES: Vec<MerkleTree> = {
(0..=MAX_TREE_DEPTH).map(MerkleTree::Zero).collect()
@@ -78,7 +65,10 @@ impl MerkleTree {
let left_subtree = MerkleTree::create(left_leaves, depth - 1);
let right_subtree = MerkleTree::create(right_leaves, depth - 1);
let hash = hash_concat(left_subtree.hash(), right_subtree.hash());
let hash = H256::from_slice(&hash_concat(
left_subtree.hash().as_bytes(),
right_subtree.hash().as_bytes(),
));
Node(hash, Box::new(left_subtree), Box::new(right_subtree))
}
@@ -146,7 +136,7 @@ impl MerkleTree {
match *self {
MerkleTree::Leaf(h) => h,
MerkleTree::Node(h, _, _) => h,
MerkleTree::Zero(depth) => ZERO_HASHES[depth],
MerkleTree::Zero(depth) => H256::from_slice(&ZERO_HASHES[depth]),
}
}
@@ -228,8 +218,7 @@ fn merkle_root_from_branch(leaf: H256, branch: &[H256], depth: usize, index: usi
for (i, leaf) in branch.iter().enumerate().take(depth) {
let ith_bit = (index >> i) & 0x01;
if ith_bit == 1 {
let input = concat(leaf.as_bytes().to_vec(), merkle_root);
merkle_root = hash(&input);
merkle_root = hash_concat(leaf.as_bytes(), &merkle_root);
} else {
let mut input = merkle_root;
input.extend_from_slice(leaf.as_bytes());
@@ -240,20 +229,6 @@ fn merkle_root_from_branch(leaf: H256, branch: &[H256], depth: usize, index: usi
H256::from_slice(&merkle_root)
}
/// Concatenate two vectors.
fn concat(mut vec1: Vec<u8>, mut vec2: Vec<u8>) -> Vec<u8> {
vec1.append(&mut vec2);
vec1
}
/// Compute the hash of two other hashes concatenated.
fn hash_concat(h1: H256, h2: H256) -> H256 {
H256::from_slice(&hash(&concat(
h1.as_bytes().to_vec(),
h2.as_bytes().to_vec(),
)))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -318,10 +293,10 @@ mod tests {
let leaf_b10 = H256::from([0xCC; 32]);
let leaf_b11 = H256::from([0xDD; 32]);
let node_b0x = hash_concat(leaf_b00, leaf_b01);
let node_b1x = hash_concat(leaf_b10, leaf_b11);
let node_b0x = H256::from_slice(&hash_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
let node_b1x = H256::from_slice(&hash_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
let root = hash_concat(node_b0x, node_b1x);
let root = H256::from_slice(&hash_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
let tree = MerkleTree::create(&[leaf_b00, leaf_b01, leaf_b10, leaf_b11], 2);
assert_eq!(tree.hash(), root);
@@ -335,10 +310,10 @@ mod tests {
let leaf_b10 = H256::from([0xCC; 32]);
let leaf_b11 = H256::from([0xDD; 32]);
let node_b0x = hash_concat(leaf_b00, leaf_b01);
let node_b1x = hash_concat(leaf_b10, leaf_b11);
let node_b0x = H256::from_slice(&hash_concat(leaf_b00.as_bytes(), leaf_b01.as_bytes()));
let node_b1x = H256::from_slice(&hash_concat(leaf_b10.as_bytes(), leaf_b11.as_bytes()));
let root = hash_concat(node_b0x, node_b1x);
let root = H256::from_slice(&hash_concat(node_b0x.as_bytes(), node_b1x.as_bytes()));
// Run some proofs
assert!(verify_merkle_proof(

View File

@@ -15,8 +15,8 @@ criterion = "0.3.0"
rand = "0.7.2"
tree_hash_derive = "0.2"
types = { path = "../../types" }
lazy_static = "1.4.0"
[dependencies]
ethereum-types = "0.8.0"
eth2_hashing = "0.1.0"
lazy_static = "1.4.0"

View File

@@ -1,8 +1,6 @@
#[macro_use]
extern crate lazy_static;
use criterion::Criterion;
use criterion::{black_box, criterion_group, criterion_main, Benchmark};
use lazy_static::lazy_static;
use types::test_utils::{generate_deterministic_keypairs, TestingBeaconStateBuilder};
use types::{BeaconState, EthSpec, Keypair, MainnetEthSpec, MinimalEthSpec};
@@ -27,25 +25,61 @@ fn build_state<T: EthSpec>(validator_count: usize) -> BeaconState<T> {
state
}
// Note: `state.canonical_root()` uses whatever `tree_hash` that the `types` crate
// uses, which is not necessarily this crate. If you want to ensure that types is
// using this local version of `tree_hash`, ensure you add a workspace-level
// [dependency
// patch](https://doc.rust-lang.org/cargo/reference/manifest.html#the-patch-section).
fn bench_suite<T: EthSpec>(c: &mut Criterion, spec_desc: &str, validator_count: usize) {
let state = build_state::<T>(validator_count);
let state1 = build_state::<T>(validator_count);
let state2 = state1.clone();
let mut state3 = state1.clone();
state3.build_tree_hash_cache().unwrap();
c.bench(
&format!("{}/{}_validators", spec_desc, validator_count),
&format!("{}/{}_validators/no_cache", spec_desc, validator_count),
Benchmark::new("genesis_state", move |b| {
b.iter_batched_ref(
|| state.clone(),
// Note: `state.canonical_root()` uses whatever `tree_hash` that the `types` crate
// uses, which is not necessarily this crate. If you want to ensure that types is
// using this local version of `tree_hash`, ensure you add a workspace-level
// [dependency
// patch](https://doc.rust-lang.org/cargo/reference/manifest.html#the-patch-section).
|| state1.clone(),
|state| black_box(state.canonical_root()),
criterion::BatchSize::SmallInput,
)
})
.sample_size(10),
);
c.bench(
&format!("{}/{}_validators/empty_cache", spec_desc, validator_count),
Benchmark::new("genesis_state", move |b| {
b.iter_batched_ref(
|| state2.clone(),
|state| {
assert!(!state.tree_hash_cache.is_initialized());
black_box(state.update_tree_hash_cache().unwrap())
},
criterion::BatchSize::SmallInput,
)
})
.sample_size(10),
);
c.bench(
&format!(
"{}/{}_validators/up_to_date_cache",
spec_desc, validator_count
),
Benchmark::new("genesis_state", move |b| {
b.iter_batched_ref(
|| state3.clone(),
|state| {
assert!(state.tree_hash_cache.is_initialized());
black_box(state.update_tree_hash_cache().unwrap())
},
criterion::BatchSize::SmallInput,
)
})
.sample_size(10),
);
}
fn all_benches(c: &mut Criterion) {

View File

@@ -131,36 +131,6 @@ impl TreeHash for H256 {
}
}
// TODO: this implementation always panics, it only exists to allow us to compile whilst
// refactoring tree hash. Should be removed.
macro_rules! impl_for_list {
($type: ty) => {
impl<T> TreeHash for $type
where
T: TreeHash,
{
fn tree_hash_type() -> TreeHashType {
unimplemented!("TreeHash is not implemented for Vec or slice")
}
fn tree_hash_packed_encoding(&self) -> Vec<u8> {
unimplemented!("TreeHash is not implemented for Vec or slice")
}
fn tree_hash_packing_factor() -> usize {
unimplemented!("TreeHash is not implemented for Vec or slice")
}
fn tree_hash_root(&self) -> Vec<u8> {
unimplemented!("TreeHash is not implemented for Vec or slice")
}
}
};
}
impl_for_list!(Vec<T>);
impl_for_list!(&[T]);
/// Returns `int` as little-endian bytes with a length of 32.
fn int_to_bytes32(int: u64) -> Vec<u8> {
let mut vec = int.to_le_bytes().to_vec();

View File

@@ -1,6 +1,3 @@
#[macro_use]
extern crate lazy_static;
pub mod impls;
mod merkleize_padded;
mod merkleize_standard;
@@ -27,7 +24,7 @@ pub fn mix_in_length(root: &[u8], length: usize) -> Vec<u8> {
let mut length_bytes = length.to_le_bytes().to_vec();
length_bytes.resize(BYTES_PER_CHUNK, 0);
merkleize_padded::hash_concat(root, &length_bytes)
eth2_hashing::hash_concat(root, &length_bytes)
}
#[derive(Debug, PartialEq, Clone)]

View File

@@ -1,25 +1,10 @@
use super::BYTES_PER_CHUNK;
use eth2_hashing::hash;
use eth2_hashing::{hash, hash_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
/// The size of the cache that stores padding nodes for a given height.
///
/// Currently, we panic if we encounter a tree with a height larger than `MAX_TREE_DEPTH`.
///
/// It is set to 48 as we expect it to be sufficiently high that we won't exceed it.
pub const MAX_TREE_DEPTH: usize = 48;
lazy_static! {
/// Cached zero hashes where `ZERO_HASHES[i]` is the hash of a Merkle tree with 2^i zero leaves.
static ref ZERO_HASHES: Vec<Vec<u8>> = {
let mut hashes = vec![vec![0; 32]; MAX_TREE_DEPTH + 1];
for i in 0..MAX_TREE_DEPTH {
hashes[i + 1] = hash_concat(&hashes[i], &hashes[i]);
}
hashes
};
}
pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX;
/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
/// leaves.
@@ -236,17 +221,6 @@ fn get_zero_hash(height: usize) -> &'static [u8] {
}
}
/// Concatenate two vectors.
fn concat(mut vec1: Vec<u8>, mut vec2: Vec<u8>) -> Vec<u8> {
vec1.append(&mut vec2);
vec1
}
/// Compute the hash of two other hashes concatenated.
pub fn hash_concat(h1: &[u8], h2: &[u8]) -> Vec<u8> {
hash(&concat(h1.to_vec(), h2.to_vec()))
}
/// Returns the next even number following `n`. If `n` is even, `n` is returned.
fn next_even_number(n: usize) -> usize {
n + n % 2

View File

@@ -3,14 +3,25 @@ extern crate proc_macro;
use proc_macro::TokenStream;
use quote::quote;
use syn::{parse_macro_input, DeriveInput};
use std::collections::HashMap;
use syn::{parse_macro_input, Attribute, DeriveInput, Meta};
/// Returns a Vec of `syn::Ident` for each named field in the struct, whilst filtering out fields
/// Return a Vec of `syn::Ident` for each named field in the struct, whilst filtering out fields
/// that should not be hashed.
///
/// # Panics
/// Any unnamed struct field (like in a tuple struct) will raise a panic at compile time.
fn get_hashable_named_field_idents<'a>(struct_data: &'a syn::DataStruct) -> Vec<&'a syn::Ident> {
fn get_hashable_fields<'a>(struct_data: &'a syn::DataStruct) -> Vec<&'a syn::Ident> {
    // Delegate to the richer helper and keep only the field idents.
    let fields = get_hashable_fields_and_their_caches(struct_data);
    let mut idents = Vec::with_capacity(fields.len());
    for (ident, _, _) in fields {
        idents.push(ident);
    }
    idents
}
/// Return a Vec of the hashable fields of a struct, and each field's type and optional cache field.
fn get_hashable_fields_and_their_caches<'a>(
struct_data: &'a syn::DataStruct,
) -> Vec<(&'a syn::Ident, syn::Type, Option<syn::Ident>)> {
struct_data
.fields
.iter()
@@ -18,15 +29,77 @@ fn get_hashable_named_field_idents<'a>(struct_data: &'a syn::DataStruct) -> Vec<
if should_skip_hashing(&f) {
None
} else {
Some(match &f.ident {
Some(ref ident) => ident,
_ => panic!("tree_hash_derive only supports named struct fields."),
})
let ident = f
.ident
.as_ref()
.expect("tree_hash_derive only supports named struct fields");
let opt_cache_field = get_cache_field_for(&f);
Some((ident, f.ty.clone(), opt_cache_field))
}
})
.collect()
}
/// Parse the cached_tree_hash attribute for a field.
///
/// Extract the cache field name from `#[cached_tree_hash(cache_field_name)]`
///
/// Return `Some(cache_field_name)` if the field has a cached tree hash attribute,
/// or `None` otherwise.
// The explicit `'a` lifetime served no purpose (the return type is owned), so
// it is elided here to satisfy clippy's `needless_lifetimes` lint.
fn get_cache_field_for(field: &syn::Field) -> Option<syn::Ident> {
    use syn::{MetaList, NestedMeta};

    let parsed_attrs = cached_tree_hash_attr_metas(&field.attrs);
    if let [Meta::List(MetaList { nested, .. })] = &parsed_attrs[..] {
        // Take the first bare identifier inside the attribute's parentheses.
        nested.iter().find_map(|x| match x {
            NestedMeta::Meta(Meta::Word(cache_field_ident)) => Some(cache_field_ident.clone()),
            _ => None,
        })
    } else {
        None
    }
}
/// Process the `cached_tree_hash` attributes from a list of attributes into structured `Meta`s.
fn cached_tree_hash_attr_metas(attrs: &[Attribute]) -> Vec<Meta> {
    let mut metas = Vec::new();
    for attr in attrs {
        // Only consider `#[cached_tree_hash(...)]` attributes that parse into a
        // structured `Meta`; parse failures are silently skipped, as before.
        if attr.path.is_ident("cached_tree_hash") {
            if let Ok(meta) = attr.parse_meta() {
                metas.push(meta);
            }
        }
    }
    metas
}
/// Parse the top-level cached_tree_hash struct attribute.
///
/// Return the type from `#[cached_tree_hash(type = "T")]`.
///
/// **Panics** if the attribute is missing or the type is malformed.
fn parse_cached_tree_hash_struct_attrs(attrs: &[Attribute]) -> syn::Type {
    use syn::{Lit, MetaList, MetaNameValue, NestedMeta};

    let parsed_attrs = cached_tree_hash_attr_metas(attrs);
    if let [Meta::List(MetaList { nested, .. })] = &parsed_attrs[..] {
        // Collect every `name = "value"` pair inside the attribute's parentheses.
        let eqns = nested
            .iter()
            .flat_map(|x| match x {
                NestedMeta::Meta(Meta::NameValue(MetaNameValue {
                    ident,
                    lit: Lit::Str(lit_str),
                    ..
                })) => Some((ident.to_string(), lit_str.clone())),
                _ => None,
            })
            .collect::<HashMap<_, _>>();

        // Use an explicit lookup rather than `eqns["type"]` so a missing key
        // produces a meaningful panic message instead of a bare index panic.
        eqns.get("type")
            .expect("missing `type = \"...\"` in `#[cached_tree_hash(...)]` attribute on struct")
            .clone()
            .parse()
            .expect("valid type required for cache")
    } else {
        panic!("missing attribute `#[cached_tree_hash(type = ...)` on struct");
    }
}
/// Returns true if some field has an attribute declaring it should not be hashed.
///
/// The field attribute is: `#[tree_hash(skip_hashing)]`
@@ -51,7 +124,7 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream {
_ => panic!("tree_hash_derive only supports structs."),
};
let idents = get_hashable_named_field_idents(&struct_data);
let idents = get_hashable_fields(&struct_data);
let output = quote! {
impl #impl_generics tree_hash::TreeHash for #name #ty_generics #where_clause {
@@ -112,6 +185,82 @@ pub fn tree_hash_signed_root_derive(input: TokenStream) -> TokenStream {
output.into()
}
/// Derive the `CachedTreeHash` trait for a type.
///
/// Requires two attributes:
/// * `#[cached_tree_hash(type = "T")]` on the struct, declaring
///   that the type `T` should be used as the tree hash cache.
/// * `#[cached_tree_hash(f)]` on each struct field that makes use
///   of the cache, which declares that the sub-cache for that field
///   can be found in the field `cache.f` of the struct's cache.
#[proc_macro_derive(CachedTreeHash, attributes(cached_tree_hash))]
pub fn cached_tree_hash_derive(input: TokenStream) -> TokenStream {
    let item = parse_macro_input!(input as DeriveInput);
    let name = &item.ident;

    // The user-declared cache type from `#[cached_tree_hash(type = "T")]`.
    let cache_type = parse_cached_tree_hash_struct_attrs(&item.attrs);

    let (impl_generics, ty_generics, where_clause) = &item.generics.split_for_impl();

    let struct_data = match &item.data {
        syn::Data::Struct(s) => s,
        _ => panic!("tree_hash_derive only supports structs."),
    };

    let fields = get_hashable_fields_and_their_caches(&struct_data);
    // Types of the fields that carry their own sub-cache...
    let caching_field_ty = fields
        .iter()
        .filter(|(_, _, cache_field)| cache_field.is_some())
        .map(|(_, ty, _)| ty);
    // ...and the corresponding field names inside the cache struct.
    let caching_field_cache_field = fields
        .iter()
        .flat_map(|(_, _, cache_field)| cache_field.as_ref());

    // Per-field expression yielding that field's tree-hash-root bytes: cached
    // fields update their sub-cache; uncached fields use plain `TreeHash`.
    let tree_hash_root_expr = fields
        .iter()
        .map(|(field, _, caching_field)| match caching_field {
            None => quote! {
                self.#field.tree_hash_root()
            },
            Some(caching_field) => quote! {
                self.#field
                    .recalculate_tree_hash_root(&mut cache.#caching_field)?
                    .as_bytes()
                    .to_vec()
            },
        });

    // NOTE(review): the generated constructor sets an `initialized: true` field,
    // so the cache type `T` must declare one (e.g. `MultiTreeHashCache` as
    // defined in this crate does not) — confirm which cache types are intended.
    let output = quote! {
        impl #impl_generics cached_tree_hash::CachedTreeHash<#cache_type> for #name #ty_generics #where_clause {
            fn new_tree_hash_cache() -> #cache_type {
                // Call new cache for each sub type
                #cache_type {
                    initialized: true,
                    #(
                        #caching_field_cache_field: <#caching_field_ty>::new_tree_hash_cache()
                    ),*
                }
            }

            fn recalculate_tree_hash_root(
                &self,
                cache: &mut #cache_type)
                -> Result<Hash256, cached_tree_hash::Error>
            {
                let mut leaves = vec![];

                #(
                    leaves.append(&mut #tree_hash_root_expr);
                )*

                Ok(Hash256::from_slice(&tree_hash::merkle_root(&leaves, 0)))
            }
        }
    };
    output.into()
}
fn get_signed_root_named_field_idents(struct_data: &syn::DataStruct) -> Vec<&syn::Ident> {
struct_data
.fields