diff --git a/Cargo.lock b/Cargo.lock index 998f94597c..d9e64d900e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -450,6 +450,7 @@ dependencies = [ "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "quickcheck_macros 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree_hash 0.1.1", ] @@ -4476,6 +4477,7 @@ dependencies = [ "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree_hash_derive 0.2.0", "types 0.1.0", ] diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 0ffdf35d4b..e32ccff1fb 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1705,7 +1705,7 @@ impl Drop for BeaconChain { fn write_state(prefix: &str, state: &BeaconState, log: &Logger) { if WRITE_BLOCK_PROCESSING_SSZ { - let root = Hash256::from_slice(&state.tree_hash_root()); + let root = state.tree_hash_root(); let filename = format!("{}_slot_{}_root_{}.ssz", prefix, state.slot, root); let mut path = std::env::temp_dir().join("lighthouse"); let _ = fs::create_dir_all(path.clone()); diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 920c46f12a..551c079691 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -474,7 +474,7 @@ fn check_chain_dump(harness: &TestHarness, expected_len: u64) { // Check that the tree hash of the stored state is as expected assert_eq!( checkpoint.beacon_state_root, - 
Hash256::from_slice(&checkpoint.beacon_state.tree_hash_root()), + checkpoint.beacon_state.tree_hash_root(), "tree hash of stored state is incorrect" ); diff --git a/beacon_node/eth1/src/deposit_cache.rs b/beacon_node/eth1/src/deposit_cache.rs index 52e4350c86..c38cff62af 100644 --- a/beacon_node/eth1/src/deposit_cache.rs +++ b/beacon_node/eth1/src/deposit_cache.rs @@ -149,7 +149,7 @@ impl DepositCache { pub fn insert_log(&mut self, log: DepositLog) -> Result<(), Error> { match log.index.cmp(&(self.logs.len() as u64)) { Ordering::Equal => { - let deposit = Hash256::from_slice(&log.deposit_data.tree_hash_root()); + let deposit = log.deposit_data.tree_hash_root(); self.leaves.push(deposit); self.logs.push(log); self.deposit_tree diff --git a/beacon_node/eth1/tests/test.rs b/beacon_node/eth1/tests/test.rs index 83c26fa2fe..7945766680 100644 --- a/beacon_node/eth1/tests/test.rs +++ b/beacon_node/eth1/tests/test.rs @@ -529,7 +529,7 @@ mod deposit_tree { for (j, deposit) in deposits.iter().enumerate() { assert!( verify_merkle_proof( - Hash256::from_slice(&deposit.data.tree_hash_root()), + deposit.data.tree_hash_root(), &deposit.proof, DEPOSIT_CONTRACT_TREE_DEPTH + 1, j, diff --git a/beacon_node/genesis/src/common.rs b/beacon_node/genesis/src/common.rs index 5c44477384..0d483f9834 100644 --- a/beacon_node/genesis/src/common.rs +++ b/beacon_node/genesis/src/common.rs @@ -1,4 +1,4 @@ -use int_to_bytes::int_to_bytes32; +use int_to_bytes::int_to_fixed_bytes32; use merkle_proof::MerkleTree; use rayon::prelude::*; use tree_hash::TreeHash; @@ -12,7 +12,7 @@ pub fn genesis_deposits( ) -> Result, String> { let deposit_root_leaves = deposit_data .par_iter() - .map(|data| Hash256::from_slice(&data.tree_hash_root())) + .map(|data| data.tree_hash_root()) .collect::>(); let mut proofs = vec![]; @@ -24,7 +24,7 @@ pub fn genesis_deposits( } let (_, mut proof) = tree.generate_proof(i, depth); - proof.push(Hash256::from_slice(&int_to_bytes32((i + 1) as u64))); + 
proof.push(Hash256::from_slice(&int_to_fixed_bytes32((i + 1) as u64))); assert_eq!( proof.len(), diff --git a/eth2/state_processing/src/genesis.rs b/eth2/state_processing/src/genesis.rs index 209f025c4f..8a950b9eee 100644 --- a/eth2/state_processing/src/genesis.rs +++ b/eth2/state_processing/src/genesis.rs @@ -31,7 +31,7 @@ pub fn initialize_beacon_state_from_eth1( for deposit in deposits.iter() { deposit_tree - .push_leaf(Hash256::from_slice(&deposit.data.tree_hash_root())) + .push_leaf(deposit.data.tree_hash_root()) .map_err(BlockProcessingError::MerkleTreeError)?; state.eth1_data.deposit_root = deposit_tree.root(); process_deposit(&mut state, &deposit, spec, true)?; diff --git a/eth2/state_processing/src/per_block_processing.rs b/eth2/state_processing/src/per_block_processing.rs index 771af88ba0..844ae850f1 100644 --- a/eth2/state_processing/src/per_block_processing.rs +++ b/eth2/state_processing/src/per_block_processing.rs @@ -144,8 +144,7 @@ pub fn process_block_header( ) -> Result<(), BlockOperationError> { verify!(block.slot == state.slot, HeaderInvalid::StateSlotMismatch); - let expected_previous_block_root = - Hash256::from_slice(&state.latest_block_header.tree_hash_root()); + let expected_previous_block_root = state.latest_block_header.tree_hash_root(); verify!( block.parent_root == expected_previous_block_root, HeaderInvalid::ParentBlockRootMismatch { diff --git a/eth2/state_processing/src/per_block_processing/block_processing_builder.rs b/eth2/state_processing/src/per_block_processing/block_processing_builder.rs index 52aa15ec2d..695c10c032 100644 --- a/eth2/state_processing/src/per_block_processing/block_processing_builder.rs +++ b/eth2/state_processing/src/per_block_processing/block_processing_builder.rs @@ -50,9 +50,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => 
builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); @@ -92,9 +90,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); @@ -149,9 +145,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); @@ -192,9 +186,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); @@ -241,9 +233,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); @@ -283,9 +273,7 @@ impl BlockProcessingBuilder { match previous_block_root { Some(root) => builder.set_parent_root(root), - None => builder.set_parent_root(Hash256::from_slice( - &state.latest_block_header.tree_hash_root(), - )), + None => 
builder.set_parent_root(state.latest_block_header.tree_hash_root()), } let proposer_index = state.get_beacon_proposer_index(state.slot, spec).unwrap(); diff --git a/eth2/state_processing/src/per_block_processing/signature_sets.rs b/eth2/state_processing/src/per_block_processing/signature_sets.rs index fb7742cd85..3c59e402be 100644 --- a/eth2/state_processing/src/per_block_processing/signature_sets.rs +++ b/eth2/state_processing/src/per_block_processing/signature_sets.rs @@ -62,13 +62,13 @@ pub fn block_proposal_signature_set<'a, T: EthSpec>( } .tree_hash_root() } else { - block.signing_root(domain).as_bytes().to_vec() + block.signing_root(domain) }; Ok(SignatureSet::single( &signed_block.signature, validator_pubkey(state, proposer_index)?, - message, + message.as_bytes().to_vec(), )) } diff --git a/eth2/state_processing/src/per_block_processing/verify_deposit.rs b/eth2/state_processing/src/per_block_processing/verify_deposit.rs index 1115f46ca0..9f624ddb8f 100644 --- a/eth2/state_processing/src/per_block_processing/verify_deposit.rs +++ b/eth2/state_processing/src/per_block_processing/verify_deposit.rs @@ -57,7 +57,7 @@ pub fn verify_deposit_merkle_proof( verify!( verify_merkle_proof( - Hash256::from_slice(&leaf), + leaf, &deposit.proof[..], spec.deposit_contract_tree_depth as usize + 1, deposit_index as usize, diff --git a/eth2/state_processing/src/per_epoch_processing.rs b/eth2/state_processing/src/per_epoch_processing.rs index 72e9e49cd5..74be1b1ed2 100644 --- a/eth2/state_processing/src/per_epoch_processing.rs +++ b/eth2/state_processing/src/per_epoch_processing.rs @@ -172,7 +172,7 @@ pub fn process_final_updates( let historical_batch = state.historical_batch(); state .historical_roots - .push(Hash256::from_slice(&historical_batch.tree_hash_root()))?; + .push(historical_batch.tree_hash_root())?; } // Rotate current/previous epoch attestations diff --git a/eth2/types/src/beacon_state.rs b/eth2/types/src/beacon_state.rs index c05c45de8b..45c71774f9 100644 --- 
a/eth2/types/src/beacon_state.rs +++ b/eth2/types/src/beacon_state.rs @@ -66,6 +66,7 @@ pub enum Error { CommitteeCacheUninitialized(Option), SszTypesError(ssz_types::Error), TreeHashCacheNotInitialized, + TreeHashError(tree_hash::Error), CachedTreeHashError(cached_tree_hash::Error), InvalidValidatorPubkey(ssz::DecodeError), ValidatorRegistryShrunk, @@ -1044,3 +1045,9 @@ impl From for Error { Error::CachedTreeHashError(e) } } + +impl From for Error { + fn from(e: tree_hash::Error) -> Error { + Error::TreeHashError(e) + } +} diff --git a/eth2/types/src/beacon_state/tree_hash_cache.rs b/eth2/types/src/beacon_state/tree_hash_cache.rs index a6271b7290..4449827acc 100644 --- a/eth2/types/src/beacon_state/tree_hash_cache.rs +++ b/eth2/types/src/beacon_state/tree_hash_cache.rs @@ -3,7 +3,13 @@ use crate::{BeaconState, EthSpec, Hash256, Unsigned, Validator}; use cached_tree_hash::{int_log, CacheArena, CachedTreeHash, TreeHashCache}; use rayon::prelude::*; use ssz_derive::{Decode, Encode}; -use tree_hash::{mix_in_length, TreeHash}; +use tree_hash::{mix_in_length, MerkleHasher, TreeHash}; + +/// The number of fields on a beacon state. +const NUM_BEACON_STATE_HASHING_FIELDS: usize = 20; + +/// The number of nodes in the Merkle tree of a validator record. +const NODES_PER_VALIDATOR: usize = 15; /// The number of validator record tree hash caches stored in each arena. 
/// @@ -73,64 +79,79 @@ impl BeaconTreeHashCache { &mut self, state: &BeaconState, ) -> Result { - let mut leaves = vec![]; + let mut hasher = MerkleHasher::with_leaves(NUM_BEACON_STATE_HASHING_FIELDS); - leaves.append(&mut state.genesis_time.tree_hash_root()); - leaves.append(&mut state.slot.tree_hash_root()); - leaves.append(&mut state.fork.tree_hash_root()); - leaves.append(&mut state.latest_block_header.tree_hash_root()); - leaves.extend_from_slice( + hasher.write(state.genesis_time.tree_hash_root().as_bytes())?; + hasher.write(state.slot.tree_hash_root().as_bytes())?; + hasher.write(state.fork.tree_hash_root().as_bytes())?; + hasher.write(state.latest_block_header.tree_hash_root().as_bytes())?; + hasher.write( state .block_roots .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.block_roots)? .as_bytes(), - ); - leaves.extend_from_slice( + )?; + hasher.write( state .state_roots .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.state_roots)? .as_bytes(), - ); - leaves.extend_from_slice( + )?; + hasher.write( state .historical_roots .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.historical_roots)? .as_bytes(), - ); - leaves.append(&mut state.eth1_data.tree_hash_root()); - leaves.append(&mut state.eth1_data_votes.tree_hash_root()); - leaves.append(&mut state.eth1_deposit_index.tree_hash_root()); - leaves.extend_from_slice( + )?; + hasher.write(state.eth1_data.tree_hash_root().as_bytes())?; + hasher.write(state.eth1_data_votes.tree_hash_root().as_bytes())?; + hasher.write(state.eth1_deposit_index.tree_hash_root().as_bytes())?; + hasher.write( self.validators .recalculate_tree_hash_root(&state.validators[..])? .as_bytes(), - ); - leaves.extend_from_slice( + )?; + hasher.write( state .balances .recalculate_tree_hash_root(&mut self.balances_arena, &mut self.balances)? .as_bytes(), - ); - leaves.extend_from_slice( + )?; + hasher.write( state .randao_mixes .recalculate_tree_hash_root(&mut self.fixed_arena, &mut self.randao_mixes)? 
.as_bytes(), - ); - leaves.extend_from_slice( + )?; + hasher.write( state .slashings .recalculate_tree_hash_root(&mut self.slashings_arena, &mut self.slashings)? .as_bytes(), - ); - leaves.append(&mut state.previous_epoch_attestations.tree_hash_root()); - leaves.append(&mut state.current_epoch_attestations.tree_hash_root()); - leaves.append(&mut state.justification_bits.tree_hash_root()); - leaves.append(&mut state.previous_justified_checkpoint.tree_hash_root()); - leaves.append(&mut state.current_justified_checkpoint.tree_hash_root()); - leaves.append(&mut state.finalized_checkpoint.tree_hash_root()); + )?; + hasher.write( + state + .previous_epoch_attestations + .tree_hash_root() + .as_bytes(), + )?; + hasher.write(state.current_epoch_attestations.tree_hash_root().as_bytes())?; + hasher.write(state.justification_bits.tree_hash_root().as_bytes())?; + hasher.write( + state + .previous_justified_checkpoint + .tree_hash_root() + .as_bytes(), + )?; + hasher.write( + state + .current_justified_checkpoint + .tree_hash_root() + .as_bytes(), + )?; + hasher.write(state.finalized_checkpoint.tree_hash_root().as_bytes())?; - Ok(Hash256::from_slice(&tree_hash::merkle_root(&leaves, 0))) + hasher.finish().map_err(Into::into) } } @@ -181,10 +202,7 @@ impl ValidatorsListTreeHashCache { std::mem::replace(&mut self.list_arena, list_arena); - Ok(Hash256::from_slice(&mix_in_length( - list_root.as_bytes(), - validators.len(), - ))) + Ok(mix_in_length(&list_root, validators.len())) } } @@ -202,8 +220,22 @@ impl ParallelValidatorTreeHash { /// Allocates the necessary memory to store all of the cached Merkle trees but does perform any /// hashing. 
fn new(validators: &[Validator]) -> Self { - let num_arenas = (validators.len() + VALIDATORS_PER_ARENA - 1) / VALIDATORS_PER_ARENA; - let mut arenas = vec![(CacheArena::default(), vec![]); num_arenas]; + let num_arenas = std::cmp::max( + 1, + (validators.len() + VALIDATORS_PER_ARENA - 1) / VALIDATORS_PER_ARENA, + ); + + let mut arenas = (1..=num_arenas) + .map(|i| { + let num_validators = if i == num_arenas { + validators.len() % VALIDATORS_PER_ARENA + } else { + VALIDATORS_PER_ARENA + }; + NODES_PER_VALIDATOR * num_validators + }) + .map(|capacity| (CacheArena::with_capacity(capacity), vec![])) + .collect::>(); validators.iter().enumerate().for_each(|(i, v)| { let (arena, caches) = &mut arenas[i / VALIDATORS_PER_ARENA]; @@ -272,3 +304,16 @@ impl ParallelValidatorTreeHash { .collect() } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn validator_node_count() { + let mut arena = CacheArena::default(); + let v = Validator::default(); + let _cache = v.new_tree_hash_cache(&mut arena); + assert_eq!(arena.backing_len(), NODES_PER_VALIDATOR); + } +} diff --git a/eth2/types/src/signing_root.rs b/eth2/types/src/signing_root.rs index 2e764b8215..456064f52e 100644 --- a/eth2/types/src/signing_root.rs +++ b/eth2/types/src/signing_root.rs @@ -14,12 +14,10 @@ pub struct SigningRoot { pub trait SignedRoot: TreeHash { fn signing_root(&self, domain: u64) -> Hash256 { - Hash256::from_slice( - &SigningRoot { - object_root: Hash256::from_slice(&self.tree_hash_root()), - domain, - } - .tree_hash_root(), - ) + SigningRoot { + object_root: self.tree_hash_root(), + domain, + } + .tree_hash_root() } } diff --git a/eth2/types/src/slot_epoch_macros.rs b/eth2/types/src/slot_epoch_macros.rs index fa5a296624..49a1b8e0d5 100644 --- a/eth2/types/src/slot_epoch_macros.rs +++ b/eth2/types/src/slot_epoch_macros.rs @@ -237,8 +237,8 @@ macro_rules! 
impl_ssz { 32 / 8 } - fn tree_hash_root(&self) -> Vec { - int_to_bytes::int_to_bytes32(self.0) + fn tree_hash_root(&self) -> tree_hash::Hash256 { + tree_hash::Hash256::from_slice(&int_to_bytes::int_to_fixed_bytes32(self.0)) } } diff --git a/eth2/types/src/test_utils/builders/testing_beacon_block_builder.rs b/eth2/types/src/test_utils/builders/testing_beacon_block_builder.rs index 46d901beba..84f2296d64 100644 --- a/eth2/types/src/test_utils/builders/testing_beacon_block_builder.rs +++ b/eth2/types/src/test_utils/builders/testing_beacon_block_builder.rs @@ -284,14 +284,14 @@ impl TestingBeaconBlockBuilder { // Vector containing all leaves let leaves = datas .iter() - .map(|data| Hash256::from_slice(&data.tree_hash_root())) + .map(|data| data.tree_hash_root()) .collect::>(); // Building a VarList from leaves let deposit_data_list = VariableList::<_, U4294967296>::from(leaves.clone()); // Setting the deposit_root to be the tree_hash_root of the VarList - state.eth1_data.deposit_root = Hash256::from_slice(&deposit_data_list.tree_hash_root()); + state.eth1_data.deposit_root = deposit_data_list.tree_hash_root(); // Building the merkle tree used for generating proofs let tree = MerkleTree::create(&leaves[..], spec.deposit_contract_tree_depth as usize); diff --git a/eth2/types/src/test_utils/macros.rs b/eth2/types/src/test_utils/macros.rs index 95a7c480ca..df449c712d 100644 --- a/eth2/types/src/test_utils/macros.rs +++ b/eth2/types/src/test_utils/macros.rs @@ -39,9 +39,8 @@ macro_rules! tree_hash_tests { let mut rng = XorShiftRng::from_seed([42; 16]); let original = <$type>::random_for_test(&mut rng); - let result = original.tree_hash_root(); - - assert_eq!(result.len(), 32); + // Tree hashing should not panic. + original.tree_hash_root(); } }; } diff --git a/eth2/types/src/tree_hash_impls.rs b/eth2/types/src/tree_hash_impls.rs index bc2be51e52..0b08a550b7 100644 --- a/eth2/types/src/tree_hash_impls.rs +++ b/eth2/types/src/tree_hash_impls.rs @@ -2,10 +2,10 @@ //! //! 
It makes some assumptions about the layouts and update patterns of other structs in this //! crate, and should be updated carefully whenever those structs are changed. -use crate::{Epoch, Hash256, Validator}; +use crate::{Epoch, Hash256, PublicKeyBytes, Validator}; use cached_tree_hash::{int_log, CacheArena, CachedTreeHash, Error, TreeHashCache}; use int_to_bytes::int_to_fixed_bytes32; -use tree_hash::TreeHash; +use tree_hash::merkle_root; /// Number of struct fields on `Validator`. const NUM_VALIDATOR_FIELDS: usize = 8; @@ -53,7 +53,7 @@ fn process_field_by_index( force_update: bool, ) -> bool { match field_idx { - 0 => process_vec_field(v.pubkey.tree_hash_root(), leaf, force_update), + 0 => process_pubkey_bytes_field(&v.pubkey, leaf, force_update), 1 => process_slice_field(v.withdrawal_credentials.as_bytes(), leaf, force_update), 2 => process_u64_field(v.effective_balance, leaf, force_update), 3 => process_bool_field(v.slashed, leaf, force_update), @@ -68,13 +68,13 @@ fn process_field_by_index( } } -fn process_vec_field(new_tree_hash: Vec, leaf: &mut Hash256, force_update: bool) -> bool { - if force_update || leaf.as_bytes() != &new_tree_hash[..] 
{ - leaf.assign_from_slice(&new_tree_hash); - true - } else { - false - } +fn process_pubkey_bytes_field( + val: &PublicKeyBytes, + leaf: &mut Hash256, + force_update: bool, +) -> bool { + let new_tree_hash = merkle_root(val.as_slice(), 0); + process_slice_field(new_tree_hash.as_bytes(), leaf, force_update) } fn process_slice_field(new_tree_hash: &[u8], leaf: &mut Hash256, force_update: bool) -> bool { @@ -106,6 +106,7 @@ mod test { use crate::Epoch; use rand::SeedableRng; use rand_xorshift::XorShiftRng; + use tree_hash::TreeHash; fn test_validator_tree_hash(v: &Validator) { let arena = &mut CacheArena::default(); @@ -152,4 +153,13 @@ mod test { .map(|_| Validator::random_for_test(&mut rng)) .for_each(|v| test_validator_tree_hash(&v)); } + + #[test] + pub fn smallvec_size_check() { + // If this test fails we need to go and reassess the length of the `SmallVec` in + // `cached_tree_hash::TreeHashCache`. If the size of the `SmallVec` is too slow we're going + // to start doing heap allocations for each validator, this will fragment memory and slow + // us down. + assert!(NUM_VALIDATOR_FIELDS <= 8,); + } } diff --git a/eth2/utils/bls/src/macros.rs b/eth2/utils/bls/src/macros.rs index 6119f355a3..f60832193d 100644 --- a/eth2/utils/bls/src/macros.rs +++ b/eth2/utils/bls/src/macros.rs @@ -56,13 +56,20 @@ macro_rules! 
impl_tree_hash { unreachable!("Vector should never be packed.") } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> tree_hash::Hash256 { // We could use the tree hash implementation for `FixedVec`, // but benchmarks have show that to be at least 15% slower because of the // unnecessary copying and allocation (one Vec per byte) let values_per_chunk = tree_hash::BYTES_PER_CHUNK; let minimum_chunk_count = ($byte_size + values_per_chunk - 1) / values_per_chunk; - tree_hash::merkle_root(&self.as_ssz_bytes(), minimum_chunk_count) + + let mut hasher = tree_hash::MerkleHasher::with_leaves(minimum_chunk_count); + hasher + .write(&self.as_ssz_bytes()) + .expect("bls should not exceed leaf count"); + hasher + .finish() + .expect("bls should not exceed leaf count from buffer") } } }; @@ -111,6 +118,10 @@ macro_rules! bytes_struct { self.bytes.to_vec() } + pub fn as_slice(&self) -> &[u8] { + &self.bytes + } + fn get_bytes(bytes: &[u8]) -> Result<[u8; $byte_size], ssz::DecodeError> { let mut result = [0; $byte_size]; if bytes.len() != $byte_size { @@ -171,7 +182,28 @@ macro_rules! 
bytes_struct { impl_ssz!($name, $byte_size, "$type"); - impl_tree_hash!($name, $byte_size); + impl tree_hash::TreeHash for $name { + fn tree_hash_type() -> tree_hash::TreeHashType { + tree_hash::TreeHashType::Vector + } + + fn tree_hash_packed_encoding(&self) -> Vec { + unreachable!("Vector should never be packed.") + } + + fn tree_hash_packing_factor() -> usize { + unreachable!("Vector should never be packed.") + } + + fn tree_hash_root(&self) -> tree_hash::Hash256 { + let values_per_chunk = tree_hash::BYTES_PER_CHUNK; + let minimum_chunk_count = ($byte_size + values_per_chunk - 1) / values_per_chunk; + + let mut hasher = tree_hash::MerkleHasher::with_leaves(minimum_chunk_count); + hasher.write(&self.bytes).expect("bls should not exceed leaf count"); + hasher.finish().expect("bls should not exceed leaf count from buffer") + } + } impl serde::ser::Serialize for $name { /// Serde serialization is compliant the Ethereum YAML test format. diff --git a/eth2/utils/cached_tree_hash/Cargo.toml b/eth2/utils/cached_tree_hash/Cargo.toml index 5ed95c78dc..252806aaaa 100644 --- a/eth2/utils/cached_tree_hash/Cargo.toml +++ b/eth2/utils/cached_tree_hash/Cargo.toml @@ -11,6 +11,7 @@ eth2_hashing = "0.1" eth2_ssz_derive = "0.1.0" eth2_ssz = "0.1.2" tree_hash = "0.1" +smallvec = "1.2.0" [dev-dependencies] quickcheck = "0.9" diff --git a/eth2/utils/cached_tree_hash/src/cache.rs b/eth2/utils/cached_tree_hash/src/cache.rs index 393cb112d2..782cedcbfc 100644 --- a/eth2/utils/cached_tree_hash/src/cache.rs +++ b/eth2/utils/cached_tree_hash/src/cache.rs @@ -1,6 +1,8 @@ use crate::cache_arena; +use crate::SmallVec8; use crate::{Error, Hash256}; use eth2_hashing::{hash32_concat, ZERO_HASHES}; +use smallvec::smallvec; use ssz_derive::{Decode, Encode}; use tree_hash::BYTES_PER_CHUNK; @@ -17,28 +19,25 @@ pub struct TreeHashCache { /// /// The leaves are contained in `self.layers[self.depth]`, and each other layer `i` /// contains the parents of the nodes in layer `i + 1`. 
- layers: Vec, + layers: SmallVec8, } impl TreeHashCache { /// Create a new cache with the given `depth` with enough nodes allocated to suit `leaves`. All /// leaves are set to `Hash256::zero()`. pub fn new(arena: &mut CacheArena, depth: usize, leaves: usize) -> Self { - // TODO: what about when leaves is zero? - let layers = (0..=depth) - .map(|i| { - let vec = arena.alloc(); - vec.extend_with_vec( - arena, - vec![Hash256::zero(); nodes_per_layer(i, depth, leaves)], - ) - .expect( - "A newly allocated sub-arena cannot fail unless it has reached max capacity", - ); + let mut layers = SmallVec8::with_capacity(depth + 1); - vec - }) - .collect(); + for i in 0..=depth { + let vec = arena.alloc(); + vec.extend_with_vec( + arena, + smallvec![Hash256::zero(); nodes_per_layer(i, depth, leaves)], + ) + .expect("A newly allocated sub-arena cannot fail unless it has reached max capacity"); + + layers.push(vec) + } TreeHashCache { initialized: false, @@ -62,7 +61,7 @@ impl TreeHashCache { &mut self, arena: &mut CacheArena, mut leaves: impl Iterator + ExactSizeIterator, - ) -> Result, Error> { + ) -> Result, Error> { let new_leaf_count = leaves.len(); if new_leaf_count < self.leaves().len(arena)? { @@ -71,21 +70,19 @@ impl TreeHashCache { return Err(Error::TooManyLeaves); } + let mut dirty = SmallVec8::new(); + // Update the existing leaves - let mut dirty = self - .leaves() + self.leaves() .iter_mut(arena)? 
.enumerate() .zip(&mut leaves) - .flat_map(|((i, leaf), new_leaf)| { + .for_each(|((i, leaf), new_leaf)| { if !self.initialized || leaf.as_bytes() != new_leaf { leaf.assign_from_slice(&new_leaf); - Some(i) - } else { - None + dirty.push(i); } - }) - .collect::>(); + }); // Push the rest of the new leaves (if any) dirty.extend(self.leaves().len(arena)?..new_leaf_count); @@ -101,7 +98,7 @@ impl TreeHashCache { pub fn update_merkle_root( &mut self, arena: &mut CacheArena, - mut dirty_indices: Vec, + mut dirty_indices: SmallVec8, ) -> Result { if dirty_indices.is_empty() { return Ok(self.root(arena)); @@ -164,8 +161,13 @@ impl TreeHashCache { } /// Compute the dirty indices for one layer up. -fn lift_dirty(dirty_indices: &[usize]) -> Vec { - let mut new_dirty = dirty_indices.iter().map(|i| *i / 2).collect::>(); +fn lift_dirty(dirty_indices: &[usize]) -> SmallVec8 { + let mut new_dirty = SmallVec8::with_capacity(dirty_indices.len()); + + for i in 0..dirty_indices.len() { + new_dirty.push(dirty_indices[i] / 2) + } + new_dirty.dedup(); new_dirty } @@ -202,6 +204,21 @@ fn nodes_per_layer(layer: usize, depth: usize, leaves: usize) -> usize { mod test { use super::*; + #[test] + fn zero_leaves() { + let arena = &mut CacheArena::default(); + + let depth = 3; + let num_leaves = 0; + + let mut cache = TreeHashCache::new(arena, depth, num_leaves); + let leaves: Vec<[u8; BYTES_PER_CHUNK]> = vec![]; + + cache + .recalculate_merkle_root(arena, leaves.into_iter()) + .expect("should calculate root"); + } + #[test] fn test_node_per_layer_unbalanced_tree() { assert_eq!(nodes_per_layer(0, 3, 5), 1); diff --git a/eth2/utils/cached_tree_hash/src/cache_arena.rs b/eth2/utils/cached_tree_hash/src/cache_arena.rs index 272a9d2db6..5923b386b5 100644 --- a/eth2/utils/cached_tree_hash/src/cache_arena.rs +++ b/eth2/utils/cached_tree_hash/src/cache_arena.rs @@ -1,3 +1,4 @@ +use crate::SmallVec8; use ssz::{Decode, Encode}; use ssz_derive::{Decode, Encode}; use std::marker::PhantomData; @@ -27,6 
+28,14 @@ pub struct CacheArena { } impl CacheArena { + /// Instantiate self with a backing array of the given `capacity`. + pub fn with_capacity(capacity: usize) -> Self { + Self { + backing: Vec::with_capacity(capacity), + offsets: vec![], + } + } + /// Produce an allocation of zero length at the end of the backing array. pub fn alloc(&mut self) -> CacheArenaAllocation { let alloc_id = self.offsets.len(); @@ -204,7 +213,11 @@ pub struct CacheArenaAllocation { impl CacheArenaAllocation { /// Grow the allocation in `arena`, appending `vec` to the current values. - pub fn extend_with_vec(&self, arena: &mut CacheArena, vec: Vec) -> Result<(), Error> { + pub fn extend_with_vec( + &self, + arena: &mut CacheArena, + vec: SmallVec8, + ) -> Result<(), Error> { let len = arena.len(self.alloc_id)?; arena.splice_forgetful(self.alloc_id, len..len, vec)?; Ok(()) @@ -264,6 +277,7 @@ impl CacheArenaAllocation { #[cfg(test)] mod tests { use crate::Hash256; + use smallvec::smallvec; type CacheArena = super::CacheArena; type CacheArenaAllocation = super::CacheArenaAllocation; @@ -300,7 +314,7 @@ mod tests { len ); - sub.extend_with_vec(arena, vec![hash(len), hash(len + 1)]) + sub.extend_with_vec(arena, smallvec![hash(len), hash(len + 1)]) .expect("should extend with vec"); len += 2; diff --git a/eth2/utils/cached_tree_hash/src/impls.rs b/eth2/utils/cached_tree_hash/src/impls.rs index 36210db885..6c7e3cf41d 100644 --- a/eth2/utils/cached_tree_hash/src/impls.rs +++ b/eth2/utils/cached_tree_hash/src/impls.rs @@ -97,12 +97,10 @@ impl CachedTreeHash for VariableList { arena: &mut CacheArena, cache: &mut TreeHashCache, ) -> Result { - Ok(Hash256::from_slice(&mix_in_length( - cache - .recalculate_merkle_root(arena, hash256_iter(&self))? 
- .as_bytes(), + Ok(mix_in_length( + &cache.recalculate_merkle_root(arena, hash256_iter(&self))?, self.len(), - ))) + )) } } @@ -121,12 +119,10 @@ impl CachedTreeHash for VariableList { arena: &mut CacheArena, cache: &mut TreeHashCache, ) -> Result { - Ok(Hash256::from_slice(&mix_in_length( - cache - .recalculate_merkle_root(arena, u64_iter(&self))? - .as_bytes(), + Ok(mix_in_length( + &cache.recalculate_merkle_root(arena, u64_iter(&self))?, self.len(), - ))) + )) } } diff --git a/eth2/utils/cached_tree_hash/src/lib.rs b/eth2/utils/cached_tree_hash/src/lib.rs index 1c483948e3..d60c920c3e 100644 --- a/eth2/utils/cached_tree_hash/src/lib.rs +++ b/eth2/utils/cached_tree_hash/src/lib.rs @@ -3,7 +3,9 @@ mod cache_arena; mod impls; #[cfg(test)] mod test; +use smallvec::SmallVec; +type SmallVec8 = SmallVec<[T; 8]>; pub type CacheArena = cache_arena::CacheArena; pub use crate::cache::TreeHashCache; diff --git a/eth2/utils/cached_tree_hash/src/test.rs b/eth2/utils/cached_tree_hash/src/test.rs index ffd47e2fd9..244439ab30 100644 --- a/eth2/utils/cached_tree_hash/src/test.rs +++ b/eth2/utils/cached_tree_hash/src/test.rs @@ -76,7 +76,7 @@ fn fixed_vector_hash256() { let mut cache = vec.new_tree_hash_cache(arena); assert_eq!( - Hash256::from_slice(&vec.tree_hash_root()), + vec.tree_hash_root(), vec.recalculate_tree_hash_root(arena, &mut cache).unwrap() ); } @@ -90,7 +90,7 @@ fn fixed_vector_u64() { let mut cache = vec.new_tree_hash_cache(arena); assert_eq!( - Hash256::from_slice(&vec.tree_hash_root()), + vec.tree_hash_root(), vec.recalculate_tree_hash_root(arena, &mut cache).unwrap() ); } @@ -104,7 +104,7 @@ fn variable_list_hash256() { let mut cache = list.new_tree_hash_cache(arena); assert_eq!( - Hash256::from_slice(&list.tree_hash_root()), + list.tree_hash_root(), list.recalculate_tree_hash_root(arena, &mut cache).unwrap() ); } diff --git a/eth2/utils/eth2_hashing/src/lib.rs b/eth2/utils/eth2_hashing/src/lib.rs index 44dce67613..e2f29c0364 100644 --- 
a/eth2/utils/eth2_hashing/src/lib.rs +++ b/eth2/utils/eth2_hashing/src/lib.rs @@ -5,7 +5,7 @@ //! defining it once in this crate makes it easy to replace. #[cfg(not(target_arch = "wasm32"))] -pub use ring::digest::{digest, Context, SHA256}; +pub use ring::digest::{digest, Context, Digest, SHA256}; #[cfg(target_arch = "wasm32")] use sha2::{Digest, Sha256}; diff --git a/eth2/utils/ssz/src/decode/impls.rs b/eth2/utils/ssz/src/decode/impls.rs index 39b7fa3c1a..a33fcac189 100644 --- a/eth2/utils/ssz/src/decode/impls.rs +++ b/eth2/utils/ssz/src/decode/impls.rs @@ -1,6 +1,7 @@ use super::*; use core::num::NonZeroUsize; use ethereum_types::{H256, U128, U256}; +use smallvec::SmallVec; macro_rules! impl_decodable_for_uint { ($type: ident, $bit_size: expr) => { @@ -364,25 +365,39 @@ macro_rules! impl_decodable_for_u8_array { impl_decodable_for_u8_array!(4); impl_decodable_for_u8_array!(32); -impl Decode for Vec { - fn is_ssz_fixed_len() -> bool { - false - } +macro_rules! impl_for_vec { + ($type: ty) => { + impl Decode for $type { + fn is_ssz_fixed_len() -> bool { + false + } - fn from_ssz_bytes(bytes: &[u8]) -> Result { - if bytes.is_empty() { - Ok(vec![]) - } else if T::is_ssz_fixed_len() { - bytes - .chunks(T::ssz_fixed_len()) - .map(|chunk| T::from_ssz_bytes(chunk)) - .collect() - } else { - decode_list_of_variable_length_items(bytes) + fn from_ssz_bytes(bytes: &[u8]) -> Result { + if bytes.is_empty() { + Ok(vec![].into()) + } else if T::is_ssz_fixed_len() { + bytes + .chunks(T::ssz_fixed_len()) + .map(|chunk| T::from_ssz_bytes(chunk)) + .collect() + } else { + decode_list_of_variable_length_items(bytes).map(|vec| vec.into()) + } + } } - } + }; } +impl_for_vec!(Vec); +impl_for_vec!(SmallVec<[T; 1]>); +impl_for_vec!(SmallVec<[T; 2]>); +impl_for_vec!(SmallVec<[T; 3]>); +impl_for_vec!(SmallVec<[T; 4]>); +impl_for_vec!(SmallVec<[T; 5]>); +impl_for_vec!(SmallVec<[T; 6]>); +impl_for_vec!(SmallVec<[T; 7]>); +impl_for_vec!(SmallVec<[T; 8]>); + /// Decodes `bytes` as if it were a 
list of variable-length items. /// /// The `ssz::SszDecoder` can also perform this functionality, however it it significantly faster diff --git a/eth2/utils/ssz/src/encode/impls.rs b/eth2/utils/ssz/src/encode/impls.rs index ed619cc2b9..c9e9a4797f 100644 --- a/eth2/utils/ssz/src/encode/impls.rs +++ b/eth2/utils/ssz/src/encode/impls.rs @@ -1,6 +1,7 @@ use super::*; use core::num::NonZeroUsize; use ethereum_types::{H256, U128, U256}; +use smallvec::SmallVec; macro_rules! impl_encodable_for_uint { ($type: ident, $bit_size: expr) => { @@ -230,40 +231,54 @@ impl Encode for Option { } } -impl Encode for Vec { - fn is_ssz_fixed_len() -> bool { - false - } - - fn ssz_bytes_len(&self) -> usize { - if ::is_ssz_fixed_len() { - ::ssz_fixed_len() * self.len() - } else { - let mut len = self.iter().map(|item| item.ssz_bytes_len()).sum(); - len += BYTES_PER_LENGTH_OFFSET * self.len(); - len - } - } - - fn ssz_append(&self, buf: &mut Vec) { - if T::is_ssz_fixed_len() { - buf.reserve(T::ssz_fixed_len() * self.len()); - - for item in self { - item.ssz_append(buf); - } - } else { - let mut encoder = SszEncoder::list(buf, self.len() * BYTES_PER_LENGTH_OFFSET); - - for item in self { - encoder.append(item); +macro_rules! 
impl_for_vec { + ($type: ty) => { + impl Encode for $type { + fn is_ssz_fixed_len() -> bool { + false } - encoder.finalize(); + fn ssz_bytes_len(&self) -> usize { + if ::is_ssz_fixed_len() { + ::ssz_fixed_len() * self.len() + } else { + let mut len = self.iter().map(|item| item.ssz_bytes_len()).sum(); + len += BYTES_PER_LENGTH_OFFSET * self.len(); + len + } + } + + fn ssz_append(&self, buf: &mut Vec) { + if T::is_ssz_fixed_len() { + buf.reserve(T::ssz_fixed_len() * self.len()); + + for item in self { + item.ssz_append(buf); + } + } else { + let mut encoder = SszEncoder::list(buf, self.len() * BYTES_PER_LENGTH_OFFSET); + + for item in self { + encoder.append(item); + } + + encoder.finalize(); + } + } } - } + }; } +impl_for_vec!(Vec); +impl_for_vec!(SmallVec<[T; 1]>); +impl_for_vec!(SmallVec<[T; 2]>); +impl_for_vec!(SmallVec<[T; 3]>); +impl_for_vec!(SmallVec<[T; 4]>); +impl_for_vec!(SmallVec<[T; 5]>); +impl_for_vec!(SmallVec<[T; 6]>); +impl_for_vec!(SmallVec<[T; 7]>); +impl_for_vec!(SmallVec<[T; 8]>); + impl Encode for bool { fn is_ssz_fixed_len() -> bool { true diff --git a/eth2/utils/ssz_types/src/bitfield.rs b/eth2/utils/ssz_types/src/bitfield.rs index d18267ee28..974cdb2281 100644 --- a/eth2/utils/ssz_types/src/bitfield.rs +++ b/eth2/utils/ssz_types/src/bitfield.rs @@ -5,6 +5,7 @@ use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use serde_hex::{encode as hex_encode, PrefixedHexVisitor}; use ssz::{Decode, Encode}; +use tree_hash::Hash256; use typenum::Unsigned; /// A marker trait applied to `Variable` and `Fixed` that defines the behaviour of a `Bitfield`. @@ -590,7 +591,7 @@ impl tree_hash::TreeHash for Bitfield> { unreachable!("List should never be packed.") } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> Hash256 { // Note: we use `as_slice` because it does _not_ have the length-delimiting bit set (or // present). 
let root = bitfield_bytes_tree_hash_root::(self.as_slice()); @@ -611,7 +612,7 @@ impl tree_hash::TreeHash for Bitfield> { unreachable!("Vector should never be packed.") } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> Hash256 { bitfield_bytes_tree_hash_root::(self.as_slice()) } } diff --git a/eth2/utils/ssz_types/src/fixed_vector.rs b/eth2/utils/ssz_types/src/fixed_vector.rs index eb63d6d39c..91b6912f81 100644 --- a/eth2/utils/ssz_types/src/fixed_vector.rs +++ b/eth2/utils/ssz_types/src/fixed_vector.rs @@ -4,6 +4,7 @@ use serde_derive::{Deserialize, Serialize}; use std::marker::PhantomData; use std::ops::{Deref, Index, IndexMut}; use std::slice::SliceIndex; +use tree_hash::Hash256; use typenum::Unsigned; pub use typenum; @@ -162,7 +163,7 @@ where unreachable!("Vector should never be packed.") } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> Hash256 { vec_tree_hash_root::(&self.vec) } } @@ -375,24 +376,27 @@ mod test { assert_eq!(fixed.tree_hash_root(), merkle_root(&[0; 32], 0)); let fixed: FixedVector = FixedVector::from(vec![a]); - assert_eq!(fixed.tree_hash_root(), merkle_root(&a.tree_hash_root(), 0)); + assert_eq!( + fixed.tree_hash_root(), + merkle_root(a.tree_hash_root().as_bytes(), 0) + ); let fixed: FixedVector = FixedVector::from(vec![a; 8]); assert_eq!( fixed.tree_hash_root(), - merkle_root(&repeat(&a.tree_hash_root(), 8), 0) + merkle_root(&repeat(a.tree_hash_root().as_bytes(), 8), 0) ); let fixed: FixedVector = FixedVector::from(vec![a; 13]); assert_eq!( fixed.tree_hash_root(), - merkle_root(&repeat(&a.tree_hash_root(), 13), 0) + merkle_root(&repeat(a.tree_hash_root().as_bytes(), 13), 0) ); let fixed: FixedVector = FixedVector::from(vec![a; 16]); assert_eq!( fixed.tree_hash_root(), - merkle_root(&repeat(&a.tree_hash_root(), 16), 0) + merkle_root(&repeat(a.tree_hash_root().as_bytes(), 16), 0) ); } } diff --git a/eth2/utils/ssz_types/src/tree_hash.rs b/eth2/utils/ssz_types/src/tree_hash.rs index 5074034dab..e08c1d62fb 
100644 --- a/eth2/utils/ssz_types/src/tree_hash.rs +++ b/eth2/utils/ssz_types/src/tree_hash.rs @@ -1,48 +1,58 @@ -use tree_hash::{merkle_root, TreeHash, TreeHashType, BYTES_PER_CHUNK}; +use tree_hash::{Hash256, MerkleHasher, TreeHash, TreeHashType, BYTES_PER_CHUNK}; use typenum::Unsigned; /// A helper function providing common functionality between the `TreeHash` implementations for /// `FixedVector` and `VariableList`. -pub fn vec_tree_hash_root(vec: &[T]) -> Vec +pub fn vec_tree_hash_root(vec: &[T]) -> Hash256 where T: TreeHash, N: Unsigned, { - let (leaves, minimum_chunk_count) = match T::tree_hash_type() { + match T::tree_hash_type() { TreeHashType::Basic => { - let mut leaves = - Vec::with_capacity((BYTES_PER_CHUNK / T::tree_hash_packing_factor()) * vec.len()); + let mut hasher = MerkleHasher::with_leaves( + (N::to_usize() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(), + ); for item in vec { - leaves.append(&mut item.tree_hash_packed_encoding()); + hasher + .write(&item.tree_hash_packed_encoding()) + .expect("ssz_types variable vec should not contain more elements than max"); } - let values_per_chunk = T::tree_hash_packing_factor(); - let minimum_chunk_count = (N::to_usize() + values_per_chunk - 1) / values_per_chunk; - - (leaves, minimum_chunk_count) + hasher + .finish() + .expect("ssz_types variable vec should not have a remaining buffer") } TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { - let mut leaves = Vec::with_capacity(vec.len() * BYTES_PER_CHUNK); + let mut hasher = MerkleHasher::with_leaves(N::to_usize()); for item in vec { - leaves.append(&mut item.tree_hash_root()) + hasher + .write(item.tree_hash_root().as_bytes()) + .expect("ssz_types vec should not contain more elements than max"); } - let minimum_chunk_count = N::to_usize(); - - (leaves, minimum_chunk_count) + hasher + .finish() + .expect("ssz_types vec should not have a remaining buffer") } - }; - - merkle_root(&leaves, minimum_chunk_count) + } 
} /// A helper function providing common functionality for finding the Merkle root of some bytes that /// represent a bitfield. -pub fn bitfield_bytes_tree_hash_root(bytes: &[u8]) -> Vec { +pub fn bitfield_bytes_tree_hash_root(bytes: &[u8]) -> Hash256 { let byte_size = (N::to_usize() + 7) / 8; - let minimum_chunk_count = (byte_size + BYTES_PER_CHUNK - 1) / BYTES_PER_CHUNK; + let leaf_count = (byte_size + BYTES_PER_CHUNK - 1) / BYTES_PER_CHUNK; - merkle_root(bytes, minimum_chunk_count) + let mut hasher = MerkleHasher::with_leaves(leaf_count); + + hasher + .write(bytes) + .expect("bitfield should not exceed tree hash leaf limit"); + + hasher + .finish() + .expect("bitfield tree hash buffer should not exceed leaf limit") } diff --git a/eth2/utils/ssz_types/src/variable_list.rs b/eth2/utils/ssz_types/src/variable_list.rs index c033315123..65f72f236d 100644 --- a/eth2/utils/ssz_types/src/variable_list.rs +++ b/eth2/utils/ssz_types/src/variable_list.rs @@ -4,6 +4,7 @@ use serde_derive::{Deserialize, Serialize}; use std::marker::PhantomData; use std::ops::{Deref, DerefMut, Index, IndexMut}; use std::slice::SliceIndex; +use tree_hash::Hash256; use typenum::Unsigned; pub use typenum; @@ -189,7 +190,7 @@ where unreachable!("List should never be packed.") } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> Hash256 { let root = vec_tree_hash_root::(&self.vec); tree_hash::mix_in_length(&root, self.len()) @@ -318,7 +319,7 @@ mod test { round_trip::>(vec![0; 8].into()); } - fn root_with_length(bytes: &[u8], len: usize) -> Vec { + fn root_with_length(bytes: &[u8], len: usize) -> Hash256 { let root = merkle_root(bytes, 0); tree_hash::mix_in_length(&root, len) } @@ -369,7 +370,7 @@ mod test { output } - fn padded_root_with_length(bytes: &[u8], len: usize, min_nodes: usize) -> Vec { + fn padded_root_with_length(bytes: &[u8], len: usize, min_nodes: usize) -> Hash256 { let root = merkle_root(bytes, min_nodes); tree_hash::mix_in_length(&root, len) } @@ -388,7 +389,7 @@ 
mod test { let fixed: VariableList = VariableList::from(vec![a; i]); assert_eq!( fixed.tree_hash_root(), - padded_root_with_length(&repeat(&a.tree_hash_root(), i), i, 1), + padded_root_with_length(&repeat(a.tree_hash_root().as_bytes(), i), i, 1), "U1 {}", i ); @@ -398,7 +399,7 @@ mod test { let fixed: VariableList = VariableList::from(vec![a; i]); assert_eq!( fixed.tree_hash_root(), - padded_root_with_length(&repeat(&a.tree_hash_root(), i), i, 8), + padded_root_with_length(&repeat(a.tree_hash_root().as_bytes(), i), i, 8), "U8 {}", i ); @@ -408,7 +409,7 @@ mod test { let fixed: VariableList = VariableList::from(vec![a; i]); assert_eq!( fixed.tree_hash_root(), - padded_root_with_length(&repeat(&a.tree_hash_root(), i), i, 13), + padded_root_with_length(&repeat(a.tree_hash_root().as_bytes(), i), i, 13), "U13 {}", i ); @@ -418,7 +419,7 @@ mod test { let fixed: VariableList = VariableList::from(vec![a; i]); assert_eq!( fixed.tree_hash_root(), - padded_root_with_length(&repeat(&a.tree_hash_root(), i), i, 16), + padded_root_with_length(&repeat(a.tree_hash_root().as_bytes(), i), i, 16), "U16 {}", i ); diff --git a/eth2/utils/tree_hash/Cargo.toml b/eth2/utils/tree_hash/Cargo.toml index 7d48b17072..7ef54a74ca 100644 --- a/eth2/utils/tree_hash/Cargo.toml +++ b/eth2/utils/tree_hash/Cargo.toml @@ -20,3 +20,4 @@ lazy_static = "1.4.0" [dependencies] ethereum-types = "0.8.0" eth2_hashing = "0.1.0" +smallvec = "1.2.0" diff --git a/eth2/utils/tree_hash/src/impls.rs b/eth2/utils/tree_hash/src/impls.rs index 25630cf970..ebd7e1be7b 100644 --- a/eth2/utils/tree_hash/src/impls.rs +++ b/eth2/utils/tree_hash/src/impls.rs @@ -1,6 +1,12 @@ use super::*; use ethereum_types::{H256, U128, U256}; +fn int_to_hash256(int: u64) -> Hash256 { + let mut bytes = [0; HASHSIZE]; + bytes[0..8].copy_from_slice(&int.to_le_bytes()); + Hash256::from_slice(&bytes) +} + macro_rules! impl_for_bitsize { ($type: ident, $bit_size: expr) => { impl TreeHash for $type { @@ -17,8 +23,8 @@ macro_rules! 
impl_for_bitsize { } #[allow(clippy::cast_lossless)] - fn tree_hash_root(&self) -> Vec { - int_to_bytes32(*self as u64) + fn tree_hash_root(&self) -> Hash256 { + int_to_hash256(*self as u64) } } }; @@ -43,12 +49,13 @@ impl TreeHash for bool { u8::tree_hash_packing_factor() } - fn tree_hash_root(&self) -> Vec { - int_to_bytes32(*self as u64) + fn tree_hash_root(&self) -> Hash256 { + int_to_hash256(*self as u64) } } -macro_rules! impl_for_u8_array { +/// Only valid for byte types less than 32 bytes. +macro_rules! impl_for_lt_32byte_u8_array { ($len: expr) => { impl TreeHash for [u8; $len] { fn tree_hash_type() -> TreeHashType { @@ -63,15 +70,17 @@ macro_rules! impl_for_u8_array { unreachable!("bytesN should never be packed.") } - fn tree_hash_root(&self) -> Vec { - merkle_root(&self[..], 0) + fn tree_hash_root(&self) -> Hash256 { + let mut result = [0; 32]; + result[0..$len].copy_from_slice(&self[..]); + Hash256::from_slice(&result) } } }; } -impl_for_u8_array!(4); -impl_for_u8_array!(32); +impl_for_lt_32byte_u8_array!(4); +impl_for_lt_32byte_u8_array!(32); impl TreeHash for U128 { fn tree_hash_type() -> TreeHashType { @@ -88,8 +97,10 @@ impl TreeHash for U128 { 2 } - fn tree_hash_root(&self) -> Vec { - merkle_root(&self.tree_hash_packed_encoding(), 0) + fn tree_hash_root(&self) -> Hash256 { + let mut result = [0; HASHSIZE]; + self.to_little_endian(&mut result[0..16]); + Hash256::from_slice(&result) } } @@ -108,8 +119,10 @@ impl TreeHash for U256 { 1 } - fn tree_hash_root(&self) -> Vec { - merkle_root(&self.tree_hash_packed_encoding(), 0) + fn tree_hash_root(&self) -> Hash256 { + let mut result = [0; 32]; + self.to_little_endian(&mut result[..]); + Hash256::from_slice(&result) } } @@ -126,18 +139,11 @@ impl TreeHash for H256 { 1 } - fn tree_hash_root(&self) -> Vec { - merkle_root(&self.as_bytes().to_vec(), 0) + fn tree_hash_root(&self) -> Hash256 { + *self } } -/// Returns `int` as little-endian bytes with a length of 32. 
-fn int_to_bytes32(int: u64) -> Vec { - let mut vec = int.to_le_bytes().to_vec(); - vec.resize(32, 0); - vec -} - #[cfg(test)] mod test { use super::*; @@ -149,22 +155,22 @@ mod test { let false_bytes: Vec = vec![0; 32]; - assert_eq!(true.tree_hash_root(), true_bytes); - assert_eq!(false.tree_hash_root(), false_bytes); + assert_eq!(true.tree_hash_root().as_bytes(), true_bytes.as_slice()); + assert_eq!(false.tree_hash_root().as_bytes(), false_bytes.as_slice()); } #[test] fn int_to_bytes() { - assert_eq!(&int_to_bytes32(0), &[0; 32]); + assert_eq!(int_to_hash256(0).as_bytes(), &[0; 32]); assert_eq!( - &int_to_bytes32(1), + int_to_hash256(1).as_bytes(), &[ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] ); assert_eq!( - &int_to_bytes32(u64::max_value()), + int_to_hash256(u64::max_value()).as_bytes(), &[ 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/eth2/utils/tree_hash/src/lib.rs b/eth2/utils/tree_hash/src/lib.rs index b98559f09a..7008e0068d 100644 --- a/eth2/utils/tree_hash/src/lib.rs +++ b/eth2/utils/tree_hash/src/lib.rs @@ -1,30 +1,80 @@ pub mod impls; +mod merkle_hasher; mod merkleize_padded; mod merkleize_standard; +pub use merkle_hasher::{Error, MerkleHasher}; pub use merkleize_padded::merkleize_padded; pub use merkleize_standard::merkleize_standard; +use eth2_hashing::{Context, SHA256}; +use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX}; + pub const BYTES_PER_CHUNK: usize = 32; pub const HASHSIZE: usize = 32; pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK; -/// Alias to `merkleize_padded(&bytes, minimum_chunk_count)` +pub type Hash256 = ethereum_types::H256; + +/// Convenience method for `MerkleHasher` which also provides some fast-paths for small trees. /// -/// If `minimum_chunk_count < bytes / BYTES_PER_CHUNK`, padding will be added for the difference -/// between the two. 
-pub fn merkle_root(bytes: &[u8], minimum_chunk_count: usize) -> Vec { - merkleize_padded(&bytes, minimum_chunk_count) +/// `minimum_leaf_count` will only be used if it is greater than or equal to the minimum number of leaves that can be created from `bytes`. +pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 { + let leaves = std::cmp::max( + (bytes.len() + (HASHSIZE - 1)) / HASHSIZE, + minimum_leaf_count, + ); + + if leaves == 0 { + // If there are no bytes then the hash is always zero. + Hash256::zero() + } else if leaves == 1 { + // If there is only one leaf, the hash is always those leaf bytes padded out to 32-bytes. + let mut hash = [0; HASHSIZE]; + hash[0..bytes.len()].copy_from_slice(bytes); + Hash256::from_slice(&hash) + } else if leaves == 2 { + // If there are only two leaves (this is common with BLS pubkeys), we can avoid some + // overhead with `MerkleHasher` and just do a simple 3-node tree here. + let mut leaves = [0; HASHSIZE * 2]; + leaves[0..bytes.len()].copy_from_slice(bytes); + + let mut context = Context::new(&SHA256); + context.update(&leaves); + let digest = context.finish(); + + Hash256::from_slice(digest.as_ref()) + } else { + // If there are 3 or more leaves, use `MerkleHasher`. + let mut hasher = MerkleHasher::with_leaves(leaves); + hasher + .write(bytes) + .expect("the number of leaves is adequate for the number of bytes"); + hasher + .finish() + .expect("the number of leaves is adequate for the number of bytes") + } } /// Returns the node created by hashing `root` and `length`. /// /// Used in `TreeHash` for inserting the length of a list above it's root. 
-pub fn mix_in_length(root: &[u8], length: usize) -> Vec { - let mut length_bytes = length.to_le_bytes().to_vec(); - length_bytes.resize(BYTES_PER_CHUNK, 0); +pub fn mix_in_length(root: &Hash256, length: usize) -> Hash256 { + let usize_len = std::mem::size_of::(); - eth2_hashing::hash32_concat(root, &length_bytes)[..].to_vec() + let mut length_bytes = [0; BYTES_PER_CHUNK]; + length_bytes[0..usize_len].copy_from_slice(&length.to_le_bytes()); + + Hash256::from_slice(ð2_hashing::hash32_concat(root.as_bytes(), &length_bytes)[..]) +} + +/// Returns a cached padding node for a given height. +fn get_zero_hash(height: usize) -> &'static [u8] { + if height <= ZERO_HASHES_MAX_INDEX { + &ZERO_HASHES[height] + } else { + panic!("Tree exceeds MAX_TREE_DEPTH of {}", ZERO_HASHES_MAX_INDEX) + } } #[derive(Debug, PartialEq, Clone)] @@ -42,7 +92,7 @@ pub trait TreeHash { fn tree_hash_packing_factor() -> usize; - fn tree_hash_root(&self) -> Vec; + fn tree_hash_root(&self) -> Hash256; } #[macro_export] @@ -104,6 +154,9 @@ mod test { eth2_hashing::hash(&preimage) }; - assert_eq!(mix_in_length(&[42; BYTES_PER_CHUNK], 42), hash); + assert_eq!( + mix_in_length(&Hash256::from_slice(&[42; BYTES_PER_CHUNK]), 42).as_bytes(), + &hash[..] + ); } } diff --git a/eth2/utils/tree_hash/src/merkle_hasher.rs b/eth2/utils/tree_hash/src/merkle_hasher.rs new file mode 100644 index 0000000000..9c921c0751 --- /dev/null +++ b/eth2/utils/tree_hash/src/merkle_hasher.rs @@ -0,0 +1,575 @@ +use crate::{get_zero_hash, Hash256, HASHSIZE}; +use eth2_hashing::{Context, Digest, SHA256}; +use smallvec::{smallvec, SmallVec}; +use std::mem; + +type SmallVec8 = SmallVec<[T; 8]>; + +#[derive(Clone, Debug, PartialEq)] +pub enum Error { + /// The maximum number of leaves defined by the initialization `depth` has been exceed. + MaximumLeavesExceeded { max_leaves: usize }, +} + +/// Helper struct to store either a hash digest or a slice. +/// +/// Should be used as a left or right value for some node. 
+enum Preimage<'a> { + Digest(Digest), + Slice(&'a [u8]), +} + +impl<'a> Preimage<'a> { + /// Returns a 32-byte slice. + fn as_bytes(&self) -> &[u8] { + match self { + Preimage::Digest(digest) => digest.as_ref(), + Preimage::Slice(slice) => slice, + } + } +} + +/// A node that has had a left child supplied, but not a right child. +struct HalfNode { + /// The hasher context. + context: Context, + /// The tree id of the node. The root node has an id of `1` and ids increase moving down the + /// tree from left to right. + id: usize, +} + +impl HalfNode { + /// Create a new half-node from the given `left` value. + fn new(id: usize, left: Preimage) -> Self { + let mut context = Context::new(&SHA256); + context.update(left.as_bytes()); + + Self { context, id } + } + + /// Complete the half-node by providing a `right` value. Returns a digest of the left and right + /// nodes. + fn finish(mut self, right: Preimage) -> Digest { + self.context.update(right.as_bytes()); + self.context.finish() + } +} + +/// Provides a Merkle-root hasher that allows for streaming bytes (i.e., providing any-length byte +/// slices without need to separate into leaves). Efficiently handles cases where not all leaves +/// have been provided by assuming all non-provided leaves are `[0; 32]` and pre-computing the +/// zero-value hashes at all depths of the tree. +/// +/// This algorithm aims to allocate as little memory as possible and it does this by "folding" up +/// the tree as each leaf is provided. Consider this step-by-step functional diagram of hashing a +/// tree with depth three: +/// +/// ## Functional Diagram +/// +/// Nodes that are `-` have not been defined and do not occupy memory. Nodes that are `L` are +/// leaves that are provided but are not stored. Nodes that have integers (`1`, `2`) are stored in +/// our struct. Finally, nodes that are `X` were stored, but are now removed.
+/// +/// ### Start +/// +/// ```ignore +/// - +/// / \ +/// - - +/// / \ / \ +/// - - - - +/// ``` +/// +/// ### Provide first leaf +/// +/// ```ignore +/// - +/// / \ +/// 2 - +/// / \ / \ +/// L - - - +/// ``` +/// +/// ### Provide second leaf +/// +/// ```ignore +/// 1 +/// / \ +/// X - +/// / \ / \ +/// L L - - +/// ``` +/// +/// ### Provide third leaf +/// +/// ```ignore +/// 1 +/// / \ +/// X 3 +/// / \ / \ +/// L L L - +/// ``` +/// +/// ### Provide fourth and final leaf +/// +/// ```ignore +/// 1 +/// / \ +/// X X +/// / \ / \ +/// L L L L +/// ``` +/// +pub struct MerkleHasher { + /// Stores the nodes that are half-complete and awaiting a right node. + /// + /// A smallvec of size 8 means we can hash a tree with 256 leaves without allocating on the + /// heap. Each half-node is 224 bytes, so this smallvec may store 1,792 bytes on the stack. + half_nodes: SmallVec8, + /// The depth of the tree that will be produced. + /// + /// Depth is counted top-down (i.e., the root node is at depth 0). A tree with 1 leaf has a + /// depth of 1, a tree with 4 leaves has a depth of 3. + depth: usize, + /// The next leaf that we are expecting to process. + next_leaf: usize, + /// A buffer of bytes that are waiting to be written to a leaf. + buffer: SmallVec<[u8; 32]>, + /// Set to Some(root) when the root of the tree is known. + root: Option, +} + +/// Returns the parent of node with id `i`. +fn get_parent(i: usize) -> usize { + i / 2 +} + +/// Gets the depth of a node with an id of `i`. +/// +/// It is a logic error to provide `i == 0`. +/// +/// E.g., if `i` is 1, depth is 0. If `i` is 2 or 3, depth is 1. +fn get_depth(i: usize) -> usize { + let total_bits = mem::size_of::() * 8; + total_bits - i.leading_zeros() as usize - 1 +} + +impl MerkleHasher { + /// Instantiate a hasher for a tree with a given number of leaves. + /// + /// `num_leaves` will be rounded to the next power of two.
E.g., if `num_leaves == 6`, then the + /// tree will _actually_ be able to accommodate 8 leaves and the resulting hasher is exactly the + /// same as one that was instantiated with `Self::with_leaves(8)`. + /// + /// ## Notes + /// + /// If `num_leaves == 0`, a tree of depth 1 will be created. If no leaves are provided it will + /// return a root of `[0; 32]`. + pub fn with_leaves(num_leaves: usize) -> Self { + let depth = get_depth(num_leaves.next_power_of_two()) + 1; + Self::with_depth(depth) + } + + /// Instantiates a new, empty hasher for a tree with `depth` layers which will have capacity + /// for `1 << (depth - 1)` leaf nodes. + /// + /// It is not possible to grow the depth of the tree after instantiation. + /// + /// ## Panics + /// + /// Panics if `depth == 0`. + fn with_depth(depth: usize) -> Self { + assert!(depth > 0, "merkle tree cannot have a depth of zero"); + + Self { + half_nodes: SmallVec::with_capacity(depth - 1), + depth, + next_leaf: 1 << (depth - 1), + buffer: SmallVec::with_capacity(32), + root: None, + } + } + + /// Write some bytes to the hasher. + /// + /// ## Errors + /// + /// Returns an error if the given bytes would create a leaf that would exceed the maximum + /// permissible number of leaves defined by the initialization `depth`. E.g., a tree of `depth + /// == 2` can only accept 2 leaves. A tree of `depth == 14` can only accept 8,192 leaves.
+ pub fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { + let mut ptr = 0; + while ptr <= bytes.len() { + let slice = &bytes[ptr..std::cmp::min(bytes.len(), ptr + HASHSIZE)]; + + if self.buffer.is_empty() && slice.len() == HASHSIZE { + self.process_leaf(slice)?; + ptr += HASHSIZE + } else if self.buffer.len() + slice.len() < HASHSIZE { + self.buffer.extend_from_slice(slice); + ptr += HASHSIZE + } else { + let buf_len = self.buffer.len(); + let required = HASHSIZE - buf_len; + + let mut leaf = [0; HASHSIZE]; + leaf[..buf_len].copy_from_slice(&self.buffer); + leaf[buf_len..].copy_from_slice(&slice[0..required]); + + self.process_leaf(&leaf)?; + self.buffer = smallvec![]; + + ptr += required + } + } + + Ok(()) + } + + /// Process the next leaf in the tree. + /// + /// ## Errors + /// + /// Returns an error if the given leaf would exceed the maximum permissible number of leaves + /// defined by the initialization `depth`. E.g., a tree of `depth == 2` can only accept 2 + /// leaves. A tree of `depth == 14` can only accept 8,192 leaves. + fn process_leaf(&mut self, leaf: &[u8]) -> Result<(), Error> { + assert_eq!(leaf.len(), HASHSIZE, "a leaf must be 32 bytes"); + + let max_leaves = 1 << (self.depth + 1); + + if self.next_leaf > max_leaves { + return Err(Error::MaximumLeavesExceeded { max_leaves }); + } else if self.next_leaf == 1 { + // A tree of depth one has a root that is equal to the first given leaf. + self.root = Some(Hash256::from_slice(leaf)) + } else if self.next_leaf % 2 == 0 { + self.process_left_node(self.next_leaf, Preimage::Slice(leaf)) + } else { + self.process_right_node(self.next_leaf, Preimage::Slice(leaf)) + } + + self.next_leaf += 1; + + Ok(()) + } + + /// Returns the root of the Merkle tree. + /// + /// If not all leaves have been provided, the tree will be efficiently completed under the + /// assumption that all not-yet-provided leaves are equal to `[0; 32]`. 
+ /// + /// ## Errors + /// + /// Returns an error if the bytes remaining in the buffer would create a leaf that would exceed + /// the maximum permissible number of leaves defined by the initialization `depth`. + pub fn finish(mut self) -> Result { + if !self.buffer.is_empty() { + let mut leaf = [0; HASHSIZE]; + leaf[..self.buffer.len()].copy_from_slice(&self.buffer); + self.process_leaf(&leaf)? + } + + // If the tree is incomplete, we must complete it by providing zero-hashes. + loop { + if let Some(root) = self.root { + break Ok(root); + } else { + if let Some(node) = self.half_nodes.last() { + let right_child = node.id * 2 + 1; + self.process_right_node(right_child, self.zero_hash(right_child)); + } else if self.next_leaf == 1 { + // The next_leaf can only be 1 if the tree has a depth of one. If there have been no + // leaves supplied, assume a root of zero. + break Ok(Hash256::zero()); + } else { + // The only scenario where there are (a) no half nodes and (b) a tree of depth + // two or more is where no leaves have been supplied at all. + // + // Once we supply this first zero-hash leaf then all future operations will be + // triggered via the `process_right_node` branch. + self.process_left_node(self.next_leaf, self.zero_hash(self.next_leaf)) + } + } + } + } + + /// Process a node that will become the left-hand node of some parent. The supplied `id` is + /// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this + /// is a leaf node it will be the value of that leaf). + /// + /// In this scenario, the only option is to push a new half-node. + fn process_left_node(&mut self, id: usize, preimage: Preimage) { + self.half_nodes + .push(HalfNode::new(get_parent(id), preimage)) + } + + /// Process a node that will become the right-hand node of some parent. The supplied `id` is + /// that of the node (not the parent). The `preimage` is the value of the node (i.e., if this + /// is a leaf node it will be the value of that leaf).
+ /// + /// This operation will always complete one node, then it will attempt to crawl up the tree and + /// collapse any other viable nodes. For example, consider a tree of depth 3 (see diagram + /// below). When providing the node with id `7`, the node with id `3` will be completed which + /// will also provide the right-node for the `1` node. This function will complete both of + /// those nodes and ultimately find the root of the tree. + /// + /// ```ignore + /// 1 <-- completed + /// / \ + /// 2 3 <-- completed + /// / \ / \ + /// 4 5 6 7 <-- supplied right node + /// ``` + fn process_right_node(&mut self, id: usize, mut preimage: Preimage) { + let mut parent = get_parent(id); + + loop { + match self.half_nodes.last() { + Some(node) if node.id == parent => { + preimage = Preimage::Digest( + self.half_nodes + .pop() + .expect("if .last() is Some then .pop() must succeed") + .finish(preimage), + ); + if parent == 1 { + self.root = Some(Hash256::from_slice(preimage.as_bytes())); + break; + } else { + parent = get_parent(parent); + } + } + _ => { + self.half_nodes.push(HalfNode::new(parent, preimage)); + break; + } + } + } + } + + /// Returns a "zero hash" from a pre-computed set for the given node. + /// + /// Note: this node is not always zero, instead it is the result of hashing up a tree where the + /// leaves are all zeros. E.g., in a tree of depth 2, the `zero_hash` of a node at depth 1 + /// will be `[0; 32]`. However, the `zero_hash` for a node at depth 0 will be + /// `hash(concat([0; 32], [0; 32]))`. + fn zero_hash(&self, id: usize) -> Preimage<'static> { + Preimage::Slice(get_zero_hash(self.depth - (get_depth(id) + 1))) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::merkleize_padded; + + /// This test is just to ensure that the stack size of the `Context` remains the same. We choose + /// our smallvec size based upon this, so it's good to know if it suddenly changes in size.
+ #[test] + fn context_size() { + assert_eq!( + mem::size_of::(), + 216 + 8, + "Halfnode size should be as expected" + ); + } + + fn compare_with_reference(leaves: &[Hash256], depth: usize) { + let reference_bytes = leaves + .iter() + .map(|hash| hash.as_bytes().to_vec()) + .flatten() + .collect::>(); + + let reference_root = merkleize_padded(&reference_bytes, 1 << (depth - 1)); + + let merklizer_root_32_bytes = { + let mut m = MerkleHasher::with_depth(depth); + for leaf in leaves.iter() { + m.write(leaf.as_bytes()).expect("should process leaf"); + } + m.finish().expect("should finish") + }; + + assert_eq!( + reference_root, merklizer_root_32_bytes, + "32 bytes should match reference root" + ); + + let merklizer_root_individual_3_bytes = { + let mut m = MerkleHasher::with_depth(depth); + for bytes in reference_bytes.clone().chunks(3) { + m.write(bytes).expect("should process byte"); + } + m.finish().expect("should finish") + }; + + assert_eq!( + reference_root, merklizer_root_individual_3_bytes, + "3 bytes should match reference root" + ); + + let merklizer_root_individual_single_bytes = { + let mut m = MerkleHasher::with_depth(depth); + for byte in reference_bytes.iter() { + m.write(&[*byte]).expect("should process byte"); + } + m.finish().expect("should finish") + }; + + assert_eq!( + reference_root, merklizer_root_individual_single_bytes, + "single bytes should match reference root" + ); + } + + /// A simple wrapper to compare MerkleHasher to the reference function by just giving a number + /// of leaves and a depth. + fn compare_reference_with_len(leaves: u64, depth: usize) { + let leaves = (0..leaves) + .map(|i| Hash256::from_low_u64_be(i)) + .collect::>(); + compare_with_reference(&leaves, depth) + } + + /// Compares the `MerkleHasher::with_depth` and `MerkleHasher::with_leaves` generate consistent + /// results. 
+ fn compare_new_with_leaf_count(num_leaves: u64, depth: usize) { + let leaves = (0..num_leaves) + .map(|i| Hash256::from_low_u64_be(i)) + .collect::>(); + + let from_depth = { + let mut m = MerkleHasher::with_depth(depth); + for leaf in leaves.iter() { + m.write(leaf.as_bytes()).expect("should process leaf"); + } + m.finish() + }; + + let from_num_leaves = { + let mut m = MerkleHasher::with_leaves(num_leaves as usize); + for leaf in leaves.iter() { + m.process_leaf(leaf.as_bytes()) + .expect("should process leaf"); + } + m.finish() + }; + + assert_eq!( + from_depth, from_num_leaves, + "hash generated by depth should match that from num leaves" + ); + } + + #[test] + fn with_leaves() { + compare_new_with_leaf_count(1, 1); + compare_new_with_leaf_count(2, 2); + compare_new_with_leaf_count(3, 3); + compare_new_with_leaf_count(4, 3); + compare_new_with_leaf_count(5, 4); + compare_new_with_leaf_count(6, 4); + compare_new_with_leaf_count(7, 4); + compare_new_with_leaf_count(8, 4); + compare_new_with_leaf_count(9, 5); + compare_new_with_leaf_count(10, 5); + compare_new_with_leaf_count(11, 5); + compare_new_with_leaf_count(12, 5); + compare_new_with_leaf_count(13, 5); + compare_new_with_leaf_count(14, 5); + compare_new_with_leaf_count(15, 5); + } + + #[test] + fn depth() { + assert_eq!(get_depth(1), 0); + assert_eq!(get_depth(2), 1); + assert_eq!(get_depth(3), 1); + assert_eq!(get_depth(4), 2); + assert_eq!(get_depth(5), 2); + assert_eq!(get_depth(6), 2); + assert_eq!(get_depth(7), 2); + assert_eq!(get_depth(8), 3); + } + + #[test] + fn with_0_leaves() { + let hasher = MerkleHasher::with_leaves(0); + assert_eq!(hasher.finish().unwrap(), Hash256::zero()); + } + + #[test] + #[should_panic] + fn too_many_leaves() { + compare_reference_with_len(2, 1); + } + + #[test] + fn full_trees() { + compare_reference_with_len(1, 1); + compare_reference_with_len(2, 2); + compare_reference_with_len(4, 3); + compare_reference_with_len(8, 4); + compare_reference_with_len(16, 5); + 
compare_reference_with_len(32, 6); + compare_reference_with_len(64, 7); + compare_reference_with_len(128, 8); + compare_reference_with_len(256, 9); + compare_reference_with_len(256, 9); + compare_reference_with_len(8192, 14); + } + + #[test] + fn incomplete_trees() { + compare_reference_with_len(0, 1); + + compare_reference_with_len(0, 2); + compare_reference_with_len(1, 2); + + for i in 0..=4 { + compare_reference_with_len(i, 3); + } + + for i in 0..=7 { + compare_reference_with_len(i, 4); + } + + for i in 0..=15 { + compare_reference_with_len(i, 5); + } + + for i in 0..=32 { + compare_reference_with_len(i, 6); + } + + for i in 0..=64 { + compare_reference_with_len(i, 7); + } + + compare_reference_with_len(0, 14); + compare_reference_with_len(13, 14); + compare_reference_with_len(8191, 14); + } + + #[test] + fn remaining_buffer() { + let a = { + let mut m = MerkleHasher::with_leaves(2); + m.write(&[1]).expect("should write"); + m.finish().expect("should finish") + }; + + let b = { + let mut m = MerkleHasher::with_leaves(2); + let mut leaf = vec![1]; + leaf.extend_from_slice(&[0; 31]); + m.write(&leaf).expect("should write"); + m.write(&[0; 32]).expect("should write"); + m.finish().expect("should finish") + }; + + assert_eq!(a, b, "should complete buffer"); + } +} diff --git a/eth2/utils/tree_hash/src/merkleize_padded.rs b/eth2/utils/tree_hash/src/merkleize_padded.rs index 095179799a..18beb53629 100644 --- a/eth2/utils/tree_hash/src/merkleize_padded.rs +++ b/eth2/utils/tree_hash/src/merkleize_padded.rs @@ -1,14 +1,12 @@ -use super::BYTES_PER_CHUNK; -use eth2_hashing::{hash, hash32_concat, ZERO_HASHES, ZERO_HASHES_MAX_INDEX}; - -/// The size of the cache that stores padding nodes for a given height. -/// -/// Currently, we panic if we encounter a tree with a height larger than `MAX_TREE_DEPTH`. 
-pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX; +use super::{get_zero_hash, Hash256, BYTES_PER_CHUNK}; +use eth2_hashing::{hash, hash32_concat}; /// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of /// leaves. /// +/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses +/// `MerkleHasher`. We only keep this function around for reference testing. +/// /// First all nodes are extracted from `bytes` and then a padding node is added until the number of /// leaf chunks is greater than or equal to `min_leaves`. Callers may set `min_leaves` to `0` if no /// adding additional chunks should be added to the given `bytes`. @@ -34,12 +32,12 @@ pub const MAX_TREE_DEPTH: usize = ZERO_HASHES_MAX_INDEX; /// /// _Note: there are some minor memory overheads, including a handful of usizes and a list of /// `MAX_TREE_DEPTH` hashes as `lazy_static` constants._ -pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec { +pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 { // If the bytes are just one chunk or less, pad to one chunk and return without hashing. if bytes.len() <= BYTES_PER_CHUNK && min_leaves <= 1 { let mut o = bytes.to_vec(); o.resize(BYTES_PER_CHUNK, 0); - return o; + return Hash256::from_slice(&o); } assert!( @@ -157,7 +155,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec { assert_eq!(root.len(), BYTES_PER_CHUNK, "Only one chunk should remain"); - root + Hash256::from_slice(&root) } /// A helper struct for storing words of `BYTES_PER_CHUNK` size in a flat byte array. @@ -212,15 +210,6 @@ impl ChunkStore { } } -/// Returns a cached padding node for a given height. -fn get_zero_hash(height: usize) -> &'static [u8] { - if height <= MAX_TREE_DEPTH { - &ZERO_HASHES[height] - } else { - panic!("Tree exceeds MAX_TREE_DEPTH of {}", MAX_TREE_DEPTH) - } -} - /// Returns the next even number following `n`. If `n` is even, `n` is returned. 
fn next_even_number(n: usize) -> usize { n + n % 2 @@ -229,9 +218,10 @@ fn next_even_number(n: usize) -> usize { #[cfg(test)] mod test { use super::*; + use crate::ZERO_HASHES_MAX_INDEX; - pub fn reference_root(bytes: &[u8]) -> Vec { - crate::merkleize_standard(&bytes)[0..32].to_vec() + pub fn reference_root(bytes: &[u8]) -> Hash256 { + crate::merkleize_standard(&bytes) } macro_rules! common_tests { @@ -288,10 +278,10 @@ mod test { #[test] fn max_tree_depth_min_nodes() { let input = vec![0; 10 * BYTES_PER_CHUNK]; - let min_nodes = 2usize.pow(MAX_TREE_DEPTH as u32); + let min_nodes = 2usize.pow(ZERO_HASHES_MAX_INDEX as u32); assert_eq!( - merkleize_padded(&input, min_nodes), - get_zero_hash(MAX_TREE_DEPTH) + merkleize_padded(&input, min_nodes).as_bytes(), + get_zero_hash(ZERO_HASHES_MAX_INDEX) ); } }; diff --git a/eth2/utils/tree_hash/src/merkleize_standard.rs b/eth2/utils/tree_hash/src/merkleize_standard.rs index 55cab85d21..6dd046991e 100644 --- a/eth2/utils/tree_hash/src/merkleize_standard.rs +++ b/eth2/utils/tree_hash/src/merkleize_standard.rs @@ -4,9 +4,10 @@ use eth2_hashing::hash; /// Merkleizes bytes and returns the root, using a simple algorithm that does not optimize to avoid /// processing or storing padding bytes. /// -/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two. +/// **Note**: This function is generally worse than using the `crate::merkle_root` which uses +/// `MerkleHasher`. We only keep this function around for reference testing. /// -/// It is likely a better choice to use [merkleize_padded](fn.merkleize_padded.html) instead. +/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two. /// /// ## CPU Performance /// @@ -17,12 +18,12 @@ use eth2_hashing::hash; /// - Duplicates the input `bytes`. /// - Stores all internal nodes, even if they are padding. /// - Does not free up unused memory during operation. 
-pub fn merkleize_standard(bytes: &[u8]) -> Vec { +pub fn merkleize_standard(bytes: &[u8]) -> Hash256 { // If the bytes are just one chunk (or less than one chunk) just return them. if bytes.len() <= HASHSIZE { let mut o = bytes.to_vec(); o.resize(HASHSIZE, 0); - return o; + return Hash256::from_slice(&o[0..HASHSIZE]); } let leaves = num_sanitized_leaves(bytes.len()); @@ -67,7 +68,7 @@ pub fn merkleize_standard(bytes: &[u8]) -> Vec { o[j..j + HASHSIZE].copy_from_slice(&hash); } - o + Hash256::from_slice(&o[0..HASHSIZE]) } fn num_sanitized_leaves(num_bytes: usize) -> usize { diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs index 48fe751a25..e233e4ed57 100644 --- a/eth2/utils/tree_hash_derive/src/lib.rs +++ b/eth2/utils/tree_hash_derive/src/lib.rs @@ -93,6 +93,7 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream { }; let idents = get_hashable_fields(&struct_data); + let num_leaves = idents.len(); let output = quote! { impl #impl_generics tree_hash::TreeHash for #name #ty_generics #where_clause { @@ -108,14 +109,15 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream { unreachable!("Struct should never be packed.") } - fn tree_hash_root(&self) -> Vec { - let mut leaves = Vec::with_capacity(4 * tree_hash::HASHSIZE); + fn tree_hash_root(&self) -> tree_hash::Hash256 { + let mut hasher = tree_hash::MerkleHasher::with_leaves(#num_leaves); #( - leaves.append(&mut self.#idents.tree_hash_root()); + hasher.write(self.#idents.tree_hash_root().as_bytes()) + .expect("tree hash derive should not apply too many leaves"); )* - tree_hash::merkle_root(&leaves, 0) + hasher.finish().expect("tree hash derive should not have a remaining buffer") } } }; diff --git a/tests/ef_tests/src/cases/common.rs b/tests/ef_tests/src/cases/common.rs index 8e787f157c..e648ef6ec5 100644 --- a/tests/ef_tests/src/cases/common.rs +++ b/tests/ef_tests/src/cases/common.rs @@ -50,7 +50,7 @@ macro_rules! 
uint_wrapper { <$wrapped_type>::tree_hash_packing_factor() } - fn tree_hash_root(&self) -> Vec { + fn tree_hash_root(&self) -> tree_hash::Hash256 { self.x.tree_hash_root() } } diff --git a/tests/ef_tests/src/cases/ssz_generic.rs b/tests/ef_tests/src/cases/ssz_generic.rs index 7aa198beae..3a7131bbe0 100644 --- a/tests/ef_tests/src/cases/ssz_generic.rs +++ b/tests/ef_tests/src/cases/ssz_generic.rs @@ -218,7 +218,7 @@ fn ssz_generic_test(path: &Path) -> Result<(), Error> { check_serialization(&value, &serialized)?; if let Some(ref meta) = meta { - check_tree_hash(&meta.root, &value.tree_hash_root())?; + check_tree_hash(&meta.root, value.tree_hash_root().as_bytes())?; } } // Invalid diff --git a/tests/ef_tests/src/cases/ssz_static.rs b/tests/ef_tests/src/cases/ssz_static.rs index 338ea26451..88afea770a 100644 --- a/tests/ef_tests/src/cases/ssz_static.rs +++ b/tests/ef_tests/src/cases/ssz_static.rs @@ -82,7 +82,7 @@ pub fn check_tree_hash(expected_str: &str, actual_root: &[u8]) -> Result<(), Err impl Case for SszStatic { fn result(&self, _case_index: usize) -> Result<(), Error> { check_serialization(&self.value, &self.serialized)?; - check_tree_hash(&self.roots.root, &self.value.tree_hash_root())?; + check_tree_hash(&self.roots.root, self.value.tree_hash_root().as_bytes())?; Ok(()) } } @@ -90,7 +90,7 @@ impl Case for SszStatic { impl, C: Debug + Sync> Case for SszStaticTHC { fn result(&self, _case_index: usize) -> Result<(), Error> { check_serialization(&self.value, &self.serialized)?; - check_tree_hash(&self.roots.root, &self.value.tree_hash_root())?; + check_tree_hash(&self.roots.root, self.value.tree_hash_root().as_bytes())?; let arena = &mut CacheArena::default(); let mut cache = self.value.new_tree_hash_cache(arena);