From c1c37098d7be0d8697e4f22d4766b1282a48c076 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Wed, 28 Nov 2018 12:13:25 -0800 Subject: [PATCH 01/20] tree_hash WIP --- beacon_chain/utils/ssz/src/tree_hash.rs | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 beacon_chain/utils/ssz/src/tree_hash.rs diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs new file mode 100644 index 0000000000..41d7a9e4d6 --- /dev/null +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -0,0 +1,36 @@ +extern crate blake2_rfc; + +use self::blake2_rfc::blake2s::blake2s; + +/** + * Extends data length to a power of 2 by minimally right-zero-padding + */ +fn extend_to_power_of_2(data: &mut Vec) { + let len = data.len(); + let new_len = len.next_power_of_two(); + if new_len > len { + data.append(&mut vec![0; new_len - len]); + } +} + +fn hash(data: Vec) -> Vec { + let result = blake2s(32, &[], data.as_slice()); + result.as_bytes().to_vec() +} + +// fn list_to_glob() {} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extend_to_power_of_2() { + let mut data = vec![1, 2, 3, 4, 5]; + + // an array length of 5 should be extended to + // a length of 8 (the next power of 2) by right + // padding it with 3 zeros + extend_to_power_of_2(&mut data); + assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); + } +} From 56b1639f10ab38be4745c6667499c03eb356b7ab Mon Sep 17 00:00:00 2001 From: mjkeating Date: Tue, 4 Dec 2018 12:37:12 -0800 Subject: [PATCH 02/20] WIP for tree_hash --- beacon_chain/types/src/shard_and_committee.rs | 33 ++++ beacon_chain/types/src/validator_record.rs | 50 ++++++ beacon_chain/utils/ssz/Cargo.toml | 1 + beacon_chain/utils/ssz/src/impl_encode.rs | 18 ++- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 67 ++++++++ beacon_chain/utils/ssz/src/lib.rs | 3 + beacon_chain/utils/ssz/src/tree_hash.rs | 147 ++++++++++++++++-- 7 files changed, 305 insertions(+), 14 deletions(-) create mode 100644 
beacon_chain/utils/ssz/src/impl_tree_hash.rs diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 44c2e57ffb..8388b9cbc2 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -1,3 +1,5 @@ +use super::ssz::{merkle_hash, TreeHash}; + #[derive(Clone, Debug, PartialEq)] pub struct ShardAndCommittee { pub shard: u16, @@ -15,6 +17,26 @@ impl ShardAndCommittee { } } +impl TreeHash for ShardAndCommittee { + // python sample code: + // def hash_shard_and_committee(val): + // committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee]) + // return hash(val.shard_id.to_bytes(2, 'big') + committee) + fn tree_hash(&self) -> Vec { + let mut committee_ssz_items = Vec::new(); + for c in &self.committee { + let mut h = (*c as u32).tree_hash(); + h.resize(3, 0); + committee_ssz_items.push(h); + } + let mut result = Vec::new(); + result.append(&mut self.shard.tree_hash()); + result.append(&mut merkle_hash(&mut committee_ssz_items)); + + result.tree_hash() + } +} + #[cfg(test)] mod tests { use super::*; @@ -25,4 +47,15 @@ mod tests { assert_eq!(s.shard, 0); assert_eq!(s.committee.len(), 0); } + + #[test] + fn test_shard_and_committee_tree_hash() { + let s = ShardAndCommittee { + shard: 1, + committee: vec![1, 2, 3], + }; + + // should test a known hash value + assert_eq!(s.tree_hash().len(), 32); + } } diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 3a15baeecb..3d4a57e201 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -1,4 +1,5 @@ use super::bls::{Keypair, PublicKey}; +use super::ssz::TreeHash; use super::{Address, Hash256}; #[derive(Debug, PartialEq, Clone, Copy)] @@ -44,6 +45,46 @@ impl ValidatorRecord { } } +impl TreeHash for ValidatorRecord { + /* python sample code: + def hash_validator_record(val): + return 
hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \ + val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \ + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')) + */ + fn tree_hash(&self) -> Vec { + // the serialized fields, to be hashed, should add up to 118 bytes in length. + // allocating it once here + let mut ssz = Vec::with_capacity(118); + + // "val.pubkey.to_bytes(32, 'big')" logic + // TODO: + // probably all kinds of wrong here. Not sure how to convert (szz) + // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on + // PublicKey, returns a 192 byte array. + let pub_key_bytes = &mut self.pubkey.as_bytes(); + pub_key_bytes.resize(32, 0); + ssz.append(pub_key_bytes); + + ssz.append(&mut self.withdrawal_shard.tree_hash()); + ssz.append(&mut self.withdrawal_address.tree_hash()); + ssz.append(&mut self.randao_commitment.tree_hash()); + + // balance is a 64bit number that serializes to 8 bytes. + // Right padding here to resize to 16 bytes - not sure why + // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')" + let mut balance = self.balance.tree_hash(); + balance.resize(16, 0); + ssz.append(&mut balance); + + // TODO: + // ... 
+ val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big') + // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields + + ssz.tree_hash() + } +} + #[cfg(test)] mod tests { use super::*; @@ -59,4 +100,13 @@ mod tests { assert_eq!(v.status, 0); assert_eq!(v.exit_slot, 0); } + + #[test] + fn test_validator_record_ree_hash() { + let (v, _kp) = ValidatorRecord::zero_with_thread_rand_keypair(); + let h = v.tree_hash(); + + // TODO: should check a known hash result value + assert_eq!(h.len(), 32); + } } diff --git a/beacon_chain/utils/ssz/Cargo.toml b/beacon_chain/utils/ssz/Cargo.toml index aa4dc5d72c..ec91009274 100644 --- a/beacon_chain/utils/ssz/Cargo.toml +++ b/beacon_chain/utils/ssz/Cargo.toml @@ -6,3 +6,4 @@ authors = ["Paul Hauner "] [dependencies] bytes = "0.4.9" ethereum-types = "0.4.0" +blake2-rfc = "0.2.18" \ No newline at end of file diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs index 3f366bdf3a..c9ca8b0064 100644 --- a/beacon_chain/utils/ssz/src/impl_encode.rs +++ b/beacon_chain/utils/ssz/src/impl_encode.rs @@ -1,7 +1,7 @@ extern crate bytes; use self::bytes::{BufMut, BytesMut}; -use super::ethereum_types::H256; +use super::ethereum_types::{Address, H256}; use super::{Encodable, SszStream}; /* @@ -48,10 +48,18 @@ impl_encodable_for_uint!(usize, 64); impl Encodable for H256 { fn ssz_append(&self, s: &mut SszStream) { + assert_eq!(32, self.len()); s.append_encoded_raw(&self.to_vec()); } } +impl Encodable for Address { + fn ssz_append(&self, s: &mut SszStream) { + assert_eq!(20, self.len()); + s.append_encoded_raw(&self) + } +} + #[cfg(test)] mod tests { use super::*; @@ -64,6 +72,14 @@ mod tests { assert_eq!(ssz.drain(), vec![0; 32]); } + #[test] + fn test_ssz_encode_adress() { + let h = Address::zero(); + let mut ssz = SszStream::new(); + ssz.append(&h); + assert_eq!(ssz.drain(), vec![0; 20]); + } + #[test] fn test_ssz_encode_u8() { let x: u8 = 0; diff --git 
a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs new file mode 100644 index 0000000000..c3fccb2bc5 --- /dev/null +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -0,0 +1,67 @@ +extern crate blake2_rfc; + +use self::blake2_rfc::blake2b::blake2b; + +use super::ethereum_types::{Address, H256}; +use super::{ssz_encode, TreeHash}; + +// I haven't added tests for tree_hash implementations that simply pass +// thru to the szz_encode lib for which tests already exist. Do we want +// test anyway? + +impl TreeHash for u8 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u16 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u32 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u64 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for Address { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for H256 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +// hash byte arrays +impl TreeHash for [u8] { + fn tree_hash(&self) -> Vec { + hash(&self) + } +} + +/** + * From the Spec: + * We define hash(x) as BLAKE2b-512(x)[0:32] + * From the python sample code: + * return blake2b(x).digest()[:32] + * + * This was orginally writting for blake2s before it was changed to blake2b + * Perhaps, we should be using 'canonical_hash' in the hashing lib? 
+ */ +fn hash(data: &[u8]) -> Vec { + let result = blake2b(32, &[], &data); + result.as_bytes().to_vec() +} diff --git a/beacon_chain/utils/ssz/src/lib.rs b/beacon_chain/utils/ssz/src/lib.rs index f3a195e427..9f71e36f19 100644 --- a/beacon_chain/utils/ssz/src/lib.rs +++ b/beacon_chain/utils/ssz/src/lib.rs @@ -12,12 +12,15 @@ extern crate ethereum_types; pub mod decode; pub mod encode; +pub mod tree_hash; mod impl_decode; mod impl_encode; +mod impl_tree_hash; pub use decode::{decode_ssz, decode_ssz_list, Decodable, DecodeError}; pub use encode::{Encodable, SszStream}; +pub use tree_hash::{merkle_hash, TreeHash}; pub const LENGTH_BYTES: usize = 4; pub const MAX_LIST_SIZE: usize = 1 << (4 * 8); diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 41d7a9e4d6..63b5740495 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -1,6 +1,123 @@ -extern crate blake2_rfc; +const CHUNKSIZE: usize = 128; +const HASHSIZE: usize = 32; -use self::blake2_rfc::blake2s::blake2s; +pub trait TreeHash { + // Note: it would be nice to have a default trait implementation here + // i.e. szz_encode(self) - but rust complains it does not know + // the size of 'self'. Not sure if there's a way around this. + + fn tree_hash(&self) -> Vec; +} + +// python example: Note - I'm seeing some inconsistencies +// between this and the 'Tree Hash' section in the SSZ spec. +// So, I imagine it will change. 
+/* def merkle_hash(lst): + # Concatenate list into data + if len(lst[0]) != next_power_of_2(len(lst[0])): + lst = [extend_to_power_of_2(x) for x in lst] + data = b''.join(lst) + # Add padding + data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) + assert len(data) % CHUNKSIZE == 0 + # Store length (to compensate for non-bijectiveness of padding) + datalen = len(lst).to_bytes(32, 'big') + # Convert to chunks + chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] + chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE] + for i in range(len(chunkz)//2 - 1, 0, -1): + chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1]) + return hash(chunkz[1] + datalen) */ + +/** + * Returns a 32 byte hash of 'list', a vector of byte vectors. + * Note that this will consume 'list'. + * */ +pub fn merkle_hash(list: &mut Vec>) -> Vec { + // flatten list + let data = &mut list_to_blob(list); + + // data should be divisible by CHUNKSIZE + assert_eq!(data.len() % CHUNKSIZE, 0); + + // get data_len as bytes. It will hashed will the merkle root + let dlen = data.len() as u64; + let data_len_bytes = &mut dlen.tree_hash(); + data_len_bytes.resize(32, 0); + + // merklize + // + // From the Spec: + // while len(chunkz) > 1: + // if len(chunkz) % 2 == 1: + // chunkz.append(b'\x00' * SSZ_CHUNK_SIZE) + // chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] + let mut mhash = hash_level(data, CHUNKSIZE); + while mhash.len() > HASHSIZE { + mhash = hash_level(&mut mhash, HASHSIZE); + } + + assert_eq!(mhash.len(), HASHSIZE); + + mhash.append(data_len_bytes); + mhash.tree_hash() +} + +/** + * Takes a flat vector of bytes. 
It then hashes (chunk_size * 2) into + * a byte vector of hashes, divisible by the 32 byte hashsize + */ +fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { + assert!(data.len() % chunk_size == 0); + + let mut result: Vec = Vec::new(); + for two_chunks in data.chunks(chunk_size * 2) { + if two_chunks.len() == chunk_size && data.len() > chunk_size { + // if there is only one chunk here, hash it with a zero-byte + // CHUNKSIZE vector + let mut c = two_chunks.to_vec(); + c.append(&mut vec![0; CHUNKSIZE]); + result.append(&mut c.tree_hash()); + } else { + result.append(&mut two_chunks.tree_hash()); + } + } + + result +} + +fn list_to_blob(list: &mut Vec>) -> Vec { + let mut data_len = 0; + if list[0].len().is_power_of_two() == false { + for x in list.iter_mut() { + extend_to_power_of_2(x); + data_len += x.len(); + } + } + + // do we need padding? + let extend_by = if data_len % CHUNKSIZE > 0 { + CHUNKSIZE - (data_len % CHUNKSIZE) + } else { + 0 + }; + + // allocate buffer and append each list element (flatten the vec of vecs) + data_len += extend_by; + let mut data: Vec = Vec::with_capacity(data_len); + for x in list.iter_mut() { + data.append(x); + } + + // add padding + let mut i = 0; + while i < extend_by { + data.push(0); + i += 1; + } + + data +} /** * Extends data length to a power of 2 by minimally right-zero-padding @@ -9,16 +126,10 @@ fn extend_to_power_of_2(data: &mut Vec) { let len = data.len(); let new_len = len.next_power_of_two(); if new_len > len { - data.append(&mut vec![0; new_len - len]); + data.resize(new_len, 0); } } -fn hash(data: Vec) -> Vec { - let result = blake2s(32, &[], data.as_slice()); - result.as_bytes().to_vec() -} - -// fn list_to_glob() {} #[cfg(test)] mod tests { use super::*; @@ -26,11 +137,21 @@ mod tests { #[test] fn test_extend_to_power_of_2() { let mut data = vec![1, 2, 3, 4, 5]; - - // an array length of 5 should be extended to - // a length of 8 (the next power of 2) by right - // padding it with 3 zeros 
extend_to_power_of_2(&mut data); assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); } + + #[test] + fn test_merkle_hash() { + let data1 = vec![1; 100]; + let data2 = vec![2; 100]; + let data3 = vec![3; 100]; + let mut list = vec![data1, data2, data3]; + let result = merkle_hash(&mut list); + + //note: should test againt a known test hash value + assert_eq!(HASHSIZE, result.len()); + println!("merkle_hash: {:?}", result); + } + } From bfcce4fe47749a058238686bba33b34886457d80 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sat, 8 Dec 2018 15:25:59 -0800 Subject: [PATCH 03/20] Added tree_hash impl for Vec and Hashtable (list and container); plus various cleanup code --- beacon_chain/types/src/shard_and_committee.rs | 4 - beacon_chain/types/src/validator_record.rs | 36 ++++---- beacon_chain/utils/ssz/src/impl_encode.rs | 4 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 92 ++++++++++++++++--- beacon_chain/utils/ssz/src/tree_hash.rs | 51 +--------- 5 files changed, 99 insertions(+), 88 deletions(-) diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 8388b9cbc2..9982611e8f 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -18,10 +18,6 @@ impl ShardAndCommittee { } impl TreeHash for ShardAndCommittee { - // python sample code: - // def hash_shard_and_committee(val): - // committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee]) - // return hash(val.shard_id.to_bytes(2, 'big') + committee) fn tree_hash(&self) -> Vec { let mut committee_ssz_items = Vec::new(); for c in &self.committee { diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 3d4a57e201..1b18a06436 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -2,6 +2,16 @@ use super::bls::{Keypair, PublicKey}; use super::ssz::TreeHash; use super::{Address, Hash256}; +pub const 
HASH_SSZ_VALIDATOR_RECORD_LENGTH: usize = { + 32 + // pubkey.to_bytes(32, 'big') + 2 + // withdrawal_shard.to_bytes(2, 'big') + 20 + // withdrawal_address + 32 + // randao_commitment + 16 + // balance.to_bytes(16, 'big') + 16 + // start_dynasty.to_bytes(8, 'big') + 8 // end_dynasty.to_bytes(8, 'big') +}; + #[derive(Debug, PartialEq, Clone, Copy)] pub enum ValidatorStatus { PendingActivation = 0, @@ -46,22 +56,15 @@ impl ValidatorRecord { } impl TreeHash for ValidatorRecord { - /* python sample code: - def hash_validator_record(val): - return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \ - val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \ - val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')) - */ fn tree_hash(&self) -> Vec { - // the serialized fields, to be hashed, should add up to 118 bytes in length. - // allocating it once here - let mut ssz = Vec::with_capacity(118); + let mut ssz = Vec::with_capacity(HASH_SSZ_VALIDATOR_RECORD_LENGTH); - // "val.pubkey.to_bytes(32, 'big')" logic + // From python sample: "val.pubkey.to_bytes(32, 'big')" // TODO: - // probably all kinds of wrong here. Not sure how to convert (szz) - // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on - // PublicKey, returns a 192 byte array. + // Need to actually convert (szz) pubkey into a big-endian 32 byte + // array. + // Also, our ValidatorRecord seems to be missing the start_dynasty + // and end_dynasty fields let pub_key_bytes = &mut self.pubkey.as_bytes(); pub_key_bytes.resize(32, 0); ssz.append(pub_key_bytes); @@ -70,17 +73,10 @@ impl TreeHash for ValidatorRecord { ssz.append(&mut self.withdrawal_address.tree_hash()); ssz.append(&mut self.randao_commitment.tree_hash()); - // balance is a 64bit number that serializes to 8 bytes. 
- // Right padding here to resize to 16 bytes - not sure why - // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')" let mut balance = self.balance.tree_hash(); balance.resize(16, 0); ssz.append(&mut balance); - // TODO: - // ... + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big') - // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields - ssz.tree_hash() } } diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs index c9ca8b0064..8a05b97051 100644 --- a/beacon_chain/utils/ssz/src/impl_encode.rs +++ b/beacon_chain/utils/ssz/src/impl_encode.rs @@ -48,14 +48,12 @@ impl_encodable_for_uint!(usize, 64); impl Encodable for H256 { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(32, self.len()); s.append_encoded_raw(&self.to_vec()); } } impl Encodable for Address { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(20, self.len()); s.append_encoded_raw(&self) } } @@ -73,7 +71,7 @@ mod tests { } #[test] - fn test_ssz_encode_adress() { + fn test_ssz_encode_address() { let h = Address::zero(); let mut ssz = SszStream::new(); ssz.append(&h); diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index c3fccb2bc5..8c5a5d17a7 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -1,13 +1,11 @@ extern crate blake2_rfc; use self::blake2_rfc::blake2b::blake2b; - use super::ethereum_types::{Address, H256}; use super::{ssz_encode, TreeHash}; - -// I haven't added tests for tree_hash implementations that simply pass -// thru to the szz_encode lib for which tests already exist. Do we want -// test anyway? 
+use std::cmp::Ord; +use std::collections::HashMap; +use std::hash::Hash; impl TreeHash for u8 { fn tree_hash(&self) -> Vec { @@ -45,23 +43,87 @@ impl TreeHash for H256 { } } -// hash byte arrays impl TreeHash for [u8] { fn tree_hash(&self) -> Vec { hash(&self) } } -/** - * From the Spec: - * We define hash(x) as BLAKE2b-512(x)[0:32] - * From the python sample code: - * return blake2b(x).digest()[:32] - * - * This was orginally writting for blake2s before it was changed to blake2b - * Perhaps, we should be using 'canonical_hash' in the hashing lib? - */ +impl TreeHash for Vec +where + T: TreeHash, +{ + /// Appends the tree_hash for each value of 'self' into a byte array + /// and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut result = Vec::new(); + for x in self { + result.append(&mut x.tree_hash()); + } + + hash(&result) + } +} + +impl TreeHash for HashMap +where + K: Eq, + K: Hash, + K: Ord, + V: TreeHash, +{ + /// Appends the tree_hash for each value of 'self, sorted by key, + /// into a byte array and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut items: Vec<_> = self.iter().collect(); + items.sort_by(|a, b| a.0.cmp(b.0)); + let mut result = Vec::new(); + for item in items { + result.append(&mut item.1.tree_hash()); + } + + hash(&result) + } +} + +/// From the Spec: +/// We define hash(x) as BLAKE2b-512(x)[0:32] fn hash(data: &[u8]) -> Vec { let result = blake2b(32, &[], &data); result.as_bytes().to_vec() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_impl_tree_hash_vec() { + let result = vec![1u32, 2, 3, 4, 5, 6, 7].tree_hash(); + assert_eq!(result.len(), 32); + } + + #[test] + fn test_impl_tree_hash_hashmap() { + let mut map = HashMap::new(); + map.insert("c", 3); + map.insert("b", 2); + map.insert("g", 7); + map.insert("d", 6); + map.insert("e", 4); + map.insert("a", 1u32); + map.insert("f", 5); + let result = map.tree_hash(); + + // TODO: resolve inconsistencies between the 
python sample code and + // the spec; and create tests that tie-out to an offical result + assert_eq!( + result, + [ + 59, 110, 242, 24, 177, 184, 73, 109, 190, 19, 172, 39, 74, 94, 224, 198, 0, 170, + 225, 152, 249, 59, 10, 76, 137, 124, 52, 159, 37, 42, 26, 157 + ] + ); + } + +} diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 63b5740495..60a052b354 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -2,71 +2,32 @@ const CHUNKSIZE: usize = 128; const HASHSIZE: usize = 32; pub trait TreeHash { - // Note: it would be nice to have a default trait implementation here - // i.e. szz_encode(self) - but rust complains it does not know - // the size of 'self'. Not sure if there's a way around this. - fn tree_hash(&self) -> Vec; } -// python example: Note - I'm seeing some inconsistencies -// between this and the 'Tree Hash' section in the SSZ spec. -// So, I imagine it will change. -/* def merkle_hash(lst): - # Concatenate list into data - if len(lst[0]) != next_power_of_2(len(lst[0])): - lst = [extend_to_power_of_2(x) for x in lst] - data = b''.join(lst) - # Add padding - data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) - assert len(data) % CHUNKSIZE == 0 - # Store length (to compensate for non-bijectiveness of padding) - datalen = len(lst).to_bytes(32, 'big') - # Convert to chunks - chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] - chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE] - for i in range(len(chunkz)//2 - 1, 0, -1): - chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1]) - return hash(chunkz[1] + datalen) */ - -/** - * Returns a 32 byte hash of 'list', a vector of byte vectors. - * Note that this will consume 'list'. - * */ +/// Returns a 32 byte hash of 'list' - a vector of byte vectors. +/// Note that this will consume 'list'. 
pub fn merkle_hash(list: &mut Vec>) -> Vec { // flatten list let data = &mut list_to_blob(list); - // data should be divisible by CHUNKSIZE - assert_eq!(data.len() % CHUNKSIZE, 0); - // get data_len as bytes. It will hashed will the merkle root let dlen = data.len() as u64; let data_len_bytes = &mut dlen.tree_hash(); data_len_bytes.resize(32, 0); // merklize - // - // From the Spec: - // while len(chunkz) > 1: - // if len(chunkz) % 2 == 1: - // chunkz.append(b'\x00' * SSZ_CHUNK_SIZE) - // chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] let mut mhash = hash_level(data, CHUNKSIZE); while mhash.len() > HASHSIZE { mhash = hash_level(&mut mhash, HASHSIZE); } - assert_eq!(mhash.len(), HASHSIZE); - mhash.append(data_len_bytes); mhash.tree_hash() } -/** - * Takes a flat vector of bytes. It then hashes (chunk_size * 2) into - * a byte vector of hashes, divisible by the 32 byte hashsize - */ +/// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into +/// a byte vector of hashes, divisible by HASHSIZE fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { assert!(data.len() % chunk_size == 0); @@ -119,9 +80,7 @@ fn list_to_blob(list: &mut Vec>) -> Vec { data } -/** - * Extends data length to a power of 2 by minimally right-zero-padding - */ +/// Extends data length to a power of 2 by minimally right-zero-padding fn extend_to_power_of_2(data: &mut Vec) { let len = data.len(); let new_len = len.next_power_of_two(); From a3b22cf6a490b4d5a4dec70d4b8df1e0329379b7 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sun, 9 Dec 2018 10:12:41 -0800 Subject: [PATCH 04/20] fixed tree_hash() for Vec and a couple of other issues --- beacon_chain/types/src/shard_and_committee.rs | 2 +- beacon_chain/types/src/validator_record.rs | 2 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 16 +++++++--------- beacon_chain/utils/ssz/src/tree_hash.rs | 19 ++++++------------- 4 files changed, 15 insertions(+), 24 deletions(-) diff --git 
a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 9982611e8f..c7fed2e8d2 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -29,7 +29,7 @@ impl TreeHash for ShardAndCommittee { result.append(&mut self.shard.tree_hash()); result.append(&mut merkle_hash(&mut committee_ssz_items)); - result.tree_hash() + result.as_slice().tree_hash() } } diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 1b18a06436..799e0137f6 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -77,7 +77,7 @@ impl TreeHash for ValidatorRecord { balance.resize(16, 0); ssz.append(&mut balance); - ssz.tree_hash() + ssz.as_slice().tree_hash() } } diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index 8c5a5d17a7..dfa6da2b28 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -2,7 +2,7 @@ extern crate blake2_rfc; use self::blake2_rfc::blake2b::blake2b; use super::ethereum_types::{Address, H256}; -use super::{ssz_encode, TreeHash}; +use super::{merkle_hash, ssz_encode, TreeHash}; use std::cmp::Ord; use std::collections::HashMap; use std::hash::Hash; @@ -53,15 +53,13 @@ impl TreeHash for Vec where T: TreeHash, { - /// Appends the tree_hash for each value of 'self' into a byte array - /// and returns the hash of said byte array + /// Returns the merkle_hash of a list of tree_hash values created + /// from the given list. + /// Note: A byte vector, Vec, must be converted to a slice (as_slice()) + /// to be handled properly (i.e. hashed) as byte array. 
fn tree_hash(&self) -> Vec { - let mut result = Vec::new(); - for x in self { - result.append(&mut x.tree_hash()); - } - - hash(&result) + let mut tree_hashes = self.iter().map(|x| x.tree_hash()).collect(); + merkle_hash(&mut tree_hashes) } } diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 60a052b354..31e1d3720c 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -23,14 +23,12 @@ pub fn merkle_hash(list: &mut Vec>) -> Vec { } mhash.append(data_len_bytes); - mhash.tree_hash() + mhash.as_slice().tree_hash() } /// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into /// a byte vector of hashes, divisible by HASHSIZE fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { - assert!(data.len() % chunk_size == 0); - let mut result: Vec = Vec::new(); for two_chunks in data.chunks(chunk_size * 2) { if two_chunks.len() == chunk_size && data.len() > chunk_size { @@ -38,7 +36,7 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { // CHUNKSIZE vector let mut c = two_chunks.to_vec(); c.append(&mut vec![0; CHUNKSIZE]); - result.append(&mut c.tree_hash()); + result.append(&mut c.as_slice().tree_hash()); } else { result.append(&mut two_chunks.tree_hash()); } @@ -48,14 +46,14 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { } fn list_to_blob(list: &mut Vec>) -> Vec { - let mut data_len = 0; if list[0].len().is_power_of_two() == false { for x in list.iter_mut() { extend_to_power_of_2(x); - data_len += x.len(); } } + let mut data_len = list[0].len() * list.len(); + // do we need padding? 
let extend_by = if data_len % CHUNKSIZE > 0 { CHUNKSIZE - (data_len % CHUNKSIZE) @@ -63,6 +61,8 @@ fn list_to_blob(list: &mut Vec>) -> Vec { 0 }; + println!("data_len {}, extend_by {}", data_len, extend_by); + // allocate buffer and append each list element (flatten the vec of vecs) data_len += extend_by; let mut data: Vec = Vec::with_capacity(data_len); @@ -93,13 +93,6 @@ fn extend_to_power_of_2(data: &mut Vec) { mod tests { use super::*; - #[test] - fn test_extend_to_power_of_2() { - let mut data = vec![1, 2, 3, 4, 5]; - extend_to_power_of_2(&mut data); - assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); - } - #[test] fn test_merkle_hash() { let data1 = vec![1; 100]; From fbf7f0d53746216da8cdf51c11df3a9cc23f536c Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sun, 9 Dec 2018 14:43:41 -0800 Subject: [PATCH 05/20] removed a debugging println statement --- beacon_chain/utils/ssz/src/tree_hash.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 31e1d3720c..0375d207c1 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -61,8 +61,6 @@ fn list_to_blob(list: &mut Vec>) -> Vec { 0 }; - println!("data_len {}, extend_by {}", data_len, extend_by); - // allocate buffer and append each list element (flatten the vec of vecs) data_len += extend_by; let mut data: Vec = Vec::with_capacity(data_len); From 3a26f73cf22e693a8be060e0921630e485253df0 Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Wed, 24 Oct 2018 12:21:51 +0200 Subject: [PATCH 06/20] Simplifies the boolean-bitfield implementation to use `bit-vec` crate --- beacon_chain/types/src/attestation_record.rs | 2 +- .../utils/boolean-bitfield/Cargo.toml | 4 + beacon_chain/utils/boolean-bitfield/README.md | 6 +- .../utils/boolean-bitfield/src/lib.rs | 384 ++++++------------ 4 files changed, 119 insertions(+), 277 deletions(-) diff --git a/beacon_chain/types/src/attestation_record.rs 
b/beacon_chain/types/src/attestation_record.rs index 15b1a2e71b..569cfa2dc5 100644 --- a/beacon_chain/types/src/attestation_record.rs +++ b/beacon_chain/types/src/attestation_record.rs @@ -31,7 +31,7 @@ impl Encodable for AttestationRecord { s.append(&self.shard_id); s.append_vec(&self.oblique_parent_hashes); s.append(&self.shard_block_hash); - s.append_vec(&self.attester_bitfield.to_be_vec()); + s.append_vec(&self.attester_bitfield.to_bytes()); s.append(&self.justified_slot); s.append(&self.justified_block_hash); s.append_vec(&self.aggregate_sig.as_bytes()); diff --git a/beacon_chain/utils/boolean-bitfield/Cargo.toml b/beacon_chain/utils/boolean-bitfield/Cargo.toml index feacb844a5..b93e88f239 100644 --- a/beacon_chain/utils/boolean-bitfield/Cargo.toml +++ b/beacon_chain/utils/boolean-bitfield/Cargo.toml @@ -5,3 +5,7 @@ authors = ["Paul Hauner "] [dependencies] ssz = { path = "../ssz" } +bit-vec = "0.5.0" + +[dev-dependencies] +rand = "0.5.5" \ No newline at end of file diff --git a/beacon_chain/utils/boolean-bitfield/README.md b/beacon_chain/utils/boolean-bitfield/README.md index 749ccdb51d..adf83f6f88 100644 --- a/beacon_chain/utils/boolean-bitfield/README.md +++ b/beacon_chain/utils/boolean-bitfield/README.md @@ -1,7 +1,3 @@ # Boolean Bitfield -A work-in-progress implementation of an unbounded boolean bitfield. - -Based upon a `Vec` - -Documentation TBC... +Implements a set of boolean as a tightly-packed vector of bits. diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index db3d4f99f0..4d562d95b3 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -1,161 +1,74 @@ -/* - * Implemenation of a bitfield as a vec. Only - * supports bytes (Vec) as the underlying - * storage. - * - * A future implementation should be more efficient, - * this is just to get the job done for now. 
- */ +extern crate bit_vec; extern crate ssz; -use std::cmp::max; +#[cfg(test)] +extern crate rand; -#[derive(Eq, Clone, Default, Debug)] -pub struct BooleanBitfield { - len: usize, - vec: Vec, +use bit_vec::BitVec; + +/// A BooleanBitfield represents a set of booleans compactly stored as a vector of bits. +#[derive(Debug, Clone, PartialEq)] +pub struct BooleanBitfield(BitVec); + +/// Error represents some reason a request against a bitfield was not satisfied +#[derive(Debug)] +pub enum Error { + /// OutOfBounds refers to indexing into a bitfield where no bits exist; returns the illegal index and the current size of the bitfield, respectively + OutOfBounds(usize, usize), } impl BooleanBitfield { /// Create a new bitfield with a length of zero. pub fn new() -> Self { - Self { - len: 0, - vec: vec![0], - } + Self { 0: BitVec::new() } } - /// Create a new bitfield of a certain capacity - pub fn with_capacity(capacity: usize) -> Self { - let mut vec = Vec::with_capacity(capacity / 8 + 1); - vec.push(0); - Self { len: 0, vec } + /// Create a new bitfield using the supplied `bytes` as input + pub fn from_bytes(bytes: &[u8]) -> Self { + Self { + 0: BitVec::from_bytes(bytes), + } } /// Read the value of a bit. /// - /// Will return `true` if the bit has been set to `true` - /// without then being set to `False`. - pub fn get_bit(&self, i: usize) -> bool { - let bit = |i: usize| i % 8; - let byte = |i: usize| i / 8; - - if byte(i) >= self.vec.len() { - false - } else { - self.vec[byte(i)] & (1 << (bit(i) as u8)) != 0 + /// If the index is in bounds, then result is Ok(value) where value is `true` if the bit is 1 and `false` if the bit is 0. + /// If the index is out of bounds, we return an error to that extent. + pub fn get(&self, i: usize) -> Result { + match self.0.get(i) { + Some(value) => Ok(value), + None => Err(Error::OutOfBounds(i, self.0.len())), } } /// Set the value of a bit. 
/// - /// If this bit is larger than the length of the underlying byte - /// array it will be extended. - pub fn set_bit(&mut self, i: usize, to: bool) { - let bit = |i: usize| i % 8; - let byte = |i: usize| i / 8; - - self.len = max(self.len, i + 1); - - if byte(i) >= self.vec.len() { - self.vec.resize(byte(i) + 1, 0); - } - if to { - self.vec[byte(i)] = self.vec[byte(i)] | (1 << (bit(i) as u8)) - } else { - self.vec[byte(i)] = self.vec[byte(i)] & !(1 << (bit(i) as u8)) - } + /// Returns the previous value if successful. + /// If the index is out of bounds, we return an error to that extent. + pub fn set(&mut self, i: usize, value: bool) -> Result { + let previous = self.get(i)?; + self.0.set(i, value); + Ok(previous) } - /// Return the "length" of this bitfield. Length is defined as - /// the highest bit that has been set. - /// - /// Note: this is distinct from the length of the underlying - /// vector. + /// Returns the index of the highest set bit. Some(n) if some bit is set, None otherwise. + pub fn highest_set_bit(&self) -> Option { + self.0.iter().rposition(|bit| bit) + } + + /// Returns the number of bits in this bitfield. pub fn len(&self) -> usize { - self.len + self.0.len() } - /// True if no bits have ever been set. A bit that is set and then - /// unset will still count to the length of the bitfield. - /// - /// Note: this is distinct from the length of the underlying - /// vector. - pub fn is_empty(&self) -> bool { - self.len == 0 + /// Returns the number of `1` bits in the bitfield + pub fn num_set_bits(&self) -> usize { + self.0.iter().filter(|&bit| bit).count() } - /// The number of bytes required to represent the bitfield. - pub fn num_bytes(&self) -> usize { - self.vec.len() - } - - /// Iterate through the underlying vector and count the number of - /// true bits. 
- pub fn num_true_bits(&self) -> u64 { - let mut count: u64 = 0; - for byte in &self.vec { - for bit in 0..8 { - if byte & (1 << (bit as u8)) != 0 { - count += 1; - } - } - } - count - } - - /// Iterate through the underlying vector and find the highest - /// set bit. Useful for instantiating a new instance from - /// some set of bytes. - pub fn compute_length(bytes: &[u8]) -> usize { - for byte in (0..bytes.len()).rev() { - for bit in (0..8).rev() { - if bytes[byte] & (1 << (bit as u8)) != 0 { - return (byte * 8) + bit + 1; - } - } - } - 0 - } - - /// Get the byte at a position, assuming big-endian encoding. - pub fn get_byte(&self, n: usize) -> Option<&u8> { - self.vec.get(n) - } - - /// Clone and return the underlying byte array (`Vec`). - pub fn to_vec(&self) -> Vec { - self.vec.clone() - } - - /// Clone and return the underlying byte array (`Vec`) in big-endinan format. - pub fn to_be_vec(&self) -> Vec { - let mut o = self.vec.clone(); - o.reverse(); - o - } -} - -impl<'a> From<&'a [u8]> for BooleanBitfield { - fn from(input: &[u8]) -> Self { - let mut vec = input.to_vec(); - vec.reverse(); - BooleanBitfield { - vec, - len: BooleanBitfield::compute_length(input), - } - } -} - -impl PartialEq for BooleanBitfield { - fn eq(&self, other: &BooleanBitfield) -> bool { - (self.vec == other.vec) & (self.len == other.len) - } -} - -impl ssz::Encodable for BooleanBitfield { - fn ssz_append(&self, s: &mut ssz::SszStream) { - s.append_vec(&self.to_vec()); + /// Returns a vector of bytes representing the bitfield + pub fn to_bytes(&self) -> Vec { + self.0.to_bytes() } } @@ -165,12 +78,13 @@ impl ssz::Decodable for BooleanBitfield { if (ssz::LENGTH_BYTES + len) > bytes.len() { return Err(ssz::DecodeError::TooShort); } + if len == 0 { Ok((BooleanBitfield::new(), index + ssz::LENGTH_BYTES)) } else { - let b = BooleanBitfield::from(&bytes[(index + 4)..(index + len + 4)]); + let field = BooleanBitfield::from_bytes(&bytes[(index + 4)..(index + len + 4)]); let index = index + 
ssz::LENGTH_BYTES + len; - Ok((b, index)) + Ok((field, index)) } } } @@ -178,149 +92,77 @@ impl ssz::Decodable for BooleanBitfield { #[cfg(test)] mod tests { use super::*; - use ssz::Decodable; #[test] - fn test_new_from_slice() { - let s = [0]; - let b = BooleanBitfield::from(&s[..]); - assert_eq!(b.len, 0); + fn test_empty_bitfield() { + let mut field = BooleanBitfield::new(); - let s = [255]; - let b = BooleanBitfield::from(&s[..]); - assert_eq!(b.len, 8); - - let s = [0, 1]; - let b = BooleanBitfield::from(&s[..]); - assert_eq!(b.len, 9); - - let s = [31]; - let b = BooleanBitfield::from(&s[..]); - assert_eq!(b.len, 5); - } - - #[test] - fn test_ssz_encoding() { - let mut b = BooleanBitfield::new(); - b.set_bit(8, true); - - let mut stream = ssz::SszStream::new(); - stream.append(&b); - - assert_eq!(stream.drain(), vec![0, 0, 0, 2, 0, 1]); - } - - /* - #[test] - fn test_ssz_decoding() { - /* - * Correct input - */ - let input = vec![0, 0, 0, 2, 0, 1]; - let (b, i) = BooleanBitfield::ssz_decode(&input, 0).unwrap(); - assert_eq!(i, 6); - assert_eq!(b.num_true_bits(), 1); - assert_eq!(b.get_bit(8), true); - - /* - * Input too long - */ - let mut input = vec![0, 0, 0, 2, 0, 1]; - input.push(42); - let (b, i) = BooleanBitfield::ssz_decode(&input, 0).unwrap(); - assert_eq!(i, 6); - assert_eq!(b.num_true_bits(), 1); - assert_eq!(b.get_bit(8), true); - - /* - * Input too short - */ - let input = vec![0, 0, 0, 2, 1]; - let res = BooleanBitfield::ssz_decode(&input, 0); - assert_eq!(res, Err(ssz::DecodeError::TooShort)); - } - */ - - #[test] - fn test_new_bitfield_len() { - let b = BooleanBitfield::new(); - assert_eq!(b.len(), 0); - assert_eq!(b.to_be_vec(), vec![0]); - - let b = BooleanBitfield::with_capacity(100); - assert_eq!(b.len(), 0); - assert_eq!(b.to_be_vec(), vec![0]); - } - - #[test] - fn test_bitfield_set() { - let mut b = BooleanBitfield::new(); - b.set_bit(0, false); - assert_eq!(b.to_be_vec(), [0]); - - b = BooleanBitfield::new(); - b.set_bit(7, true); - 
assert_eq!(b.to_be_vec(), [128]); - b.set_bit(7, false); - assert_eq!(b.to_be_vec(), [0]); - assert_eq!(b.len(), 8); - - b = BooleanBitfield::new(); - b.set_bit(7, true); - b.set_bit(0, true); - assert_eq!(b.to_be_vec(), [129]); - b.set_bit(7, false); - assert_eq!(b.to_be_vec(), [1]); - assert_eq!(b.len(), 8); - - b = BooleanBitfield::new(); - b.set_bit(8, true); - assert_eq!(b.to_be_vec(), [1, 0]); - assert_eq!(b.len(), 9); - b.set_bit(8, false); - assert_eq!(b.to_be_vec(), [0, 0]); - assert_eq!(b.len(), 9); - - b = BooleanBitfield::new(); - b.set_bit(15, true); - assert_eq!(b.to_be_vec(), [128, 0]); - b.set_bit(15, false); - assert_eq!(b.to_be_vec(), [0, 0]); - assert_eq!(b.len(), 16); - - b = BooleanBitfield::new(); - b.set_bit(8, true); - b.set_bit(15, true); - assert_eq!(b.to_be_vec(), [129, 0]); - b.set_bit(15, false); - assert_eq!(b.to_be_vec(), [1, 0]); - assert_eq!(b.len(), 16); - } - - #[test] - fn test_bitfield_get() { - let test_nums = vec![0, 8, 15, 42, 1337]; - for i in test_nums { - let mut b = BooleanBitfield::new(); - assert_eq!(b.get_bit(i), false); - b.set_bit(i, true); - assert_eq!(b.get_bit(i), true); - b.set_bit(i, true); + for _ in 0..100 { + let index: usize = rand::random(); + assert!(field.get(index).is_err()); + assert!(field.set(index, rand::random()).is_err()) } } + const INPUT: &[u8] = &[0b0000_0010, 0b0000_0010]; + #[test] - fn test_bitfield_num_true_bits() { - let mut b = BooleanBitfield::new(); - assert_eq!(b.num_true_bits(), 0); - b.set_bit(15, true); - assert_eq!(b.num_true_bits(), 1); - b.set_bit(15, false); - assert_eq!(b.num_true_bits(), 0); - b.set_bit(0, true); - b.set_bit(7, true); - b.set_bit(8, true); - b.set_bit(1337, true); - assert_eq!(b.num_true_bits(), 4); + fn test_get_from_bitfield() { + let field = BooleanBitfield::from_bytes(INPUT); + let unset = field.get(0).unwrap(); + assert!(!unset); + let set = field.get(6).unwrap(); + assert!(set); + let set = field.get(14).unwrap(); + assert!(set); + } + + #[test] + fn 
test_set_for_bitfield() { + let mut field = BooleanBitfield::from_bytes(INPUT); + let previous = field.set(10, true).unwrap(); + assert!(!previous); + let previous = field.get(10).unwrap(); + assert!(previous); + let previous = field.set(6, false).unwrap(); + assert!(previous); + let previous = field.get(6).unwrap(); + assert!(!previous); + } + + #[test] + fn test_highest_set_bit() { + let field = BooleanBitfield::from_bytes(INPUT); + assert_eq!(field.highest_set_bit().unwrap(), 14); + + let field = BooleanBitfield::new(); + assert_eq!(field.highest_set_bit(), None); + } + + #[test] + fn test_len() { + let field = BooleanBitfield::from_bytes(INPUT); + assert_eq!(field.len(), 16); + + let field = BooleanBitfield::new(); + assert_eq!(field.len(), 0); + } + + #[test] + fn test_num_set_bits() { + let field = BooleanBitfield::from_bytes(INPUT); + assert_eq!(field.num_set_bits(), 2); + + let field = BooleanBitfield::new(); + assert_eq!(field.num_set_bits(), 0); + } + + #[test] + fn test_to_bytes() { + let field = BooleanBitfield::from_bytes(INPUT); + assert_eq!(field.to_bytes(), INPUT); + + let field = BooleanBitfield::new(); + assert_eq!(field.to_bytes(), vec![]); } } From 2defe8e4ee4e8b0ad5ad856937ff73eb51f61981 Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Wed, 7 Nov 2018 14:32:33 -0800 Subject: [PATCH 07/20] get tests passing (except one) --- beacon_chain/types/src/lib.rs | 9 +++++-- .../utils/boolean-bitfield/src/lib.rs | 16 ++++++------ .../validation/src/attestation_validation.rs | 7 +++-- .../validation/src/signature_verification.rs | 15 ++++++++--- .../tests/attestation_validation/helpers.rs | 2 +- .../tests/attestation_validation/tests.rs | 26 +++++-------------- 6 files changed, 36 insertions(+), 39 deletions(-) diff --git a/beacon_chain/types/src/lib.rs b/beacon_chain/types/src/lib.rs index e8e42e8e92..c531da2990 100644 --- a/beacon_chain/types/src/lib.rs +++ b/beacon_chain/types/src/lib.rs @@ -15,7 +15,11 @@ pub mod validator_record; pub mod 
validator_registration; use self::boolean_bitfield::BooleanBitfield; -use self::ethereum_types::{H160, H256, U256}; +use self::ethereum_types::{ + H256, + H160, + U256 +}; use std::collections::HashMap; pub use active_state::ActiveState; @@ -32,7 +36,8 @@ pub use validator_registration::ValidatorRegistration; pub type Hash256 = H256; pub type Address = H160; pub type EthBalance = U256; -pub type Bitfield = BooleanBitfield; +pub type Bitfield = boolean_bitfield::BooleanBitfield; +pub type BitfieldError = boolean_bitfield::Error; /// Maps a (slot, shard_id) to attestation_indices. pub type AttesterMap = HashMap<(u64, u16), Vec>; diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index 4d562d95b3..1f96a9afd4 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -24,7 +24,7 @@ impl BooleanBitfield { } /// Create a new bitfield using the supplied `bytes` as input - pub fn from_bytes(bytes: &[u8]) -> Self { + pub fn from(bytes: &[u8]) -> Self { Self { 0: BitVec::from_bytes(bytes), } @@ -82,7 +82,7 @@ impl ssz::Decodable for BooleanBitfield { if len == 0 { Ok((BooleanBitfield::new(), index + ssz::LENGTH_BYTES)) } else { - let field = BooleanBitfield::from_bytes(&bytes[(index + 4)..(index + len + 4)]); + let field = BooleanBitfield::from(&bytes[(index + 4)..(index + len + 4)]); let index = index + ssz::LENGTH_BYTES + len; Ok((field, index)) } @@ -108,7 +108,7 @@ mod tests { #[test] fn test_get_from_bitfield() { - let field = BooleanBitfield::from_bytes(INPUT); + let field = BooleanBitfield::from(INPUT); let unset = field.get(0).unwrap(); assert!(!unset); let set = field.get(6).unwrap(); @@ -119,7 +119,7 @@ mod tests { #[test] fn test_set_for_bitfield() { - let mut field = BooleanBitfield::from_bytes(INPUT); + let mut field = BooleanBitfield::from(INPUT); let previous = field.set(10, true).unwrap(); assert!(!previous); let previous = 
field.get(10).unwrap(); @@ -132,7 +132,7 @@ mod tests { #[test] fn test_highest_set_bit() { - let field = BooleanBitfield::from_bytes(INPUT); + let field = BooleanBitfield::from(INPUT); assert_eq!(field.highest_set_bit().unwrap(), 14); let field = BooleanBitfield::new(); @@ -141,7 +141,7 @@ mod tests { #[test] fn test_len() { - let field = BooleanBitfield::from_bytes(INPUT); + let field = BooleanBitfield::from(INPUT); assert_eq!(field.len(), 16); let field = BooleanBitfield::new(); @@ -150,7 +150,7 @@ mod tests { #[test] fn test_num_set_bits() { - let field = BooleanBitfield::from_bytes(INPUT); + let field = BooleanBitfield::from(INPUT); assert_eq!(field.num_set_bits(), 2); let field = BooleanBitfield::new(); @@ -159,7 +159,7 @@ mod tests { #[test] fn test_to_bytes() { - let field = BooleanBitfield::from_bytes(INPUT); + let field = BooleanBitfield::from(INPUT); assert_eq!(field.to_bytes(), INPUT); let field = BooleanBitfield::new(); diff --git a/beacon_chain/validation/src/attestation_validation.rs b/beacon_chain/validation/src/attestation_validation.rs index d4e451d381..d63d74483e 100644 --- a/beacon_chain/validation/src/attestation_validation.rs +++ b/beacon_chain/validation/src/attestation_validation.rs @@ -32,6 +32,7 @@ pub enum AttestationValidationError { NonZeroTrailingBits, BadAggregateSignature, DBError(String), + OutOfBoundsBitfieldIndex, } /// The context against which some attestation should be validated. 
@@ -198,10 +199,6 @@ where } } -fn bytes_for_bits(bits: usize) -> usize { - (bits.saturating_sub(1) / 8) + 1 -} - impl From for AttestationValidationError { fn from(e: ParentHashesError) -> Self { match e { @@ -242,6 +239,8 @@ impl From for AttestationValidationError { AttestationValidationError::NoPublicKeyForValidator } SignatureVerificationError::DBError(s) => AttestationValidationError::DBError(s), + SignatureVerificationError::OutOfBoundsBitfieldIndex + => AttestationValidationError::OutOfBoundsBitfieldIndex, } } } diff --git a/beacon_chain/validation/src/signature_verification.rs b/beacon_chain/validation/src/signature_verification.rs index fddaabb997..18a4472675 100644 --- a/beacon_chain/validation/src/signature_verification.rs +++ b/beacon_chain/validation/src/signature_verification.rs @@ -1,7 +1,7 @@ use super::bls::{AggregatePublicKey, AggregateSignature}; use super::db::stores::{ValidatorStore, ValidatorStoreError}; use super::db::ClientDB; -use super::types::Bitfield; +use super::types::{Bitfield, BitfieldError}; use std::collections::HashSet; #[derive(Debug, PartialEq)] @@ -10,6 +10,13 @@ pub enum SignatureVerificationError { PublicKeyCorrupt, NoPublicKeyForValidator, DBError(String), + OutOfBoundsBitfieldIndex, +} + +impl From for SignatureVerificationError { + fn from(_error: BitfieldError) -> Self { + SignatureVerificationError::OutOfBoundsBitfieldIndex + } } /// Verify an aggregate signature across the supplied message. @@ -33,7 +40,7 @@ where let mut agg_pub_key = AggregatePublicKey::new(); for i in 0..attestation_indices.len() { - let voted = bitfield.get_bit(i); + let voted = bitfield.get(i)?; if voted { /* * De-reference the attestation index into a canonical ValidatorRecord index. 
@@ -123,7 +130,7 @@ mod tests { let attestation_indices: Vec = (0..all_keypairs.len()).collect(); let mut bitfield = Bitfield::new(); for i in 0..signing_keypairs.len() { - bitfield.set_bit(i, true); + bitfield.set(i, true).unwrap(); } let db = Arc::new(MemoryDB::open()); @@ -159,7 +166,7 @@ mod tests { * Add another validator to the bitfield, run validation will all other * parameters the same and assert that it fails. */ - bitfield.set_bit(signing_keypairs.len() + 1, true); + bitfield.set(signing_keypairs.len() + 1, true).unwrap(); let voters = verify_aggregate_signature_for_indices( &message, &agg_sig, diff --git a/beacon_chain/validation/tests/attestation_validation/helpers.rs b/beacon_chain/validation/tests/attestation_validation/helpers.rs index a148f9a690..a9c92ca5a6 100644 --- a/beacon_chain/validation/tests/attestation_validation/helpers.rs +++ b/beacon_chain/validation/tests/attestation_validation/helpers.rs @@ -95,7 +95,7 @@ pub fn generate_attestation( * and sign the aggregate sig. */ if let Some(sk) = secret_key { - attester_bitfield.set_bit(i, true); + attester_bitfield.set(i, true).unwrap(); let sig = Signature::new(&attestation_message, sk); aggregate_sig.add(&sig); } diff --git a/beacon_chain/validation/tests/attestation_validation/tests.rs b/beacon_chain/validation/tests/attestation_validation/tests.rs index 7c96170708..4ea954c89a 100644 --- a/beacon_chain/validation/tests/attestation_validation/tests.rs +++ b/beacon_chain/validation/tests/attestation_validation/tests.rs @@ -133,12 +133,8 @@ fn test_attestation_validation_invalid_bad_bitfield_length() { * of the bitfield. 
*/ let one_byte_higher = rig.attester_count + 8; - rig.attestation - .attester_bitfield - .set_bit(one_byte_higher, true); - rig.attestation - .attester_bitfield - .set_bit(one_byte_higher, false); + rig.attestation.attester_bitfield.set(one_byte_higher, true).unwrap(); + rig.attestation.attester_bitfield.set(one_byte_higher, false).unwrap(); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!(result, Err(AttestationValidationError::BadBitfieldLength)); @@ -149,9 +145,7 @@ fn test_attestation_validation_invalid_invalid_bitfield_end_bit() { let mut rig = generic_rig(); let one_bit_high = rig.attester_count + 1; - rig.attestation - .attester_bitfield - .set_bit(one_bit_high, true); + rig.attestation.attester_bitfield.set(one_bit_high, true).unwrap(); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!( @@ -174,19 +168,11 @@ fn test_attestation_validation_invalid_invalid_bitfield_end_bit_with_irreguar_bi * bit in a bitfield and the byte length of that bitfield */ let one_bit_high = rig.attester_count + 1; - assert!( - one_bit_high % 8 != 0, - "the test is ineffective in this case." 
- ); - rig.attestation - .attester_bitfield - .set_bit(one_bit_high, true); + assert!(one_bit_high % 8 != 0, "the test is ineffective in this case."); + rig.attestation.attester_bitfield.set(one_bit_high, true).unwrap(); let result = rig.context.validate_attestation(&rig.attestation); - assert_eq!( - result, - Err(AttestationValidationError::InvalidBitfieldEndBits) - ); + assert_eq!(result, Err(AttestationValidationError::InvalidBitfieldEndBits)); } #[test] From b1f804656375f64d1d3acdc5bade06ab1f18cd3f Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Thu, 15 Nov 2018 09:19:59 -0800 Subject: [PATCH 08/20] Update bitfield to expand size when writing out-of-bounds --- .../utils/boolean-bitfield/Cargo.toml | 5 +- .../utils/boolean-bitfield/src/lib.rs | 96 ++++++++++++++----- 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/beacon_chain/utils/boolean-bitfield/Cargo.toml b/beacon_chain/utils/boolean-bitfield/Cargo.toml index b93e88f239..1633401e22 100644 --- a/beacon_chain/utils/boolean-bitfield/Cargo.toml +++ b/beacon_chain/utils/boolean-bitfield/Cargo.toml @@ -5,7 +5,4 @@ authors = ["Paul Hauner "] [dependencies] ssz = { path = "../ssz" } -bit-vec = "0.5.0" - -[dev-dependencies] -rand = "0.5.5" \ No newline at end of file +bit-vec = "0.5.0" \ No newline at end of file diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index 1f96a9afd4..ceff3bbcf2 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -1,12 +1,12 @@ extern crate bit_vec; extern crate ssz; -#[cfg(test)] -extern crate rand; - use bit_vec::BitVec; +use std::default; + /// A BooleanBitfield represents a set of booleans compactly stored as a vector of bits. +/// The BooleanBitfield is given a fixed size during construction. Reads outside of the current size return an out-of-bounds error. Writes outside of the current size expand the size of the set. 
#[derive(Debug, Clone, PartialEq)] pub struct BooleanBitfield(BitVec); @@ -18,13 +18,20 @@ pub enum Error { } impl BooleanBitfield { - /// Create a new bitfield with a length of zero. + /// Create a new bitfield. pub fn new() -> Self { - Self { 0: BitVec::new() } + Default::default() + } + + /// Create a new bitfield with the given length `initial_len` and all values set to `bit`. + pub fn from_elem(inital_len: usize, bit: bool) -> Self { + Self { + 0: BitVec::from_elem(inital_len, bit), + } } /// Create a new bitfield using the supplied `bytes` as input - pub fn from(bytes: &[u8]) -> Self { + pub fn from_bytes(bytes: &[u8]) -> Self { Self { 0: BitVec::from_bytes(bytes), } @@ -43,12 +50,19 @@ impl BooleanBitfield { /// Set the value of a bit. /// - /// Returns the previous value if successful. - /// If the index is out of bounds, we return an error to that extent. - pub fn set(&mut self, i: usize, value: bool) -> Result { - let previous = self.get(i)?; + /// If the index is out of bounds, we expand the size of the underlying set to include the new index. + /// Returns the previous value if there was one. + pub fn set(&mut self, i: usize, value: bool) -> Option { + let previous = match self.get(i) { + Ok(previous) => Some(previous), + Err(Error::OutOfBounds(_, len)) => { + let new_len = i - len + 1; + self.0.grow(new_len, false); + None + } + }; self.0.set(i, value); - Ok(previous) + previous } /// Returns the index of the highest set bit. Some(n) if some bit is set, None otherwise. @@ -72,6 +86,14 @@ impl BooleanBitfield { } } +impl default::Default for BooleanBitfield { + /// default provides the "empty" bitfield + /// Note: the empty bitfield is set to the `0` byte. 
+ fn default() -> Self { + Self::from_elem(8, false) + } +} + impl ssz::Decodable for BooleanBitfield { fn ssz_decode(bytes: &[u8], index: usize) -> Result<(Self, usize), ssz::DecodeError> { let len = ssz::decode::decode_length(bytes, index, ssz::LENGTH_BYTES)?; @@ -82,7 +104,7 @@ impl ssz::Decodable for BooleanBitfield { if len == 0 { Ok((BooleanBitfield::new(), index + ssz::LENGTH_BYTES)) } else { - let field = BooleanBitfield::from(&bytes[(index + 4)..(index + len + 4)]); + let field = BooleanBitfield::from_bytes(&bytes[(index + 4)..(index + len + 4)]); let index = index + ssz::LENGTH_BYTES + len; Ok((field, index)) } @@ -96,11 +118,20 @@ mod tests { #[test] fn test_empty_bitfield() { let mut field = BooleanBitfield::new(); + let original_len = field.len(); - for _ in 0..100 { - let index: usize = rand::random(); - assert!(field.get(index).is_err()); - assert!(field.set(index, rand::random()).is_err()) + for i in 0..100 { + if i < original_len { + assert!(!field.get(i).unwrap()); + } else { + assert!(field.get(i).is_err()); + } + let previous = field.set(i, true); + if i < original_len { + assert!(!previous.unwrap()); + } else { + assert!(previous.is_none()); + } } } @@ -108,7 +139,7 @@ mod tests { #[test] fn test_get_from_bitfield() { - let field = BooleanBitfield::from(INPUT); + let field = BooleanBitfield::from_bytes(INPUT); let unset = field.get(0).unwrap(); assert!(!unset); let set = field.get(6).unwrap(); @@ -119,7 +150,7 @@ mod tests { #[test] fn test_set_for_bitfield() { - let mut field = BooleanBitfield::from(INPUT); + let mut field = BooleanBitfield::from_bytes(INPUT); let previous = field.set(10, true).unwrap(); assert!(!previous); let previous = field.get(10).unwrap(); @@ -132,7 +163,7 @@ mod tests { #[test] fn test_highest_set_bit() { - let field = BooleanBitfield::from(INPUT); + let field = BooleanBitfield::from_bytes(INPUT); assert_eq!(field.highest_set_bit().unwrap(), 14); let field = BooleanBitfield::new(); @@ -141,16 +172,16 @@ mod tests { 
#[test] fn test_len() { - let field = BooleanBitfield::from(INPUT); + let field = BooleanBitfield::from_bytes(INPUT); assert_eq!(field.len(), 16); let field = BooleanBitfield::new(); - assert_eq!(field.len(), 0); + assert_eq!(field.len(), 8); } #[test] fn test_num_set_bits() { - let field = BooleanBitfield::from(INPUT); + let field = BooleanBitfield::from_bytes(INPUT); assert_eq!(field.num_set_bits(), 2); let field = BooleanBitfield::new(); @@ -159,10 +190,27 @@ mod tests { #[test] fn test_to_bytes() { - let field = BooleanBitfield::from(INPUT); + let field = BooleanBitfield::from_bytes(INPUT); assert_eq!(field.to_bytes(), INPUT); let field = BooleanBitfield::new(); - assert_eq!(field.to_bytes(), vec![]); + assert_eq!(field.to_bytes(), vec![0]); + } + + #[test] + fn test_out_of_bounds() { + let mut field = BooleanBitfield::from_bytes(INPUT); + + let out_of_bounds_index = field.len(); + assert!(field.set(out_of_bounds_index, true).is_none()); + assert!(field.get(out_of_bounds_index).unwrap()); + + for i in 0..100 { + if i <= out_of_bounds_index { + assert!(field.set(i, true).is_some()); + } else { + assert!(field.set(i, true).is_none()); + } + } } } From f611602235bdf3019baa2df2dd0bcbcc383c9bcb Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Thu, 15 Nov 2018 09:20:46 -0800 Subject: [PATCH 09/20] Begin updating tests to reflect changes to bitfield --- beacon_chain/types/src/attestation_record.rs | 2 +- .../utils/ssz_helpers/src/attestation_ssz_splitter.rs | 4 ++-- beacon_chain/validation/src/signature_verification.rs | 2 +- .../validation/tests/attestation_validation/helpers.rs | 2 +- .../validation/tests/attestation_validation/tests.rs | 10 ++++------ 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/beacon_chain/types/src/attestation_record.rs b/beacon_chain/types/src/attestation_record.rs index 569cfa2dc5..ed38664a93 100644 --- a/beacon_chain/types/src/attestation_record.rs +++ b/beacon_chain/types/src/attestation_record.rs @@ -103,7 +103,7 @@ mod 
tests { shard_id: 9, oblique_parent_hashes: vec![Hash256::from(&vec![14; 32][..])], shard_block_hash: Hash256::from(&vec![15; 32][..]), - attester_bitfield: Bitfield::from(&vec![17; 42][..]), + attester_bitfield: Bitfield::from_bytes(&vec![17; 42][..]), justified_slot: 19, justified_block_hash: Hash256::from(&vec![15; 32][..]), aggregate_sig: AggregateSignature::new(), diff --git a/beacon_chain/utils/ssz_helpers/src/attestation_ssz_splitter.rs b/beacon_chain/utils/ssz_helpers/src/attestation_ssz_splitter.rs index 2c5b1d6aa5..a2f05ccb7d 100644 --- a/beacon_chain/utils/ssz_helpers/src/attestation_ssz_splitter.rs +++ b/beacon_chain/utils/ssz_helpers/src/attestation_ssz_splitter.rs @@ -62,7 +62,7 @@ mod tests { shard_id: 9, oblique_parent_hashes: vec![Hash256::from(&vec![14; 32][..])], shard_block_hash: Hash256::from(&vec![15; 32][..]), - attester_bitfield: Bitfield::from(&vec![17; 42][..]), + attester_bitfield: Bitfield::from_bytes(&vec![17; 42][..]), justified_slot: 19, justified_block_hash: Hash256::from(&vec![15; 32][..]), aggregate_sig: AggregateSignature::new(), @@ -72,7 +72,7 @@ mod tests { shard_id: 7, oblique_parent_hashes: vec![Hash256::from(&vec![15; 32][..])], shard_block_hash: Hash256::from(&vec![14; 32][..]), - attester_bitfield: Bitfield::from(&vec![19; 42][..]), + attester_bitfield: Bitfield::from_bytes(&vec![19; 42][..]), justified_slot: 15, justified_block_hash: Hash256::from(&vec![17; 32][..]), aggregate_sig: AggregateSignature::new(), diff --git a/beacon_chain/validation/src/signature_verification.rs b/beacon_chain/validation/src/signature_verification.rs index 18a4472675..59fa59dcb0 100644 --- a/beacon_chain/validation/src/signature_verification.rs +++ b/beacon_chain/validation/src/signature_verification.rs @@ -128,7 +128,7 @@ mod tests { all_keypairs.append(&mut non_signing_keypairs.clone()); let attestation_indices: Vec = (0..all_keypairs.len()).collect(); - let mut bitfield = Bitfield::new(); + let mut bitfield = 
Bitfield::from_elem(all_keypairs.len(), false); for i in 0..signing_keypairs.len() { bitfield.set(i, true).unwrap(); } diff --git a/beacon_chain/validation/tests/attestation_validation/helpers.rs b/beacon_chain/validation/tests/attestation_validation/helpers.rs index a9c92ca5a6..680f979da3 100644 --- a/beacon_chain/validation/tests/attestation_validation/helpers.rs +++ b/beacon_chain/validation/tests/attestation_validation/helpers.rs @@ -63,7 +63,7 @@ pub fn generate_attestation( signing_keys: &[Option], block_store: &BeaconBlockStore, ) -> AttestationRecord { - let mut attester_bitfield = Bitfield::new(); + let mut attester_bitfield = Bitfield::from_elem(signing_keys.len(), false); let mut aggregate_sig = AggregateSignature::new(); let parent_hashes_slice = { diff --git a/beacon_chain/validation/tests/attestation_validation/tests.rs b/beacon_chain/validation/tests/attestation_validation/tests.rs index 4ea954c89a..171d983f58 100644 --- a/beacon_chain/validation/tests/attestation_validation/tests.rs +++ b/beacon_chain/validation/tests/attestation_validation/tests.rs @@ -129,12 +129,10 @@ fn test_attestation_validation_invalid_bad_bitfield_length() { /* * Extend the bitfield by one byte * - * This is a little hacky and makes assumptions about the internals - * of the bitfield. + * We take advantage of the fact that setting a bit outside the current bounds will grow the bitvector. 
*/ let one_byte_higher = rig.attester_count + 8; - rig.attestation.attester_bitfield.set(one_byte_higher, true).unwrap(); - rig.attestation.attester_bitfield.set(one_byte_higher, false).unwrap(); + rig.attestation.attester_bitfield.set(one_byte_higher, false); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!(result, Err(AttestationValidationError::BadBitfieldLength)); @@ -145,7 +143,7 @@ fn test_attestation_validation_invalid_invalid_bitfield_end_bit() { let mut rig = generic_rig(); let one_bit_high = rig.attester_count + 1; - rig.attestation.attester_bitfield.set(one_bit_high, true).unwrap(); + rig.attestation.attester_bitfield.set(one_bit_high, true); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!( @@ -169,7 +167,7 @@ fn test_attestation_validation_invalid_invalid_bitfield_end_bit_with_irreguar_bi */ let one_bit_high = rig.attester_count + 1; assert!(one_bit_high % 8 != 0, "the test is ineffective in this case."); - rig.attestation.attester_bitfield.set(one_bit_high, true).unwrap(); + rig.attestation.attester_bitfield.set(one_bit_high, true); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!(result, Err(AttestationValidationError::InvalidBitfieldEndBits)); From 38d9d7ac928a68b2c5fb5bc67b42fc8a12f032a9 Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Tue, 20 Nov 2018 10:12:49 -0800 Subject: [PATCH 10/20] Add method to calculate the underlying number of bytes Required for part of attestation validation logic --- beacon_chain/utils/boolean-bitfield/src/lib.rs | 18 ++++++++++++++++++ .../validation/src/attestation_validation.rs | 6 +++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index ceff3bbcf2..2a08930fec 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -75,6 +75,11 @@ impl BooleanBitfield { self.0.len() 
} + /// Returns the number of bytes required to represent this bitfield. + pub fn num_bytes(&self) -> usize { + self.to_bytes().len() + } + /// Returns the number of `1` bits in the bitfield pub fn num_set_bits(&self) -> usize { self.0.iter().filter(|&bit| bit).count() @@ -203,6 +208,7 @@ mod tests { let out_of_bounds_index = field.len(); assert!(field.set(out_of_bounds_index, true).is_none()); + assert!(field.len() == out_of_bounds_index + 1); assert!(field.get(out_of_bounds_index).unwrap()); for i in 0..100 { @@ -213,4 +219,16 @@ mod tests { } } } + + #[test] + fn test_num_bytes() { + let field = BooleanBitfield::from_bytes(INPUT); + assert_eq!(field.num_bytes(), 2); + + let field = BooleanBitfield::from_elem(2, true); + assert_eq!(field.num_bytes(), 1); + + let field = BooleanBitfield::from_elem(13, true); + assert_eq!(field.num_bytes(), 2); + } } diff --git a/beacon_chain/validation/src/attestation_validation.rs b/beacon_chain/validation/src/attestation_validation.rs index d63d74483e..da761432e5 100644 --- a/beacon_chain/validation/src/attestation_validation.rs +++ b/beacon_chain/validation/src/attestation_validation.rs @@ -199,6 +199,10 @@ where } } +fn bytes_for_bits(bits: usize) -> usize { + (bits.saturating_sub(1) / 8) + 1 +} + impl From for AttestationValidationError { fn from(e: ParentHashesError) -> Self { match e { @@ -243,4 +247,4 @@ impl From for AttestationValidationError { => AttestationValidationError::OutOfBoundsBitfieldIndex, } } -} +} \ No newline at end of file From 8c78dde43b9b13d7dbc0b7fab72c13e5174ad74e Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Tue, 20 Nov 2018 12:27:44 -0800 Subject: [PATCH 11/20] Fixes bug with `ssz` encoding of `BooleanBitfield` --- beacon_chain/types/src/attestation_record.rs | 2 +- .../utils/boolean-bitfield/src/lib.rs | 64 ++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/beacon_chain/types/src/attestation_record.rs b/beacon_chain/types/src/attestation_record.rs index 
ed38664a93..e4ddffd62d 100644 --- a/beacon_chain/types/src/attestation_record.rs +++ b/beacon_chain/types/src/attestation_record.rs @@ -31,7 +31,7 @@ impl Encodable for AttestationRecord { s.append(&self.shard_id); s.append_vec(&self.oblique_parent_hashes); s.append(&self.shard_block_hash); - s.append_vec(&self.attester_bitfield.to_bytes()); + s.append(&self.attester_bitfield); s.append(&self.justified_slot); s.append(&self.justified_block_hash); s.append_vec(&self.aggregate_sig.as_bytes()); diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index 2a08930fec..b90e4949e9 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -86,6 +86,7 @@ impl BooleanBitfield { } /// Returns a vector of bytes representing the bitfield + /// Note that this returns the bit layout of the underlying implementation in the `bit-vec` crate. pub fn to_bytes(&self) -> Vec { self.0.to_bytes() } @@ -99,6 +100,28 @@ impl default::Default for BooleanBitfield { } } +// borrowed from bit_vec crate +fn reverse_bits(byte: u8) -> u8 { + let mut result = 0; + for i in 0..8 { + result = result | ((byte >> i) & 1) << (7 - i); + } + result +} + +impl ssz::Encodable for BooleanBitfield { + // ssz_append encodes Self according to the `ssz` spec. + // Note that we have to flip the endianness of the encoding with `reverse_bits` to account for an implementation detail of `bit-vec` crate. 
+ fn ssz_append(&self, s: &mut ssz::SszStream) { + let bytes: Vec = self + .to_bytes() + .iter() + .map(|&byte| reverse_bits(byte)) + .collect(); + s.append_vec(&bytes); + } +} + impl ssz::Decodable for BooleanBitfield { fn ssz_decode(bytes: &[u8], index: usize) -> Result<(Self, usize), ssz::DecodeError> { let len = ssz::decode::decode_length(bytes, index, ssz::LENGTH_BYTES)?; @@ -109,7 +132,18 @@ impl ssz::Decodable for BooleanBitfield { if len == 0 { Ok((BooleanBitfield::new(), index + ssz::LENGTH_BYTES)) } else { - let field = BooleanBitfield::from_bytes(&bytes[(index + 4)..(index + len + 4)]); + let bytes = &bytes[(index + 4)..(index + len + 4)]; + + let mut field = BooleanBitfield::from_elem(0, false); + for (byte_index, byte) in bytes.iter().enumerate() { + for i in 0..8 { + let bit = byte & (1 << i); + if bit != 0 { + field.set(8 * byte_index + i, true); + } + } + } + let index = index + ssz::LENGTH_BYTES + len; Ok((field, index)) } @@ -119,6 +153,7 @@ impl ssz::Decodable for BooleanBitfield { #[cfg(test)] mod tests { use super::*; + use ssz::SszStream; #[test] fn test_empty_bitfield() { @@ -231,4 +266,31 @@ mod tests { let field = BooleanBitfield::from_elem(13, true); assert_eq!(field.num_bytes(), 2); } + + #[test] + fn test_ssz_encode() { + let field = BooleanBitfield::from_elem(5, true); + + let mut stream = SszStream::new(); + stream.append(&field); + assert_eq!(stream.drain(), vec![0, 0, 0, 1, 31]); + + let field = BooleanBitfield::from_elem(18, true); + let mut stream = SszStream::new(); + stream.append(&field); + assert_eq!(stream.drain(), vec![0, 0, 0, 3, 255, 255, 3]); + } + + #[test] + fn test_ssz_decode() { + let encoded = vec![0, 0, 0, 1, 31]; + let (field, _): (BooleanBitfield, usize) = ssz::decode_ssz(&encoded, 0).unwrap(); + let expected = BooleanBitfield::from_elem(5, true); + assert_eq!(field, expected); + + let encoded = vec![0, 0, 0, 3, 255, 255, 3]; + let (field, _): (BooleanBitfield, usize) = ssz::decode_ssz(&encoded, 0).unwrap(); + let 
expected = BooleanBitfield::from_elem(18, true); + assert_eq!(field, expected); + } } From 9021227c1c2b223b5e2c2a2b73562f9e92512e68 Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Tue, 20 Nov 2018 12:51:51 -0800 Subject: [PATCH 12/20] Remove warning about unused import --- beacon_chain/types/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/beacon_chain/types/src/lib.rs b/beacon_chain/types/src/lib.rs index c531da2990..65c8b7bce7 100644 --- a/beacon_chain/types/src/lib.rs +++ b/beacon_chain/types/src/lib.rs @@ -14,7 +14,6 @@ pub mod special_record; pub mod validator_record; pub mod validator_registration; -use self::boolean_bitfield::BooleanBitfield; use self::ethereum_types::{ H256, H160, From db4c4bf223ef44ceb51c51465c7606a26521ce68 Mon Sep 17 00:00:00 2001 From: Alex Stokes Date: Tue, 20 Nov 2018 12:54:35 -0800 Subject: [PATCH 13/20] Run `cargo fmt` that got clobbered in merge --- beacon_chain/types/src/lib.rs | 6 +----- .../validation/src/attestation_validation.rs | 7 ++++--- .../tests/attestation_validation/tests.rs | 14 +++++++++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/beacon_chain/types/src/lib.rs b/beacon_chain/types/src/lib.rs index 65c8b7bce7..c70d62555e 100644 --- a/beacon_chain/types/src/lib.rs +++ b/beacon_chain/types/src/lib.rs @@ -14,11 +14,7 @@ pub mod special_record; pub mod validator_record; pub mod validator_registration; -use self::ethereum_types::{ - H256, - H160, - U256 -}; +use self::ethereum_types::{H160, H256, U256}; use std::collections::HashMap; pub use active_state::ActiveState; diff --git a/beacon_chain/validation/src/attestation_validation.rs b/beacon_chain/validation/src/attestation_validation.rs index da761432e5..e31f3ae523 100644 --- a/beacon_chain/validation/src/attestation_validation.rs +++ b/beacon_chain/validation/src/attestation_validation.rs @@ -243,8 +243,9 @@ impl From for AttestationValidationError { AttestationValidationError::NoPublicKeyForValidator } 
SignatureVerificationError::DBError(s) => AttestationValidationError::DBError(s), - SignatureVerificationError::OutOfBoundsBitfieldIndex - => AttestationValidationError::OutOfBoundsBitfieldIndex, + SignatureVerificationError::OutOfBoundsBitfieldIndex => { + AttestationValidationError::OutOfBoundsBitfieldIndex + } } } -} \ No newline at end of file +} diff --git a/beacon_chain/validation/tests/attestation_validation/tests.rs b/beacon_chain/validation/tests/attestation_validation/tests.rs index 171d983f58..e4a86b3e08 100644 --- a/beacon_chain/validation/tests/attestation_validation/tests.rs +++ b/beacon_chain/validation/tests/attestation_validation/tests.rs @@ -132,7 +132,9 @@ fn test_attestation_validation_invalid_bad_bitfield_length() { * We take advantage of the fact that setting a bit outside the current bounds will grow the bitvector. */ let one_byte_higher = rig.attester_count + 8; - rig.attestation.attester_bitfield.set(one_byte_higher, false); + rig.attestation + .attester_bitfield + .set(one_byte_higher, false); let result = rig.context.validate_attestation(&rig.attestation); assert_eq!(result, Err(AttestationValidationError::BadBitfieldLength)); @@ -166,11 +168,17 @@ fn test_attestation_validation_invalid_invalid_bitfield_end_bit_with_irreguar_bi * bit in a bitfield and the byte length of that bitfield */ let one_bit_high = rig.attester_count + 1; - assert!(one_bit_high % 8 != 0, "the test is ineffective in this case."); + assert!( + one_bit_high % 8 != 0, + "the test is ineffective in this case." 
+ ); rig.attestation.attester_bitfield.set(one_bit_high, true); let result = rig.context.validate_attestation(&rig.attestation); - assert_eq!(result, Err(AttestationValidationError::InvalidBitfieldEndBits)); + assert_eq!( + result, + Err(AttestationValidationError::InvalidBitfieldEndBits) + ); } #[test] From 7fa9c90d629dd57a9d1b20e62a885eba587582e3 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Fri, 23 Nov 2018 08:57:28 +1100 Subject: [PATCH 14/20] Add some extra tests for boolean-bitfield --- .../utils/boolean-bitfield/src/lib.rs | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/beacon_chain/utils/boolean-bitfield/src/lib.rs b/beacon_chain/utils/boolean-bitfield/src/lib.rs index b90e4949e9..e0adc64ddf 100644 --- a/beacon_chain/utils/boolean-bitfield/src/lib.rs +++ b/beacon_chain/utils/boolean-bitfield/src/lib.rs @@ -11,7 +11,7 @@ use std::default; pub struct BooleanBitfield(BitVec); /// Error represents some reason a request against a bitfield was not satisfied -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum Error { /// OutOfBounds refers to indexing into a bitfield where no bits exist; returns the illegal index and the current size of the bitfield, respectively OutOfBounds(usize, usize), @@ -156,7 +156,7 @@ mod tests { use ssz::SszStream; #[test] - fn test_empty_bitfield() { + fn test_new_bitfield() { let mut field = BooleanBitfield::new(); let original_len = field.len(); @@ -175,6 +175,31 @@ mod tests { } } + #[test] + fn test_empty_bitfield() { + let mut field = BooleanBitfield::from_elem(0, false); + let original_len = field.len(); + + assert_eq!(original_len, 0); + + for i in 0..100 { + if i < original_len { + assert!(!field.get(i).unwrap()); + } else { + assert!(field.get(i).is_err()); + } + let previous = field.set(i, true); + if i < original_len { + assert!(!previous.unwrap()); + } else { + assert!(previous.is_none()); + } + } + + assert_eq!(field.len(), 100); + assert_eq!(field.num_set_bits(), 100); + } + 
const INPUT: &[u8] = &[0b0000_0010, 0b0000_0010]; #[test] @@ -206,6 +231,9 @@ mod tests { let field = BooleanBitfield::from_bytes(INPUT); assert_eq!(field.highest_set_bit().unwrap(), 14); + let field = BooleanBitfield::from_bytes(&[0b0000_0011]); + assert_eq!(field.highest_set_bit().unwrap(), 7); + let field = BooleanBitfield::new(); assert_eq!(field.highest_set_bit(), None); } @@ -255,6 +283,26 @@ mod tests { } } + #[test] + fn test_grows_with_false() { + let input_all_set: &[u8] = &[0b1111_1111, 0b1111_1111]; + let mut field = BooleanBitfield::from_bytes(input_all_set); + + // Define `a` and `b`, where both are out of bounds and `b` is greater than `a`. + let a = field.len(); + let b = a + 1; + + // Ensure `a` is out-of-bounds for test integrity. + assert!(field.get(a).is_err()); + + // Set `b` to `true`. Also, for test integrity, ensure it was previously out-of-bounds. + assert!(field.set(b, true).is_none()); + + // Ensure that `a` wasn't also set to `true` during the grow. + assert_eq!(field.get(a), Ok(false)); + assert_eq!(field.get(b), Ok(true)); + } + #[test] fn test_num_bytes() { let field = BooleanBitfield::from_bytes(INPUT); From fbb05946dde7a3eb3fc3a63b8eff655e4c234a74 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Wed, 28 Nov 2018 12:13:25 -0800 Subject: [PATCH 15/20] tree_hash WIP --- beacon_chain/utils/ssz/src/tree_hash.rs | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 beacon_chain/utils/ssz/src/tree_hash.rs diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs new file mode 100644 index 0000000000..41d7a9e4d6 --- /dev/null +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -0,0 +1,36 @@ +extern crate blake2_rfc; + +use self::blake2_rfc::blake2s::blake2s; + +/** + * Extends data length to a power of 2 by minimally right-zero-padding + */ +fn extend_to_power_of_2(data: &mut Vec) { + let len = data.len(); + let new_len = len.next_power_of_two(); + if new_len > len { + 
data.append(&mut vec![0; new_len - len]); + } +} + +fn hash(data: Vec) -> Vec { + let result = blake2s(32, &[], data.as_slice()); + result.as_bytes().to_vec() +} + +// fn list_to_glob() {} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extend_to_power_of_2() { + let mut data = vec![1, 2, 3, 4, 5]; + + // an array length of 5 should be extended to + // a length of 8 (the next power of 2) by right + // padding it with 3 zeros + extend_to_power_of_2(&mut data); + assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); + } +} From f11c619ef5810bd1061094a0706455e774b61dca Mon Sep 17 00:00:00 2001 From: mjkeating Date: Tue, 4 Dec 2018 12:37:12 -0800 Subject: [PATCH 16/20] WIP for tree_hash --- beacon_chain/types/src/shard_and_committee.rs | 33 ++++ beacon_chain/types/src/validator_record.rs | 50 ++++++ beacon_chain/utils/ssz/Cargo.toml | 1 + beacon_chain/utils/ssz/src/impl_encode.rs | 18 ++- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 67 ++++++++ beacon_chain/utils/ssz/src/lib.rs | 3 + beacon_chain/utils/ssz/src/tree_hash.rs | 147 ++++++++++++++++-- 7 files changed, 305 insertions(+), 14 deletions(-) create mode 100644 beacon_chain/utils/ssz/src/impl_tree_hash.rs diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 44c2e57ffb..8388b9cbc2 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -1,3 +1,5 @@ +use super::ssz::{merkle_hash, TreeHash}; + #[derive(Clone, Debug, PartialEq)] pub struct ShardAndCommittee { pub shard: u16, @@ -15,6 +17,26 @@ impl ShardAndCommittee { } } +impl TreeHash for ShardAndCommittee { + // python sample code: + // def hash_shard_and_committee(val): + // committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee]) + // return hash(val.shard_id.to_bytes(2, 'big') + committee) + fn tree_hash(&self) -> Vec { + let mut committee_ssz_items = Vec::new(); + for c in &self.committee { + let mut h = (*c as 
u32).tree_hash(); + h.resize(3, 0); + committee_ssz_items.push(h); + } + let mut result = Vec::new(); + result.append(&mut self.shard.tree_hash()); + result.append(&mut merkle_hash(&mut committee_ssz_items)); + + result.tree_hash() + } +} + #[cfg(test)] mod tests { use super::*; @@ -25,4 +47,15 @@ mod tests { assert_eq!(s.shard, 0); assert_eq!(s.committee.len(), 0); } + + #[test] + fn test_shard_and_committee_tree_hash() { + let s = ShardAndCommittee { + shard: 1, + committee: vec![1, 2, 3], + }; + + // should test a known hash value + assert_eq!(s.tree_hash().len(), 32); + } } diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 3a15baeecb..3d4a57e201 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -1,4 +1,5 @@ use super::bls::{Keypair, PublicKey}; +use super::ssz::TreeHash; use super::{Address, Hash256}; #[derive(Debug, PartialEq, Clone, Copy)] @@ -44,6 +45,46 @@ impl ValidatorRecord { } } +impl TreeHash for ValidatorRecord { + /* python sample code: + def hash_validator_record(val): + return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \ + val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \ + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')) + */ + fn tree_hash(&self) -> Vec { + // the serialized fields, to be hashed, should add up to 118 bytes in length. + // allocating it once here + let mut ssz = Vec::with_capacity(118); + + // "val.pubkey.to_bytes(32, 'big')" logic + // TODO: + // probably all kinds of wrong here. Not sure how to convert (szz) + // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on + // PublicKey, returns a 192 byte array. 
+ let pub_key_bytes = &mut self.pubkey.as_bytes(); + pub_key_bytes.resize(32, 0); + ssz.append(pub_key_bytes); + + ssz.append(&mut self.withdrawal_shard.tree_hash()); + ssz.append(&mut self.withdrawal_address.tree_hash()); + ssz.append(&mut self.randao_commitment.tree_hash()); + + // balance is a 64bit number that serializes to 8 bytes. + // Right padding here to resize to 16 bytes - not sure why + // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')" + let mut balance = self.balance.tree_hash(); + balance.resize(16, 0); + ssz.append(&mut balance); + + // TODO: + // ... + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big') + // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields + + ssz.tree_hash() + } +} + #[cfg(test)] mod tests { use super::*; @@ -59,4 +100,13 @@ mod tests { assert_eq!(v.status, 0); assert_eq!(v.exit_slot, 0); } + + #[test] + fn test_validator_record_ree_hash() { + let (v, _kp) = ValidatorRecord::zero_with_thread_rand_keypair(); + let h = v.tree_hash(); + + // TODO: should check a known hash result value + assert_eq!(h.len(), 32); + } } diff --git a/beacon_chain/utils/ssz/Cargo.toml b/beacon_chain/utils/ssz/Cargo.toml index aa4dc5d72c..ec91009274 100644 --- a/beacon_chain/utils/ssz/Cargo.toml +++ b/beacon_chain/utils/ssz/Cargo.toml @@ -6,3 +6,4 @@ authors = ["Paul Hauner "] [dependencies] bytes = "0.4.9" ethereum-types = "0.4.0" +blake2-rfc = "0.2.18" \ No newline at end of file diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs index 3f366bdf3a..c9ca8b0064 100644 --- a/beacon_chain/utils/ssz/src/impl_encode.rs +++ b/beacon_chain/utils/ssz/src/impl_encode.rs @@ -1,7 +1,7 @@ extern crate bytes; use self::bytes::{BufMut, BytesMut}; -use super::ethereum_types::H256; +use super::ethereum_types::{Address, H256}; use super::{Encodable, SszStream}; /* @@ -48,10 +48,18 @@ impl_encodable_for_uint!(usize, 64); impl Encodable 
for H256 { fn ssz_append(&self, s: &mut SszStream) { + assert_eq!(32, self.len()); s.append_encoded_raw(&self.to_vec()); } } +impl Encodable for Address { + fn ssz_append(&self, s: &mut SszStream) { + assert_eq!(20, self.len()); + s.append_encoded_raw(&self) + } +} + #[cfg(test)] mod tests { use super::*; @@ -64,6 +72,14 @@ mod tests { assert_eq!(ssz.drain(), vec![0; 32]); } + #[test] + fn test_ssz_encode_adress() { + let h = Address::zero(); + let mut ssz = SszStream::new(); + ssz.append(&h); + assert_eq!(ssz.drain(), vec![0; 20]); + } + #[test] fn test_ssz_encode_u8() { let x: u8 = 0; diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs new file mode 100644 index 0000000000..c3fccb2bc5 --- /dev/null +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -0,0 +1,67 @@ +extern crate blake2_rfc; + +use self::blake2_rfc::blake2b::blake2b; + +use super::ethereum_types::{Address, H256}; +use super::{ssz_encode, TreeHash}; + +// I haven't added tests for tree_hash implementations that simply pass +// thru to the szz_encode lib for which tests already exist. Do we want +// test anyway? 
+ +impl TreeHash for u8 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u16 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u32 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for u64 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for Address { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +impl TreeHash for H256 { + fn tree_hash(&self) -> Vec { + ssz_encode(self) + } +} + +// hash byte arrays +impl TreeHash for [u8] { + fn tree_hash(&self) -> Vec { + hash(&self) + } +} + +/** + * From the Spec: + * We define hash(x) as BLAKE2b-512(x)[0:32] + * From the python sample code: + * return blake2b(x).digest()[:32] + * + * This was orginally writting for blake2s before it was changed to blake2b + * Perhaps, we should be using 'canonical_hash' in the hashing lib? + */ +fn hash(data: &[u8]) -> Vec { + let result = blake2b(32, &[], &data); + result.as_bytes().to_vec() +} diff --git a/beacon_chain/utils/ssz/src/lib.rs b/beacon_chain/utils/ssz/src/lib.rs index f3a195e427..9f71e36f19 100644 --- a/beacon_chain/utils/ssz/src/lib.rs +++ b/beacon_chain/utils/ssz/src/lib.rs @@ -12,12 +12,15 @@ extern crate ethereum_types; pub mod decode; pub mod encode; +pub mod tree_hash; mod impl_decode; mod impl_encode; +mod impl_tree_hash; pub use decode::{decode_ssz, decode_ssz_list, Decodable, DecodeError}; pub use encode::{Encodable, SszStream}; +pub use tree_hash::{merkle_hash, TreeHash}; pub const LENGTH_BYTES: usize = 4; pub const MAX_LIST_SIZE: usize = 1 << (4 * 8); diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 41d7a9e4d6..63b5740495 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -1,6 +1,123 @@ -extern crate blake2_rfc; +const CHUNKSIZE: usize = 128; +const HASHSIZE: usize = 32; -use self::blake2_rfc::blake2s::blake2s; +pub trait TreeHash { + // 
Note: it would be nice to have a default trait implementation here + // i.e. szz_encode(self) - but rust complains it does not know + // the size of 'self'. Not sure if there's a way around this. + + fn tree_hash(&self) -> Vec; +} + +// python example: Note - I'm seeing some inconsistencies +// between this and the 'Tree Hash' section in the SSZ spec. +// So, I imagine it will change. +/* def merkle_hash(lst): + # Concatenate list into data + if len(lst[0]) != next_power_of_2(len(lst[0])): + lst = [extend_to_power_of_2(x) for x in lst] + data = b''.join(lst) + # Add padding + data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) + assert len(data) % CHUNKSIZE == 0 + # Store length (to compensate for non-bijectiveness of padding) + datalen = len(lst).to_bytes(32, 'big') + # Convert to chunks + chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] + chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE] + for i in range(len(chunkz)//2 - 1, 0, -1): + chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1]) + return hash(chunkz[1] + datalen) */ + +/** + * Returns a 32 byte hash of 'list', a vector of byte vectors. + * Note that this will consume 'list'. + * */ +pub fn merkle_hash(list: &mut Vec>) -> Vec { + // flatten list + let data = &mut list_to_blob(list); + + // data should be divisible by CHUNKSIZE + assert_eq!(data.len() % CHUNKSIZE, 0); + + // get data_len as bytes. 
It will hashed will the merkle root + let dlen = data.len() as u64; + let data_len_bytes = &mut dlen.tree_hash(); + data_len_bytes.resize(32, 0); + + // merklize + // + // From the Spec: + // while len(chunkz) > 1: + // if len(chunkz) % 2 == 1: + // chunkz.append(b'\x00' * SSZ_CHUNK_SIZE) + // chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] + let mut mhash = hash_level(data, CHUNKSIZE); + while mhash.len() > HASHSIZE { + mhash = hash_level(&mut mhash, HASHSIZE); + } + + assert_eq!(mhash.len(), HASHSIZE); + + mhash.append(data_len_bytes); + mhash.tree_hash() +} + +/** + * Takes a flat vector of bytes. It then hashes (chunk_size * 2) into + * a byte vector of hashes, divisible by the 32 byte hashsize + */ +fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { + assert!(data.len() % chunk_size == 0); + + let mut result: Vec = Vec::new(); + for two_chunks in data.chunks(chunk_size * 2) { + if two_chunks.len() == chunk_size && data.len() > chunk_size { + // if there is only one chunk here, hash it with a zero-byte + // CHUNKSIZE vector + let mut c = two_chunks.to_vec(); + c.append(&mut vec![0; CHUNKSIZE]); + result.append(&mut c.tree_hash()); + } else { + result.append(&mut two_chunks.tree_hash()); + } + } + + result +} + +fn list_to_blob(list: &mut Vec>) -> Vec { + let mut data_len = 0; + if list[0].len().is_power_of_two() == false { + for x in list.iter_mut() { + extend_to_power_of_2(x); + data_len += x.len(); + } + } + + // do we need padding? 
+ let extend_by = if data_len % CHUNKSIZE > 0 { + CHUNKSIZE - (data_len % CHUNKSIZE) + } else { + 0 + }; + + // allocate buffer and append each list element (flatten the vec of vecs) + data_len += extend_by; + let mut data: Vec = Vec::with_capacity(data_len); + for x in list.iter_mut() { + data.append(x); + } + + // add padding + let mut i = 0; + while i < extend_by { + data.push(0); + i += 1; + } + + data +} /** * Extends data length to a power of 2 by minimally right-zero-padding @@ -9,16 +126,10 @@ fn extend_to_power_of_2(data: &mut Vec) { let len = data.len(); let new_len = len.next_power_of_two(); if new_len > len { - data.append(&mut vec![0; new_len - len]); + data.resize(new_len, 0); } } -fn hash(data: Vec) -> Vec { - let result = blake2s(32, &[], data.as_slice()); - result.as_bytes().to_vec() -} - -// fn list_to_glob() {} #[cfg(test)] mod tests { use super::*; @@ -26,11 +137,21 @@ mod tests { #[test] fn test_extend_to_power_of_2() { let mut data = vec![1, 2, 3, 4, 5]; - - // an array length of 5 should be extended to - // a length of 8 (the next power of 2) by right - // padding it with 3 zeros extend_to_power_of_2(&mut data); assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); } + + #[test] + fn test_merkle_hash() { + let data1 = vec![1; 100]; + let data2 = vec![2; 100]; + let data3 = vec![3; 100]; + let mut list = vec![data1, data2, data3]; + let result = merkle_hash(&mut list); + + //note: should test againt a known test hash value + assert_eq!(HASHSIZE, result.len()); + println!("merkle_hash: {:?}", result); + } + } From 68629acebea8a4ba683c7d2670ef7fac5f5cfba6 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sat, 8 Dec 2018 15:25:59 -0800 Subject: [PATCH 17/20] Added tree_hash impl for Vec and Hashtable (list and container); plus various cleanup code --- beacon_chain/types/src/shard_and_committee.rs | 4 - beacon_chain/types/src/validator_record.rs | 36 ++++---- beacon_chain/utils/ssz/src/impl_encode.rs | 4 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 92 
++++++++++++++++--- beacon_chain/utils/ssz/src/tree_hash.rs | 51 +--------- 5 files changed, 99 insertions(+), 88 deletions(-) diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 8388b9cbc2..9982611e8f 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -18,10 +18,6 @@ impl ShardAndCommittee { } impl TreeHash for ShardAndCommittee { - // python sample code: - // def hash_shard_and_committee(val): - // committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee]) - // return hash(val.shard_id.to_bytes(2, 'big') + committee) fn tree_hash(&self) -> Vec { let mut committee_ssz_items = Vec::new(); for c in &self.committee { diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 3d4a57e201..1b18a06436 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -2,6 +2,16 @@ use super::bls::{Keypair, PublicKey}; use super::ssz::TreeHash; use super::{Address, Hash256}; +pub const HASH_SSZ_VALIDATOR_RECORD_LENGTH: usize = { + 32 + // pubkey.to_bytes(32, 'big') + 2 + // withdrawal_shard.to_bytes(2, 'big') + 20 + // withdrawal_address + 32 + // randao_commitment + 16 + // balance.to_bytes(16, 'big') + 16 + // start_dynasty.to_bytes(8, 'big') + 8 // end_dynasty.to_bytes(8, 'big') +}; + #[derive(Debug, PartialEq, Clone, Copy)] pub enum ValidatorStatus { PendingActivation = 0, @@ -46,22 +56,15 @@ impl ValidatorRecord { } impl TreeHash for ValidatorRecord { - /* python sample code: - def hash_validator_record(val): - return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \ - val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \ - val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')) - */ fn tree_hash(&self) -> Vec { - // the serialized fields, to be hashed, should add 
up to 118 bytes in length. - // allocating it once here - let mut ssz = Vec::with_capacity(118); + let mut ssz = Vec::with_capacity(HASH_SSZ_VALIDATOR_RECORD_LENGTH); - // "val.pubkey.to_bytes(32, 'big')" logic + // From python sample: "val.pubkey.to_bytes(32, 'big')" // TODO: - // probably all kinds of wrong here. Not sure how to convert (szz) - // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on - // PublicKey, returns a 192 byte array. + // Need to actually convert (szz) pubkey into a big-endian 32 byte + // array. + // Also, our ValidatorRecord seems to be missing the start_dynasty + // and end_dynasty fields let pub_key_bytes = &mut self.pubkey.as_bytes(); pub_key_bytes.resize(32, 0); ssz.append(pub_key_bytes); @@ -70,17 +73,10 @@ impl TreeHash for ValidatorRecord { ssz.append(&mut self.withdrawal_address.tree_hash()); ssz.append(&mut self.randao_commitment.tree_hash()); - // balance is a 64bit number that serializes to 8 bytes. - // Right padding here to resize to 16 bytes - not sure why - // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')" let mut balance = self.balance.tree_hash(); balance.resize(16, 0); ssz.append(&mut balance); - // TODO: - // ... 
+ val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big') - // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields - ssz.tree_hash() } } diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs index c9ca8b0064..8a05b97051 100644 --- a/beacon_chain/utils/ssz/src/impl_encode.rs +++ b/beacon_chain/utils/ssz/src/impl_encode.rs @@ -48,14 +48,12 @@ impl_encodable_for_uint!(usize, 64); impl Encodable for H256 { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(32, self.len()); s.append_encoded_raw(&self.to_vec()); } } impl Encodable for Address { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(20, self.len()); s.append_encoded_raw(&self) } } @@ -73,7 +71,7 @@ mod tests { } #[test] - fn test_ssz_encode_adress() { + fn test_ssz_encode_address() { let h = Address::zero(); let mut ssz = SszStream::new(); ssz.append(&h); diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index c3fccb2bc5..8c5a5d17a7 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -1,13 +1,11 @@ extern crate blake2_rfc; use self::blake2_rfc::blake2b::blake2b; - use super::ethereum_types::{Address, H256}; use super::{ssz_encode, TreeHash}; - -// I haven't added tests for tree_hash implementations that simply pass -// thru to the szz_encode lib for which tests already exist. Do we want -// test anyway? 
+use std::cmp::Ord; +use std::collections::HashMap; +use std::hash::Hash; impl TreeHash for u8 { fn tree_hash(&self) -> Vec { @@ -45,23 +43,87 @@ impl TreeHash for H256 { } } -// hash byte arrays impl TreeHash for [u8] { fn tree_hash(&self) -> Vec { hash(&self) } } -/** - * From the Spec: - * We define hash(x) as BLAKE2b-512(x)[0:32] - * From the python sample code: - * return blake2b(x).digest()[:32] - * - * This was orginally writting for blake2s before it was changed to blake2b - * Perhaps, we should be using 'canonical_hash' in the hashing lib? - */ +impl TreeHash for Vec +where + T: TreeHash, +{ + /// Appends the tree_hash for each value of 'self' into a byte array + /// and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut result = Vec::new(); + for x in self { + result.append(&mut x.tree_hash()); + } + + hash(&result) + } +} + +impl TreeHash for HashMap +where + K: Eq, + K: Hash, + K: Ord, + V: TreeHash, +{ + /// Appends the tree_hash for each value of 'self, sorted by key, + /// into a byte array and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut items: Vec<_> = self.iter().collect(); + items.sort_by(|a, b| a.0.cmp(b.0)); + let mut result = Vec::new(); + for item in items { + result.append(&mut item.1.tree_hash()); + } + + hash(&result) + } +} + +/// From the Spec: +/// We define hash(x) as BLAKE2b-512(x)[0:32] fn hash(data: &[u8]) -> Vec { let result = blake2b(32, &[], &data); result.as_bytes().to_vec() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_impl_tree_hash_vec() { + let result = vec![1u32, 2, 3, 4, 5, 6, 7].tree_hash(); + assert_eq!(result.len(), 32); + } + + #[test] + fn test_impl_tree_hash_hashmap() { + let mut map = HashMap::new(); + map.insert("c", 3); + map.insert("b", 2); + map.insert("g", 7); + map.insert("d", 6); + map.insert("e", 4); + map.insert("a", 1u32); + map.insert("f", 5); + let result = map.tree_hash(); + + // TODO: resolve inconsistencies between the 
python sample code and + // the spec; and create tests that tie-out to an offical result + assert_eq!( + result, + [ + 59, 110, 242, 24, 177, 184, 73, 109, 190, 19, 172, 39, 74, 94, 224, 198, 0, 170, + 225, 152, 249, 59, 10, 76, 137, 124, 52, 159, 37, 42, 26, 157 + ] + ); + } + +} diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 63b5740495..60a052b354 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -2,71 +2,32 @@ const CHUNKSIZE: usize = 128; const HASHSIZE: usize = 32; pub trait TreeHash { - // Note: it would be nice to have a default trait implementation here - // i.e. szz_encode(self) - but rust complains it does not know - // the size of 'self'. Not sure if there's a way around this. - fn tree_hash(&self) -> Vec; } -// python example: Note - I'm seeing some inconsistencies -// between this and the 'Tree Hash' section in the SSZ spec. -// So, I imagine it will change. -/* def merkle_hash(lst): - # Concatenate list into data - if len(lst[0]) != next_power_of_2(len(lst[0])): - lst = [extend_to_power_of_2(x) for x in lst] - data = b''.join(lst) - # Add padding - data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) - assert len(data) % CHUNKSIZE == 0 - # Store length (to compensate for non-bijectiveness of padding) - datalen = len(lst).to_bytes(32, 'big') - # Convert to chunks - chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] - chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE] - for i in range(len(chunkz)//2 - 1, 0, -1): - chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1]) - return hash(chunkz[1] + datalen) */ - -/** - * Returns a 32 byte hash of 'list', a vector of byte vectors. - * Note that this will consume 'list'. - * */ +/// Returns a 32 byte hash of 'list' - a vector of byte vectors. +/// Note that this will consume 'list'. 
pub fn merkle_hash(list: &mut Vec>) -> Vec { // flatten list let data = &mut list_to_blob(list); - // data should be divisible by CHUNKSIZE - assert_eq!(data.len() % CHUNKSIZE, 0); - // get data_len as bytes. It will hashed will the merkle root let dlen = data.len() as u64; let data_len_bytes = &mut dlen.tree_hash(); data_len_bytes.resize(32, 0); // merklize - // - // From the Spec: - // while len(chunkz) > 1: - // if len(chunkz) % 2 == 1: - // chunkz.append(b'\x00' * SSZ_CHUNK_SIZE) - // chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] let mut mhash = hash_level(data, CHUNKSIZE); while mhash.len() > HASHSIZE { mhash = hash_level(&mut mhash, HASHSIZE); } - assert_eq!(mhash.len(), HASHSIZE); - mhash.append(data_len_bytes); mhash.tree_hash() } -/** - * Takes a flat vector of bytes. It then hashes (chunk_size * 2) into - * a byte vector of hashes, divisible by the 32 byte hashsize - */ +/// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into +/// a byte vector of hashes, divisible by HASHSIZE fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { assert!(data.len() % chunk_size == 0); @@ -119,9 +80,7 @@ fn list_to_blob(list: &mut Vec>) -> Vec { data } -/** - * Extends data length to a power of 2 by minimally right-zero-padding - */ +/// Extends data length to a power of 2 by minimally right-zero-padding fn extend_to_power_of_2(data: &mut Vec) { let len = data.len(); let new_len = len.next_power_of_two(); From c961c87307b89cdff552a1437b3699bf09f91aea Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sun, 9 Dec 2018 10:12:41 -0800 Subject: [PATCH 18/20] fixed tree_hash() for Vec and a couple of other issues --- beacon_chain/types/src/shard_and_committee.rs | 2 +- beacon_chain/types/src/validator_record.rs | 2 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 16 +++++++--------- beacon_chain/utils/ssz/src/tree_hash.rs | 19 ++++++------------- 4 files changed, 15 insertions(+), 24 deletions(-) diff --git 
a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 9982611e8f..c7fed2e8d2 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -29,7 +29,7 @@ impl TreeHash for ShardAndCommittee { result.append(&mut self.shard.tree_hash()); result.append(&mut merkle_hash(&mut committee_ssz_items)); - result.tree_hash() + result.as_slice().tree_hash() } } diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 1b18a06436..799e0137f6 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -77,7 +77,7 @@ impl TreeHash for ValidatorRecord { balance.resize(16, 0); ssz.append(&mut balance); - ssz.tree_hash() + ssz.as_slice().tree_hash() } } diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index 8c5a5d17a7..dfa6da2b28 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -2,7 +2,7 @@ extern crate blake2_rfc; use self::blake2_rfc::blake2b::blake2b; use super::ethereum_types::{Address, H256}; -use super::{ssz_encode, TreeHash}; +use super::{merkle_hash, ssz_encode, TreeHash}; use std::cmp::Ord; use std::collections::HashMap; use std::hash::Hash; @@ -53,15 +53,13 @@ impl TreeHash for Vec where T: TreeHash, { - /// Appends the tree_hash for each value of 'self' into a byte array - /// and returns the hash of said byte array + /// Returns the merkle_hash of a list of tree_hash values created + /// from the given list. + /// Note: A byte vector, Vec, must be converted to a slice (as_slice()) + /// to be handled properly (i.e. hashed) as byte array. 
fn tree_hash(&self) -> Vec { - let mut result = Vec::new(); - for x in self { - result.append(&mut x.tree_hash()); - } - - hash(&result) + let mut tree_hashes = self.iter().map(|x| x.tree_hash()).collect(); + merkle_hash(&mut tree_hashes) } } diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 60a052b354..31e1d3720c 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -23,14 +23,12 @@ pub fn merkle_hash(list: &mut Vec>) -> Vec { } mhash.append(data_len_bytes); - mhash.tree_hash() + mhash.as_slice().tree_hash() } /// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into /// a byte vector of hashes, divisible by HASHSIZE fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { - assert!(data.len() % chunk_size == 0); - let mut result: Vec = Vec::new(); for two_chunks in data.chunks(chunk_size * 2) { if two_chunks.len() == chunk_size && data.len() > chunk_size { @@ -38,7 +36,7 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { // CHUNKSIZE vector let mut c = two_chunks.to_vec(); c.append(&mut vec![0; CHUNKSIZE]); - result.append(&mut c.tree_hash()); + result.append(&mut c.as_slice().tree_hash()); } else { result.append(&mut two_chunks.tree_hash()); } @@ -48,14 +46,14 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { } fn list_to_blob(list: &mut Vec>) -> Vec { - let mut data_len = 0; if list[0].len().is_power_of_two() == false { for x in list.iter_mut() { extend_to_power_of_2(x); - data_len += x.len(); } } + let mut data_len = list[0].len() * list.len(); + // do we need padding? 
let extend_by = if data_len % CHUNKSIZE > 0 { CHUNKSIZE - (data_len % CHUNKSIZE) @@ -63,6 +61,8 @@ fn list_to_blob(list: &mut Vec>) -> Vec { 0 }; + println!("data_len {}, extend_by {}", data_len, extend_by); + // allocate buffer and append each list element (flatten the vec of vecs) data_len += extend_by; let mut data: Vec = Vec::with_capacity(data_len); @@ -93,13 +93,6 @@ fn extend_to_power_of_2(data: &mut Vec) { mod tests { use super::*; - #[test] - fn test_extend_to_power_of_2() { - let mut data = vec![1, 2, 3, 4, 5]; - extend_to_power_of_2(&mut data); - assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]); - } - #[test] fn test_merkle_hash() { let data1 = vec![1; 100]; From 1e4e92bf2eb33b41773c94967786107bcbe29b1d Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sun, 9 Dec 2018 14:43:41 -0800 Subject: [PATCH 19/20] removed a debugging println statement --- beacon_chain/utils/ssz/src/tree_hash.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 31e1d3720c..0375d207c1 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -61,8 +61,6 @@ fn list_to_blob(list: &mut Vec>) -> Vec { 0 }; - println!("data_len {}, extend_by {}", data_len, extend_by); - // allocate buffer and append each list element (flatten the vec of vecs) data_len += extend_by; let mut data: Vec = Vec::with_capacity(data_len); From be2c82a732029d83e786512d5103f42628a7bcb4 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Wed, 12 Dec 2018 13:48:54 -0800 Subject: [PATCH 20/20] updated with latest spec changes --- beacon_chain/types/src/shard_and_committee.rs | 29 -------- beacon_chain/types/src/validator_record.rs | 46 ------------ beacon_chain/utils/ssz/Cargo.toml | 2 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 16 ++--- beacon_chain/utils/ssz/src/tree_hash.rs | 71 +++++++------------ 5 files changed, 34 insertions(+), 130 deletions(-) diff --git 
a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index c7fed2e8d2..44c2e57ffb 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -1,5 +1,3 @@ -use super::ssz::{merkle_hash, TreeHash}; - #[derive(Clone, Debug, PartialEq)] pub struct ShardAndCommittee { pub shard: u16, @@ -17,22 +15,6 @@ impl ShardAndCommittee { } } -impl TreeHash for ShardAndCommittee { - fn tree_hash(&self) -> Vec { - let mut committee_ssz_items = Vec::new(); - for c in &self.committee { - let mut h = (*c as u32).tree_hash(); - h.resize(3, 0); - committee_ssz_items.push(h); - } - let mut result = Vec::new(); - result.append(&mut self.shard.tree_hash()); - result.append(&mut merkle_hash(&mut committee_ssz_items)); - - result.as_slice().tree_hash() - } -} - #[cfg(test)] mod tests { use super::*; @@ -43,15 +25,4 @@ mod tests { assert_eq!(s.shard, 0); assert_eq!(s.committee.len(), 0); } - - #[test] - fn test_shard_and_committee_tree_hash() { - let s = ShardAndCommittee { - shard: 1, - committee: vec![1, 2, 3], - }; - - // should test a known hash value - assert_eq!(s.tree_hash().len(), 32); - } } diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 799e0137f6..3a15baeecb 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -1,17 +1,6 @@ use super::bls::{Keypair, PublicKey}; -use super::ssz::TreeHash; use super::{Address, Hash256}; -pub const HASH_SSZ_VALIDATOR_RECORD_LENGTH: usize = { - 32 + // pubkey.to_bytes(32, 'big') - 2 + // withdrawal_shard.to_bytes(2, 'big') - 20 + // withdrawal_address - 32 + // randao_commitment - 16 + // balance.to_bytes(16, 'big') - 16 + // start_dynasty.to_bytes(8, 'big') - 8 // end_dynasty.to_bytes(8, 'big') -}; - #[derive(Debug, PartialEq, Clone, Copy)] pub enum ValidatorStatus { PendingActivation = 0, @@ -55,32 +44,6 @@ impl ValidatorRecord { } } -impl 
TreeHash for ValidatorRecord { - fn tree_hash(&self) -> Vec { - let mut ssz = Vec::with_capacity(HASH_SSZ_VALIDATOR_RECORD_LENGTH); - - // From python sample: "val.pubkey.to_bytes(32, 'big')" - // TODO: - // Need to actually convert (szz) pubkey into a big-endian 32 byte - // array. - // Also, our ValidatorRecord seems to be missing the start_dynasty - // and end_dynasty fields - let pub_key_bytes = &mut self.pubkey.as_bytes(); - pub_key_bytes.resize(32, 0); - ssz.append(pub_key_bytes); - - ssz.append(&mut self.withdrawal_shard.tree_hash()); - ssz.append(&mut self.withdrawal_address.tree_hash()); - ssz.append(&mut self.randao_commitment.tree_hash()); - - let mut balance = self.balance.tree_hash(); - balance.resize(16, 0); - ssz.append(&mut balance); - - ssz.as_slice().tree_hash() - } -} - #[cfg(test)] mod tests { use super::*; @@ -96,13 +59,4 @@ mod tests { assert_eq!(v.status, 0); assert_eq!(v.exit_slot, 0); } - - #[test] - fn test_validator_record_ree_hash() { - let (v, _kp) = ValidatorRecord::zero_with_thread_rand_keypair(); - let h = v.tree_hash(); - - // TODO: should check a known hash result value - assert_eq!(h.len(), 32); - } } diff --git a/beacon_chain/utils/ssz/Cargo.toml b/beacon_chain/utils/ssz/Cargo.toml index ec91009274..d70a692b6a 100644 --- a/beacon_chain/utils/ssz/Cargo.toml +++ b/beacon_chain/utils/ssz/Cargo.toml @@ -6,4 +6,4 @@ authors = ["Paul Hauner "] [dependencies] bytes = "0.4.9" ethereum-types = "0.4.0" -blake2-rfc = "0.2.18" \ No newline at end of file +hashing = { path = "../hashing" } \ No newline at end of file diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index dfa6da2b28..9d86da2dda 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -1,6 +1,6 @@ -extern crate blake2_rfc; +extern crate hashing; -use self::blake2_rfc::blake2b::blake2b; +use self::hashing::canonical_hash; use super::ethereum_types::{Address, H256}; use 
super::{merkle_hash, ssz_encode, TreeHash}; use std::cmp::Ord; @@ -84,11 +84,8 @@ where } } -/// From the Spec: -/// We define hash(x) as BLAKE2b-512(x)[0:32] fn hash(data: &[u8]) -> Vec { - let result = blake2b(32, &[], &data); - result.as_bytes().to_vec() + canonical_hash(data) } #[cfg(test)] @@ -113,13 +110,12 @@ mod tests { map.insert("f", 5); let result = map.tree_hash(); - // TODO: resolve inconsistencies between the python sample code and - // the spec; and create tests that tie-out to an offical result + // TODO: create tests that tie-out to an official result assert_eq!( result, [ - 59, 110, 242, 24, 177, 184, 73, 109, 190, 19, 172, 39, 74, 94, 224, 198, 0, 170, - 225, 152, 249, 59, 10, 76, 137, 124, 52, 159, 37, 42, 26, 157 + 232, 63, 235, 91, 115, 69, 159, 54, 95, 239, 147, 30, 179, 96, 232, 210, 225, 31, + 12, 95, 149, 104, 134, 158, 45, 51, 20, 101, 202, 164, 200, 163 ] ); } diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 0375d207c1..33bece9c4c 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -1,4 +1,4 @@ -const CHUNKSIZE: usize = 128; +const SSZ_CHUNK_SIZE: usize = 128; const HASHSIZE: usize = 32; pub trait TreeHash { @@ -9,15 +9,15 @@ pub trait TreeHash { /// Note that this will consume 'list'. pub fn merkle_hash(list: &mut Vec>) -> Vec { // flatten list - let data = &mut list_to_blob(list); + let (chunk_size, mut data) = list_to_blob(list); // get data_len as bytes.
It will hashed will the merkle root - let dlen = data.len() as u64; + let dlen = list.len() as u64; let data_len_bytes = &mut dlen.tree_hash(); data_len_bytes.resize(32, 0); // merklize - let mut mhash = hash_level(data, CHUNKSIZE); + let mut mhash = hash_level(&mut data, chunk_size); while mhash.len() > HASHSIZE { mhash = hash_level(&mut mhash, HASHSIZE); } @@ -33,9 +33,9 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { for two_chunks in data.chunks(chunk_size * 2) { if two_chunks.len() == chunk_size && data.len() > chunk_size { // if there is only one chunk here, hash it with a zero-byte - // CHUNKSIZE vector + // SSZ_CHUNK_SIZE vector let mut c = two_chunks.to_vec(); - c.append(&mut vec![0; CHUNKSIZE]); + c.append(&mut vec![0; SSZ_CHUNK_SIZE]); result.append(&mut c.as_slice().tree_hash()); } else { result.append(&mut two_chunks.tree_hash()); @@ -45,46 +45,30 @@ fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { result } -fn list_to_blob(list: &mut Vec>) -> Vec { - if list[0].len().is_power_of_two() == false { - for x in list.iter_mut() { - extend_to_power_of_2(x); +fn list_to_blob(list: &mut Vec>) -> (usize, Vec) { + let chunk_size = if list.is_empty() { + SSZ_CHUNK_SIZE + } else if list[0].len() < SSZ_CHUNK_SIZE { + let items_per_chunk = SSZ_CHUNK_SIZE / list[0].len(); + items_per_chunk * list[0].len() + } else { + list[0].len() + }; + + let mut data = Vec::new(); + if list.is_empty() { + // handle an empty list + data.append(&mut vec![0; SSZ_CHUNK_SIZE]); + } else { + // just create a blob here; we'll divide into + // chunked slices when we merklize + data.reserve(list[0].len() * list.len()); + for item in list.iter_mut() { + data.append(item); } } - let mut data_len = list[0].len() * list.len(); - - // do we need padding?
- let extend_by = if data_len % CHUNKSIZE > 0 { - CHUNKSIZE - (data_len % CHUNKSIZE) - } else { - 0 - }; - - // allocate buffer and append each list element (flatten the vec of vecs) - data_len += extend_by; - let mut data: Vec = Vec::with_capacity(data_len); - for x in list.iter_mut() { - data.append(x); - } - - // add padding - let mut i = 0; - while i < extend_by { - data.push(0); - i += 1; - } - - data -} - -/// Extends data length to a power of 2 by minimally right-zero-padding -fn extend_to_power_of_2(data: &mut Vec) { - let len = data.len(); - let new_len = len.next_power_of_two(); - if new_len > len { - data.resize(new_len, 0); - } + (chunk_size, data) } #[cfg(test)] @@ -103,5 +87,4 @@ mod tests { assert_eq!(HASHSIZE, result.len()); println!("merkle_hash: {:?}", result); } - }