Create cached_tree_hash crate.

This commit is contained in:
Paul Hauner
2019-04-26 09:55:03 +10:00
parent 827e1c62d9
commit b213a5ade4
22 changed files with 800 additions and 1778 deletions

View File

@@ -1,156 +0,0 @@
use super::*;
use hashing::hash;
use int_to_bytes::int_to_bytes32;
use std::ops::Range;
pub mod btree_overlay;
pub mod impls;
pub mod resize;
pub mod tree_hash_cache;
pub use btree_overlay::BTreeOverlay;
pub use tree_hash_cache::TreeHashCache;
/// Owns a `TreeHashCache` and exposes the operations to build it from an item, refresh it
/// after the item changes, and read the resulting merkle root.
#[derive(Debug, PartialEq)]
pub struct CachedTreeHasher {
/// The cache that `update` refreshes and `tree_hash_root` reads the root from.
cache: TreeHashCache,
}
impl CachedTreeHasher {
    /// Builds a hasher whose cache is freshly constructed from `item` at depth `0`.
    ///
    /// # Errors
    ///
    /// Forwards any error produced by `TreeHashCache::new`.
    pub fn new<T>(item: &T) -> Result<Self, Error>
    where
        T: CachedTreeHash<T>,
    {
        TreeHashCache::new(item, 0).map(|cache| Self { cache })
    }

    /// Brings the cache up to date with the (possibly changed) `item`.
    ///
    /// # Errors
    ///
    /// Forwards any error produced by `item.update_tree_hash_cache`.
    pub fn update<T>(&mut self, item: &T) -> Result<(), Error>
    where
        T: CachedTreeHash<T>,
    {
        let cache = &mut self.cache;

        // Rewind the per-hash cursors so the walk starts at the first chunk and overlay.
        cache.chunk_index = 0;
        cache.overlay_index = 0;

        // Clear every "modified" flag left over from the previous pass.
        cache.reset_modifications();

        // Let the item write whatever changed into the cache.
        item.update_tree_hash_cache(cache)
    }

    /// Returns a copy of the cache's root chunk -- the merkle root.
    ///
    /// # Errors
    ///
    /// Forwards the error from `TreeHashCache::root`.
    pub fn tree_hash_root(&self) -> Result<Vec<u8>, Error> {
        self.cache.root().map(|root| root.to_vec())
    }
}
/// Errors reported by the cached tree hashing routines in this module.
#[derive(Debug, PartialEq, Clone)]
pub enum Error {
// NOTE(review): not constructed anywhere in the visible code; presumably intended for types
// that must not produce a `BTreeOverlay` -- confirm.
ShouldNotProduceBTreeOverlay,
// NOTE(review): not constructed anywhere in the visible code.
NoFirstNode,
/// Returned by `TreeHashCache::root` when the cache holds fewer than `HASHSIZE` bytes.
NoBytesForRoot,
/// Returned by `TreeHashCache::replace_overlay` when the old overlay's chunk range cannot be
/// sliced out of the cache.
UnableToObtainSlices,
/// Returned by `TreeHashCache::replace_overlay` when `resize::grow_merkle_cache` yields `None`.
UnableToGrowMerkleTree,
/// Returned by `TreeHashCache::replace_overlay` when `resize::shrink_merkle_cache` yields
/// `None`.
UnableToShrinkMerkleTree,
/// Returned by `BTreeOverlay::from_lengths` when `lengths` is empty.
TreeCannotHaveZeroNodes,
// NOTE(review): not constructed anywhere in the visible code.
ShouldNeverBePacked(TreeHashType),
/// Returned by `TreeHashCache::from_bytes` when the byte length (carried in the variant) is
/// not a multiple of `BYTES_PER_CHUNK`.
BytesAreNotEvenChunks(usize),
/// Returned when a lookup for the given chunk index fails; used by `TreeHashCache::changed`,
/// `get_chunk` and `maybe_update_chunk`.
NoModifiedFieldForChunk(usize),
/// Returned by `TreeHashCache::modify_chunk` when the cache has no bytes for the given chunk
/// index.
NoBytesForChunk(usize),
/// Returned by `TreeHashCache::get_overlay` when no overlay is stored at the given index.
NoOverlayForIndex(usize),
// NOTE(review): not constructed anywhere in the visible code.
NotLeafNode(usize),
}
/// Implemented by types whose tree hash can be stored in, and incrementally updated through, a
/// `TreeHashCache`.
///
/// Every implementation visible in this module uses the implementing type itself as `Item`.
pub trait CachedTreeHash<Item>: TreeHash {
/// Produces the `BTreeOverlay` for this value, given the offset of its first chunk and its
/// depth. `BTreeOverlay::new` forwards to this method.
fn tree_hash_cache_overlay(
&self,
chunk_offset: usize,
depth: usize,
) -> Result<BTreeOverlay, Error>;
/// Number of cache chunks this value occupies. The `Vec<T>` overlay uses this as the length
/// of each non-basic item's leaf.
fn num_tree_hash_cache_chunks(&self) -> usize;
/// Builds a brand-new cache for this value. `TreeHashCache::new` forwards to this method.
fn new_tree_hash_cache(&self, depth: usize) -> Result<TreeHashCache, Error>;
/// Writes this value into an existing `cache`, starting at the cache's current
/// `chunk_index` / `overlay_index` cursors.
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error>;
}
/// Returns the `(left, right)` child indices of `parent` in a binary tree stored breadth-first
/// with the root at index `0`.
fn children(parent: usize) -> (usize, usize) {
    let left = 2 * parent + 1;
    let right = 2 * parent + 2;
    (left, right)
}
/// Converts a range of node (chunk) indices into the matching range of byte offsets, where
/// every node is `HASHSIZE` bytes wide.
fn node_range_to_byte_range(node_range: &Range<usize>) -> Range<usize> {
    let first_byte = node_range.start * HASHSIZE;
    let end_byte = node_range.end * HASHSIZE;
    first_byte..end_byte
}
/// Splits `values` into a power-of-two number of `HASHSIZE`-byte leaves (padding with `0`) and
/// merkleizes them, returning the entire merkle tree.
///
/// The returned bytes hold the internal nodes first, with the root at the front, followed by
/// the (padded) leaves. The root hash is `merkleize(values)[0..HASHSIZE]`.
///
/// # Panics
///
/// Panics if, after `sanitise_bytes`, there is no full leaf or the leaf count is not a power of
/// two.
pub fn merkleize(values: Vec<u8>) -> Vec<u8> {
    let mut values = sanitise_bytes(values);
    let leaf_bytes = values.len();
    let leaves = leaf_bytes / HASHSIZE;

    assert!(leaves != 0, "No full leaves");
    assert!(leaves.is_power_of_two(), "leaves is not power of two");

    // Zeroed space for the internal nodes, followed by the leaves. `append` moves the leaf
    // bytes over instead of cloning them.
    let mut o: Vec<u8> = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE];
    o.append(&mut values);

    // `i` is the end of the next pair of children to hash; `j` is the end of the next parent
    // slot to fill. Both walk from the back of the tree towards the root.
    let mut i = o.len();
    let mut j = o.len() - leaf_bytes;

    while i >= MERKLE_HASH_CHUNCK {
        i -= MERKLE_HASH_CHUNCK;
        let digest = hash(&o[i..i + MERKLE_HASH_CHUNCK]);

        j -= HASHSIZE;
        o[j..j + HASHSIZE].copy_from_slice(&digest);
    }

    o
}
/// Zero-pads `bytes` so that it holds a power-of-two number of complete `HASHSIZE`-byte leaves.
///
/// Input that already satisfies this is returned untouched; empty input becomes one all-zero
/// leaf.
pub fn sanitise_bytes(mut bytes: Vec<u8>) -> Vec<u8> {
    let len = bytes.len();
    let leaves_present = num_unsanitized_leaves(len);
    let leaves_required = leaves_present.next_power_of_two();

    let already_sane = leaves_present == leaves_required && !last_leaf_needs_padding(len);
    if !already_sane {
        bytes.resize(num_bytes(leaves_required), 0);
    }

    bytes
}
/// Appends one zeroed `BYTES_PER_CHUNK`-byte chunk to `bytes` for every leaf missing between
/// `num_leaves` and the next power of two.
fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec<u8>) {
    let missing_leaves = num_leaves.next_power_of_two() - num_leaves;
    let padded_len = bytes.len() + missing_leaves * BYTES_PER_CHUNK;
    bytes.resize(padded_len, 0);
}
/// Returns `true` when `num_bytes` does not end on a `HASHSIZE` boundary, i.e. the final leaf
/// is only partially filled.
fn last_leaf_needs_padding(num_bytes: usize) -> bool {
    let trailing_bytes = num_bytes % HASHSIZE;
    trailing_bytes != 0
}
/// Number of `HASHSIZE`-byte leaves needed to hold `num_bytes` bytes, rounding up.
fn num_unsanitized_leaves(num_bytes: usize) -> usize {
    let rounded_up = num_bytes + HASHSIZE - 1;
    rounded_up / HASHSIZE
}
/// Number of bytes occupied by `num_leaves` leaves of `HASHSIZE` bytes each.
fn num_bytes(num_leaves: usize) -> usize {
    HASHSIZE * num_leaves
}

View File

@@ -1,243 +0,0 @@
use super::*;
/// Describes how a binary merkle tree is laid out over a run of cache chunks: where it starts
/// and how many chunks each of its leaves occupies.
#[derive(Debug, PartialEq, Clone)]
pub struct BTreeOverlay {
/// Chunk index of the tree's first node (its root).
pub offset: usize,
/// Depth value supplied by whoever built the overlay; `TreeHashCache` compares it when
/// removing child overlays.
pub depth: usize,
/// Number of items represented by the tree. May be `0` even though `lengths` is never empty.
pub num_items: usize,
/// Number of chunks occupied by each non-padding leaf, in leaf order.
pub lengths: Vec<usize>,
}
impl BTreeOverlay {
    /// Builds the overlay for `item` by delegating to `item.tree_hash_cache_overlay`.
    pub fn new<T>(item: &T, initial_offset: usize, depth: usize) -> Result<Self, Error>
    where
        T: CachedTreeHash<T>,
    {
        item.tree_hash_cache_overlay(initial_offset, depth)
    }

    /// Builds an overlay from explicit per-leaf chunk counts.
    ///
    /// # Errors
    ///
    /// Returns `Error::TreeCannotHaveZeroNodes` if `lengths` is empty.
    pub fn from_lengths(
        offset: usize,
        num_items: usize,
        depth: usize,
        lengths: Vec<usize>,
    ) -> Result<Self, Error> {
        if lengths.is_empty() {
            Err(Error::TreeCannotHaveZeroNodes)
        } else {
            Ok(Self {
                offset,
                num_items,
                depth,
                lengths,
            })
        }
    }

    /// Number of leaf nodes in the tree, including padding leaves.
    pub fn num_leaf_nodes(&self) -> usize {
        self.lengths.len().next_power_of_two()
    }

    /// Number of leaves that exist only to round the leaf count up to a power of two.
    pub fn num_padding_leaves(&self) -> usize {
        self.num_leaf_nodes() - self.lengths.len()
    }

    /// Returns the number of nodes in the tree.
    ///
    /// Note: this is distinct from `num_chunks`, which returns the total number of chunks in
    /// this tree.
    pub fn num_nodes(&self) -> usize {
        2 * self.num_leaf_nodes() - 1
    }

    /// Number of non-leaf nodes in the tree.
    pub fn num_internal_nodes(&self) -> usize {
        self.num_leaf_nodes() - 1
    }

    /// Chunk index of the first node of the tree.
    fn first_node(&self) -> usize {
        self.offset
    }

    /// Chunk index of the root node.
    pub fn root(&self) -> usize {
        self.first_node()
    }

    /// Chunk index one past the last chunk of this tree.
    pub fn next_node(&self) -> usize {
        self.first_node() + self.num_internal_nodes() + self.num_leaf_nodes() - self.lengths.len()
            + self.lengths.iter().sum::<usize>()
    }

    /// Height of the tree; a tree with a single leaf has height `0`.
    pub fn height(&self) -> usize {
        self.num_leaf_nodes().trailing_zeros() as usize
    }

    /// Range of chunk indices covered by this tree.
    pub fn chunk_range(&self) -> Range<usize> {
        self.first_node()..self.next_node()
    }

    /// Returns the number of chunks inside this tree (including subtrees).
    ///
    /// Note: this is distinct from `num_nodes` which returns the number of nodes in the binary
    /// tree.
    pub fn num_chunks(&self) -> usize {
        self.next_node() - self.first_node()
    }

    /// Chunk index of the first leaf node.
    pub fn first_leaf_node(&self) -> usize {
        self.offset + self.num_internal_nodes()
    }

    /// Returns the chunk-range for a given leaf node.
    ///
    /// `i` is a node index within the tree (root is `0`), not a leaf index.
    ///
    /// Returns `None` if:
    ///  - The specified node is internal.
    ///  - The specified node is padding.
    ///  - The specified node is OOB of the tree.
    pub fn get_leaf_node(&self, i: usize) -> Result<Option<Range<usize>>, Error> {
        let num_internal = self.num_internal_nodes();

        if i < num_internal {
            // Internal node. Without this guard `i - num_internal` below would underflow.
            Ok(None)
        } else if i >= self.num_nodes() - self.num_padding_leaves() {
            Ok(None)
        } else if (i == num_internal) && (self.num_items == 0) {
            // If this is the first leaf node and the overlay contains zero items, return `None` as
            // this node must be padding.
            Ok(None)
        } else {
            let leaf = i - num_internal;

            let first_node =
                self.offset + num_internal + self.lengths.iter().take(leaf).sum::<usize>();
            let last_node = first_node + self.lengths[leaf];

            Ok(Some(first_node..last_node))
        }
    }

    /// Returns the chunk indices of the two children of the internal node `parent`.
    ///
    /// `parent` is a node index within the tree (root is `0`) and is expected to be an internal
    /// node; the result is not meaningful for leaf nodes.
    pub fn child_chunks(&self, parent: usize) -> (usize, usize) {
        let children = children(parent);
        let num_internal = self.num_internal_nodes();

        if children.1 < num_internal {
            (children.0 + self.offset, children.1 + self.offset)
        } else {
            // The children are leaves. Convert the right child's node index into a leaf
            // *count* (leaf index + 1) so the last two entries returned are exactly the two
            // children. Passing the raw node index here would select the wrong leaves.
            let chunks = self.n_leaf_node_chunks(children.1 + 1 - num_internal);
            (chunks[chunks.len() - 2], chunks[chunks.len() - 1])
        }
    }

    /// (parent, (left_child, right_child))
    pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> {
        // Chunk index of every node, in node order: internal nodes first, then leaves.
        let mut chunks = Vec::with_capacity(self.num_nodes());
        chunks.extend(self.internal_node_chunks());
        chunks.extend(self.leaf_node_chunks());

        (0..self.num_internal_nodes())
            .map(|parent| {
                let (left, right) = children(parent);
                (chunks[parent], (chunks[left], chunks[right]))
            })
            .collect()
    }

    /// Returns a `Vec` of chunk indices for each internal node of the tree.
    pub fn internal_node_chunks(&self) -> Vec<usize> {
        (self.offset..self.offset + self.num_internal_nodes()).collect()
    }

    /// Returns a `Vec` of the first chunk index for each leaf node of the tree.
    pub fn leaf_node_chunks(&self) -> Vec<usize> {
        self.n_leaf_node_chunks(self.num_leaf_nodes())
    }

    /// Returns a `Vec` of the first chunk index for the first `n` leaf nodes of the tree.
    ///
    /// Leaves without an entry in `lengths` (padding) are counted as one chunk each.
    fn n_leaf_node_chunks(&self, n: usize) -> Vec<usize> {
        let mut chunks = Vec::with_capacity(n);
        let mut chunk = self.offset + self.num_internal_nodes();

        for i in 0..n {
            chunks.push(chunk);
            chunk += self.lengths.get(i).map_or(1, |len| *len);
        }

        chunks
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Overlay at offset `0` and depth `0` holding `n` items of one chunk each.
    fn uniform_tree(n: usize) -> BTreeOverlay {
        BTreeOverlay::from_lengths(0, n, 0, vec![1; n]).unwrap()
    }

    #[test]
    fn leaf_node_chunks() {
        assert_eq!(uniform_tree(4).leaf_node_chunks(), vec![3, 4, 5, 6])
    }

    #[test]
    fn internal_node_chunks() {
        assert_eq!(uniform_tree(4).internal_node_chunks(), vec![0, 1, 2])
    }

    #[test]
    fn internal_parents_and_children() {
        let expected = vec![(0, (1, 2)), (1, (3, 4)), (2, (5, 6))];
        assert_eq!(uniform_tree(4).internal_parents_and_children(), expected)
    }

    #[test]
    fn chunk_range() {
        assert_eq!(uniform_tree(4).chunk_range(), 0..7);
        assert_eq!(uniform_tree(1).chunk_range(), 0..1);
        assert_eq!(uniform_tree(2).chunk_range(), 0..3);

        // A non-zero offset shifts the whole range.
        let shifted = BTreeOverlay::from_lengths(11, 4, 0, vec![1, 1]).unwrap();
        assert_eq!(shifted.chunk_range(), 11..14);
    }

    #[test]
    fn get_leaf_node() {
        let tree = uniform_tree(4);

        // Nodes 3..=6 are the four leaves; each covers exactly its own chunk.
        for node in 3..7 {
            assert_eq!(tree.get_leaf_node(node), Ok(Some(node..node + 1)));
        }
    }

    #[test]
    fn root_of_one_node() {
        let tree = uniform_tree(1);

        assert_eq!(tree.root(), 0);
        assert_eq!(tree.num_internal_nodes(), 0);
        assert_eq!(tree.num_leaf_nodes(), 1);
    }

    #[test]
    fn child_chunks() {
        assert_eq!(uniform_tree(4).child_chunks(0), (1, 2))
    }
}

View File

@@ -1,69 +0,0 @@
use super::*;
mod vec;
impl CachedTreeHash<u64> for u64 {
fn new_tree_hash_cache(&self, _depth: usize) -> Result<TreeHashCache, Error> {
Ok(TreeHashCache::from_bytes(
merkleize(self.to_le_bytes().to_vec()),
false,
None,
)?)
}
fn num_tree_hash_cache_chunks(&self) -> usize {
1
}
fn tree_hash_cache_overlay(
&self,
chunk_offset: usize,
depth: usize,
) -> Result<BTreeOverlay, Error> {
panic!("Basic should not produce overlay");
// BTreeOverlay::from_lengths(chunk_offset, 1, depth, vec![1])
}
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> {
let leaf = merkleize(self.to_le_bytes().to_vec());
cache.maybe_update_chunk(cache.chunk_index, &leaf)?;
cache.chunk_index += 1;
// cache.overlay_index += 1;
Ok(())
}
}
impl CachedTreeHash<usize> for usize {
fn new_tree_hash_cache(&self, _depth: usize) -> Result<TreeHashCache, Error> {
Ok(TreeHashCache::from_bytes(
merkleize(self.to_le_bytes().to_vec()),
false,
None,
)?)
}
fn num_tree_hash_cache_chunks(&self) -> usize {
1
}
fn tree_hash_cache_overlay(
&self,
chunk_offset: usize,
depth: usize,
) -> Result<BTreeOverlay, Error> {
panic!("Basic should not produce overlay");
// BTreeOverlay::from_lengths(chunk_offset, 1, depth, vec![1])
}
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> {
let leaf = merkleize(self.to_le_bytes().to_vec());
cache.maybe_update_chunk(cache.chunk_index, &leaf)?;
cache.chunk_index += 1;
// cache.overlay_index += 1;
Ok(())
}
}

View File

@@ -1,201 +0,0 @@
use super::*;
impl<T> CachedTreeHash<Vec<T>> for Vec<T>
where
T: CachedTreeHash<T> + TreeHash,
{
fn new_tree_hash_cache(&self, depth: usize) -> Result<TreeHashCache, Error> {
let overlay = self.tree_hash_cache_overlay(0, depth)?;
let mut cache = match T::tree_hash_type() {
TreeHashType::Basic => TreeHashCache::from_bytes(
merkleize(get_packed_leaves(self)?),
false,
Some(overlay.clone()),
),
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let subtrees = self
.iter()
.map(|item| TreeHashCache::new(item, depth + 1))
.collect::<Result<Vec<TreeHashCache>, _>>()?;
TreeHashCache::from_leaves_and_subtrees(self, subtrees, depth)
}
}?;
cache.add_length_nodes(overlay.chunk_range(), self.len())?;
Ok(cache)
}
fn num_tree_hash_cache_chunks(&self) -> usize {
BTreeOverlay::new(self, 0, 0)
.and_then(|o| Ok(o.num_chunks()))
.unwrap_or_else(|_| 1)
+ 2
}
fn tree_hash_cache_overlay(
&self,
chunk_offset: usize,
depth: usize,
) -> Result<BTreeOverlay, Error> {
let lengths = match T::tree_hash_type() {
TreeHashType::Basic => {
// Ceil division.
let num_leaves = (self.len() + T::tree_hash_packing_factor() - 1)
/ T::tree_hash_packing_factor();
// Disallow zero-length as an empty list still has one all-padding node.
vec![1; std::cmp::max(1, num_leaves)]
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let mut lengths = vec![];
for item in self {
lengths.push(item.num_tree_hash_cache_chunks())
}
// Disallow zero-length as an empty list still has one all-padding node.
if lengths.is_empty() {
lengths.push(1);
}
lengths
}
};
BTreeOverlay::from_lengths(chunk_offset, self.len(), depth, lengths)
}
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> {
// Skip the length-mixed-in root node.
cache.chunk_index += 1;
let old_overlay = cache.get_overlay(cache.overlay_index, cache.chunk_index)?;
let new_overlay = BTreeOverlay::new(self, cache.chunk_index, old_overlay.depth)?;
cache.replace_overlay(cache.overlay_index, cache.chunk_index, new_overlay.clone())?;
cache.overlay_index += 1;
match T::tree_hash_type() {
TreeHashType::Basic => {
let mut buf = vec![0; HASHSIZE];
let item_bytes = HASHSIZE / T::tree_hash_packing_factor();
// Iterate through each of the leaf nodes.
for i in 0..new_overlay.num_leaf_nodes() {
// Iterate through the number of items that may be packing into the leaf node.
for j in 0..T::tree_hash_packing_factor() {
// Create a mut slice that can be filled with either a serialized item or
// padding.
let buf_slice = &mut buf[j * item_bytes..(j + 1) * item_bytes];
// Attempt to get the item for this portion of the chunk. If it exists,
// update `buf` with it's serialized bytes. If it doesn't exist, update
// `buf` with padding.
match self.get(i * T::tree_hash_packing_factor() + j) {
Some(item) => {
buf_slice.copy_from_slice(&item.tree_hash_packed_encoding());
}
None => buf_slice.copy_from_slice(&vec![0; item_bytes]),
}
}
// Update the chunk if the generated `buf` is not the same as the cache.
let chunk = new_overlay.first_leaf_node() + i;
cache.maybe_update_chunk(chunk, &buf)?;
}
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
for i in 0..new_overlay.num_leaf_nodes() {
// Adjust `i` so it is a leaf node for each of the overlays.
let old_i = i + old_overlay.num_internal_nodes();
let new_i = i + new_overlay.num_internal_nodes();
match (
old_overlay.get_leaf_node(old_i)?,
new_overlay.get_leaf_node(new_i)?,
) {
// The item existed in the previous list and exists in the current list.
(Some(_old), Some(new)) => {
cache.chunk_index = new.start;
self[i].update_tree_hash_cache(cache)?;
}
// The item did not exist in the previous list but does exist in this list.
//
// Viz., the list has been lengthened.
(None, Some(new)) => {
let (bytes, mut bools, overlays) =
TreeHashCache::new(&self[i], new_overlay.depth + 1)?
.into_components();
// Record the number of overlays, this will be used later in the fn.
let num_overlays = overlays.len();
// Flag the root node of the new tree as dirty.
bools[0] = true;
cache.splice(new.start..new.start + 1, bytes, bools);
cache
.overlays
.splice(cache.overlay_index..cache.overlay_index, overlays);
cache.overlay_index += num_overlays;
}
// The item existed in the previous list but does not exist in this list.
//
// Viz., the list has been shortened.
(Some(old), None) => {
if new_overlay.num_items == 0 {
// In this case, the list has been made empty and we should make
// this node padding.
cache.maybe_update_chunk(new_overlay.root(), &[0; HASHSIZE])?;
} else {
// In this case, there are some items in the new list and we should
// splice out the entire tree of the removed node, replacing it
// with a single padding node.
cache.splice(old, vec![0; HASHSIZE], vec![true]);
}
}
// The item didn't exist in the old list and doesn't exist in the new list,
// nothing to do.
(None, None) => {}
}
}
// Clean out any excess overlays that may or may not be remaining if the list was
// shortened.
cache.remove_proceeding_child_overlays(cache.overlay_index, new_overlay.depth);
}
}
cache.update_internal_nodes(&new_overlay)?;
// Mix in length
cache.mix_in_length(new_overlay.chunk_range(), self.len())?;
// Skip an extra node to clear the length node.
cache.chunk_index = new_overlay.next_node() + 1;
Ok(())
}
}
fn get_packed_leaves<T>(vec: &Vec<T>) -> Result<Vec<u8>, Error>
where
T: CachedTreeHash<T>,
{
let num_packed_bytes = (BYTES_PER_CHUNK / T::tree_hash_packing_factor()) * vec.len();
let num_leaves = num_sanitized_leaves(num_packed_bytes);
let mut packed = Vec::with_capacity(num_leaves * HASHSIZE);
for item in vec {
packed.append(&mut item.tree_hash_packed_encoding());
}
Ok(sanitise_bytes(packed))
}

View File

@@ -1,284 +0,0 @@
use super::*;
use std::cmp::min;
/// Re-lays out a cached merkle tree of height `from_height` as a taller tree of height
/// `to_height`, returning the new bytes and "modified" flags.
///
/// Every level of the old tree is copied to the left-hand side of the level `to_height -
/// from_height` rows further down in the new tree. All other nodes are zeroed and flagged as
/// modified.
///
/// Returns `None` if `to_height` is less than `from_height`, if `old_bytes` holds fewer nodes
/// than a tree of `from_height` requires, or if any expected slice is out of bounds.
pub fn grow_merkle_cache(
    old_bytes: &[u8],
    old_flags: &[bool],
    from_height: usize,
    to_height: usize,
) -> Option<(Vec<u8>, Vec<bool>)> {
    // Number of levels every old node moves down by. Checked so that inconsistent heights
    // yield `None` instead of an arithmetic underflow.
    let height_diff = to_height.checked_sub(from_height)?;

    // Determine the size of our new tree. It is not just a simple `1 << to_height` as there can be
    // an arbitrary number of nodes in `old_bytes` leaves if those leaves are subtrees.
    let to_nodes = {
        let old_nodes = old_bytes.len() / HASHSIZE;
        let additional_nodes = old_nodes.checked_sub(nodes_in_tree_of_height(from_height))?;
        nodes_in_tree_of_height(to_height) + additional_nodes
    };

    let mut bytes = vec![0; to_nodes * HASHSIZE];
    let mut flags = vec![true; to_nodes];

    let leaf_level = from_height;

    for i in 0..=from_height {
        // If we're on the leaf slice, grab the first byte and all of the bytes after that.
        // This is required because we can have an arbitrary number of bytes at the leaf level
        // (e.g., the case where there are subtrees as leaves).
        //
        // If we're not on a leaf level, the number of nodes is fixed and known.
        let (old_byte_slice, old_flag_slice) = if i == leaf_level {
            (
                old_bytes.get(first_byte_at_height(i)..)?,
                old_flags.get(first_node_at_height(i)..)?,
            )
        } else {
            (
                old_bytes.get(byte_range_at_height(i))?,
                old_flags.get(node_range_at_height(i))?,
            )
        };

        let new_i = i + height_diff;
        let (new_byte_slice, new_flag_slice) = if i == leaf_level {
            (
                bytes.get_mut(first_byte_at_height(new_i)..)?,
                flags.get_mut(first_node_at_height(new_i)..)?,
            )
        } else {
            (
                bytes.get_mut(byte_range_at_height(new_i))?,
                flags.get_mut(node_range_at_height(new_i))?,
            )
        };

        new_byte_slice
            .get_mut(0..old_byte_slice.len())?
            .copy_from_slice(old_byte_slice);
        new_flag_slice
            .get_mut(0..old_flag_slice.len())?
            .copy_from_slice(old_flag_slice);
    }

    Some((bytes, flags))
}
/// Re-lays out a cached merkle tree of height `from_height` as a shorter tree of height
/// `to_height` with `to_nodes` nodes, returning the new bytes and "modified" flags.
///
/// Each level of the new tree is filled from the left-hand side of the level `from_height -
/// to_height` rows further down in the old tree, copying as many nodes as fit. Nodes that
/// receive no data stay zeroed and flagged as modified.
///
/// Returns `None` if `from_height` is less than `to_height` or if any expected slice is out of
/// bounds.
pub fn shrink_merkle_cache(
    from_bytes: &[u8],
    from_flags: &[bool],
    from_height: usize,
    to_height: usize,
    to_nodes: usize,
) -> Option<(Vec<u8>, Vec<bool>)> {
    // Number of levels every surviving node moves up by. Checked so that inconsistent heights
    // yield `None` instead of an arithmetic underflow.
    let height_diff = from_height.checked_sub(to_height)?;

    let mut bytes = vec![0; to_nodes * HASHSIZE];
    let mut flags = vec![true; to_nodes];

    for i in 0..=to_height {
        let from_i = i + height_diff;

        // On the leaf level take everything to the end of the slice, as leaves may be subtrees
        // of arbitrary size. On other levels the number of nodes is fixed by the height.
        let (from_byte_slice, from_flag_slice) = if from_i == from_height {
            (
                from_bytes.get(first_byte_at_height(from_i)..)?,
                from_flags.get(first_node_at_height(from_i)..)?,
            )
        } else {
            (
                from_bytes.get(byte_range_at_height(from_i))?,
                from_flags.get(node_range_at_height(from_i))?,
            )
        };

        let (to_byte_slice, to_flag_slice) = if i == to_height {
            (
                bytes.get_mut(first_byte_at_height(i)..)?,
                flags.get_mut(first_node_at_height(i)..)?,
            )
        } else {
            (
                bytes.get_mut(byte_range_at_height(i))?,
                flags.get_mut(node_range_at_height(i))?,
            )
        };

        // Only copy what both sides can hold.
        let num_bytes = min(from_byte_slice.len(), to_byte_slice.len());
        let num_flags = min(from_flag_slice.len(), to_flag_slice.len());

        to_byte_slice
            .get_mut(0..num_bytes)?
            .copy_from_slice(from_byte_slice.get(0..num_bytes)?);
        to_flag_slice
            .get_mut(0..num_flags)?
            .copy_from_slice(from_flag_slice.get(0..num_flags)?);
    }

    Some((bytes, flags))
}
/// Total number of nodes in a full binary tree of height `h` (a lone root has height `0`).
fn nodes_in_tree_of_height(h: usize) -> usize {
    let leaves: usize = 1 << h;
    2 * leaves - 1
}
/// Byte range covered by all nodes on level `h` of a full binary tree stored breadth-first.
fn byte_range_at_height(h: usize) -> Range<usize> {
    let Range { start, end } = node_range_at_height(h);
    (start * HASHSIZE)..(end * HASHSIZE)
}
/// Node-index range covered by level `h` of a full binary tree stored breadth-first.
fn node_range_at_height(h: usize) -> Range<usize> {
    let first = first_node_at_height(h);
    let one_past_last = last_node_at_height(h) + 1;
    first..one_past_last
}
/// Byte offset of the first node on level `h`.
fn first_byte_at_height(h: usize) -> usize {
    let first_node = first_node_at_height(h);
    first_node * HASHSIZE
}
/// Index of the first node on level `h` of a full binary tree stored breadth-first.
fn first_node_at_height(h: usize) -> usize {
    let nodes_on_level: usize = 1 << h;
    nodes_on_level - 1
}
/// Index of the last node on level `h` of a full binary tree stored breadth-first.
fn last_node_at_height(h: usize) -> usize {
    let nodes_on_next_level: usize = 1 << (h + 1);
    nodes_on_next_level - 2
}
#[cfg(test)]
mod test {
    use super::*;

    /// A node created by growing the tree: zeroed bytes, flagged as modified.
    const FRESH: (u8, bool) = (0, true);
    /// A node carried over from the original tree: bytes of `42`, flagged as unmodified.
    const KEPT: (u8, bool) = (42, false);

    /// Height of a full binary tree holding `nodes` nodes (a lone root has height `0`).
    fn height_of(nodes: usize) -> usize {
        (nodes + 1).trailing_zeros() as usize - 1
    }

    /// Expands one `(fill byte, flag)` pair per node into flat byte and flag vectors, using
    /// 32 bytes per node.
    fn expand(nodes: &[(u8, bool)]) -> (Vec<u8>, Vec<bool>) {
        let mut bytes = vec![];
        let mut flags = vec![];

        for &(fill, flag) in nodes {
            bytes.extend_from_slice(&[fill; 32]);
            flags.push(flag);
        }

        (bytes, flags)
    }

    /// Grows an all-`42`, unmodified tree of `small` nodes to a tree of `big` nodes, checks the
    /// result against `expected_layout`, then shrinks it back and checks the round trip.
    fn assert_grow_then_shrink(small: usize, big: usize, expected_layout: &[(u8, bool)]) {
        let original_bytes = vec![42; small * HASHSIZE];
        let original_flags = vec![false; small];

        let (grown_bytes, grown_flags) = grow_merkle_cache(
            &original_bytes,
            &original_flags,
            height_of(small),
            height_of(big),
        )
        .unwrap();

        let (expected_bytes, expected_flags) = expand(expected_layout);

        assert_eq!(expected_bytes, grown_bytes);
        assert_eq!(expected_flags, grown_flags);

        let (shrunk_bytes, shrunk_flags) = shrink_merkle_cache(
            &grown_bytes,
            &grown_flags,
            height_of(big),
            height_of(small),
            small,
        )
        .unwrap();

        assert_eq!(original_bytes, shrunk_bytes);
        assert_eq!(original_flags, shrunk_flags);
    }

    #[test]
    fn can_grow_and_shrink_three_levels() {
        #[rustfmt::skip]
        let expected_layout = [
            // First level
            FRESH,
            // Second level
            FRESH, FRESH,
            // Third level
            FRESH, FRESH, FRESH, FRESH,
            // Fourth level
            KEPT, FRESH, FRESH, FRESH, FRESH, FRESH, FRESH, FRESH,
        ];

        assert_grow_then_shrink(1, 15, &expected_layout);
    }

    #[test]
    fn can_grow_and_shrink_one_level() {
        #[rustfmt::skip]
        let expected_layout = [
            // First level
            FRESH,
            // Second level
            KEPT, FRESH,
            // Third level
            KEPT, KEPT, FRESH, FRESH,
            // Fourth level
            KEPT, KEPT, KEPT, KEPT, FRESH, FRESH, FRESH, FRESH,
        ];

        assert_grow_then_shrink(7, 15, &expected_layout);
    }
}

View File

@@ -1,330 +0,0 @@
use super::*;
/// A flat byte cache of merkle tree chunks, plus the bookkeeping needed to update it
/// incrementally.
#[derive(Debug, PartialEq, Clone)]
pub struct TreeHashCache {
/// The cached chunks, stored back-to-back; chunk `n` occupies bytes
/// `n * BYTES_PER_CHUNK..(n + 1) * BYTES_PER_CHUNK`.
pub cache: Vec<u8>,
/// One flag per chunk, set when the chunk has been written since the flags were last reset.
pub chunk_modified: Vec<bool>,
/// Overlays describing the trees stored in `cache`, looked up by `overlay_index`.
pub overlays: Vec<BTreeOverlay>,
/// Cursor: the chunk an in-progress update is currently positioned at.
pub chunk_index: usize,
/// Cursor: the overlay an in-progress update is currently positioned at.
pub overlay_index: usize,
}
impl Into<Vec<u8>> for TreeHashCache {
fn into(self) -> Vec<u8> {
self.cache
}
}
impl TreeHashCache {
    /// Builds a cache for `item` by delegating to `item.new_tree_hash_cache`.
    pub fn new<T>(item: &T, depth: usize) -> Result<Self, Error>
    where
        T: CachedTreeHash<T>,
    {
        item.new_tree_hash_cache(depth)
    }

    /// Builds a cache for `item` out of the already-built caches of its leaves / subtrees.
    ///
    /// The resulting bytes hold the internal nodes of `item`'s tree followed by the full bytes
    /// of each entry of `leaves_and_subtrees`, padded to a power-of-two number of leaves.
    pub fn from_leaves_and_subtrees<T>(
        item: &T,
        leaves_and_subtrees: Vec<Self>,
        depth: usize,
    ) -> Result<Self, Error>
    where
        T: CachedTreeHash<T>,
    {
        let overlay = BTreeOverlay::new(item, 0, depth)?;

        // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out
        // later.
        let num_provided_leaf_nodes = leaves_and_subtrees.len();

        // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill
        // all the to-be-built internal nodes with zeros and append the leaves and subtrees.
        let internal_node_bytes = overlay.num_internal_nodes() * BYTES_PER_CHUNK;
        let leaves_and_subtrees_bytes: usize =
            leaves_and_subtrees.iter().map(|t| t.bytes_len()).sum();
        let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes);
        cache.resize(internal_node_bytes, 0);

        // Allocate enough bytes to store all the leaves.
        let mut leaves = Vec::with_capacity(overlay.num_leaf_nodes() * HASHSIZE);
        let mut overlays = Vec::with_capacity(leaves_and_subtrees.len());

        if T::tree_hash_type() == TreeHashType::List {
            overlays.push(overlay);
        }

        // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then
        // concatenating their merkle trees.
        for t in leaves_and_subtrees {
            leaves.extend_from_slice(t.root()?);

            let (mut bytes, _bools, mut t_overlays) = t.into_components();

            cache.append(&mut bytes);
            overlays.append(&mut t_overlays);
        }

        // Pad the leaves to an even power-of-two, using zeros.
        pad_for_leaf_count(num_provided_leaf_nodes, &mut cache);

        // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros
        // internal nodes created earlier with the internal nodes generated by `merkleize`.
        let mut merkleized = merkleize(leaves);
        // Only the internal nodes are wanted; the split-off leaf bytes are deliberately
        // discarded.
        let _ = merkleized.split_off(internal_node_bytes);
        cache.splice(0..internal_node_bytes, merkleized);

        Ok(Self {
            chunk_modified: vec![false; cache.len() / BYTES_PER_CHUNK],
            cache,
            overlays,
            chunk_index: 0,
            overlay_index: 0,
        })
    }

    /// Wraps already-merkleized `bytes` in a cache, flagging every chunk with
    /// `initial_modified_state` and storing `overlay` (if any) as the only overlay.
    ///
    /// # Errors
    ///
    /// Returns `Error::BytesAreNotEvenChunks` if `bytes` is not a whole number of chunks.
    pub fn from_bytes(
        bytes: Vec<u8>,
        initial_modified_state: bool,
        overlay: Option<BTreeOverlay>,
    ) -> Result<Self, Error> {
        if bytes.len() % BYTES_PER_CHUNK != 0 {
            return Err(Error::BytesAreNotEvenChunks(bytes.len()));
        }

        let overlays = match overlay {
            Some(overlay) => vec![overlay],
            None => vec![],
        };

        Ok(Self {
            chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK],
            cache: bytes,
            overlays,
            chunk_index: 0,
            overlay_index: 0,
        })
    }

    /// Returns a copy of the overlay stored at `overlay_index`, with its `offset` replaced by
    /// `chunk_index`.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoOverlayForIndex` if there is no overlay at `overlay_index`.
    pub fn get_overlay(
        &self,
        overlay_index: usize,
        chunk_index: usize,
    ) -> Result<BTreeOverlay, Error> {
        let mut overlay = self
            .overlays
            .get(overlay_index)
            .ok_or_else(|| Error::NoOverlayForIndex(overlay_index))?
            .clone();

        overlay.offset = chunk_index;

        Ok(overlay)
    }

    /// Clears the "modified" flag of every chunk.
    pub fn reset_modifications(&mut self) {
        for chunk_modified in self.chunk_modified.iter_mut() {
            *chunk_modified = false;
        }
    }

    /// Replaces the overlay at `overlay_index` with `new_overlay`, resizing the cached tree if
    /// the number of leaf nodes changed. Returns the overlay that was previously stored.
    pub fn replace_overlay(
        &mut self,
        overlay_index: usize,
        chunk_index: usize,
        new_overlay: BTreeOverlay,
    ) -> Result<BTreeOverlay, Error> {
        let old_overlay = self.get_overlay(overlay_index, chunk_index)?;

        // If the merkle tree required to represent the new list is of a different size to the one
        // required for the previous list, then update our cache.
        //
        // This grows/shrinks the bytes to accommodate the new tree, preserving as much of the tree
        // as possible.
        if new_overlay.num_leaf_nodes() != old_overlay.num_leaf_nodes() {
            // Get slices of the existing tree from the cache.
            let (old_bytes, old_flags) = self
                .slices(old_overlay.chunk_range())
                .ok_or_else(|| Error::UnableToObtainSlices)?;

            let (new_bytes, new_bools) =
                if new_overlay.num_leaf_nodes() > old_overlay.num_leaf_nodes() {
                    resize::grow_merkle_cache(
                        old_bytes,
                        old_flags,
                        old_overlay.height(),
                        new_overlay.height(),
                    )
                    .ok_or_else(|| Error::UnableToGrowMerkleTree)?
                } else {
                    resize::shrink_merkle_cache(
                        old_bytes,
                        old_flags,
                        old_overlay.height(),
                        new_overlay.height(),
                        new_overlay.num_chunks(),
                    )
                    .ok_or_else(|| Error::UnableToShrinkMerkleTree)?
                };

            // Splice the newly created `TreeHashCache` over the existing elements.
            self.splice(old_overlay.chunk_range(), new_bytes, new_bools);
        }

        // `get_overlay` above has already proven that `overlay_index` is in bounds.
        Ok(std::mem::replace(
            &mut self.overlays[overlay_index],
            new_overlay,
        ))
    }

    /// Removes the overlays starting at `overlay_index` up to (but excluding) the first one
    /// whose depth is less than or equal to `depth`; removes to the end if there is none.
    ///
    /// Does nothing if `overlay_index` is past the end of the overlay list.
    pub fn remove_proceeding_child_overlays(&mut self, overlay_index: usize, depth: usize) {
        if overlay_index >= self.overlays.len() {
            return;
        }

        let end = self
            .overlays
            .iter()
            .skip(overlay_index)
            .position(|o| o.depth <= depth)
            .map_or(self.overlays.len(), |i| i + overlay_index);

        self.overlays.drain(overlay_index..end);
    }

    /// Re-hashes every internal node of `overlay` that has at least one modified child,
    /// working from the bottom of the tree towards the root.
    pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> {
        for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() {
            if self.either_modified(children)? {
                let digest = self.hash_children(children)?;
                self.modify_chunk(parent, &digest)?;
            }
        }

        Ok(())
    }

    /// Total number of bytes held by the cache.
    fn bytes_len(&self) -> usize {
        self.cache.len()
    }

    /// Returns the first chunk of the cache.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoBytesForRoot` if the cache holds fewer than `HASHSIZE` bytes.
    pub fn root(&self) -> Result<&[u8], Error> {
        self.cache
            .get(0..HASHSIZE)
            .ok_or_else(|| Error::NoBytesForRoot)
    }

    /// Replaces the chunks in `chunk_range` with `bytes`, and their flags with `bools`.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_range` is out of bounds for the cache.
    pub fn splice(&mut self, chunk_range: Range<usize>, bytes: Vec<u8>, bools: Vec<bool>) {
        // Update the `chunk_modified` vec, marking all spliced-in nodes as changed.
        self.chunk_modified.splice(chunk_range.clone(), bools);
        self.cache
            .splice(node_range_to_byte_range(&chunk_range), bytes);
    }

    /// Overwrites `chunk` with `to` and flags it as modified, but only if the bytes differ
    /// from what is already cached.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoModifiedFieldForChunk` if `chunk` is out of bounds.
    ///
    /// # Panics
    ///
    /// Panics if the chunk needs updating and `to` is not exactly one chunk long.
    pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> {
        let start = chunk * BYTES_PER_CHUNK;
        let end = start + BYTES_PER_CHUNK;

        if !self.chunk_equals(chunk, to)? {
            // Resolve both the bytes and the flag before writing, so a missing flag is
            // reported as an error rather than a panic after the bytes were changed.
            let bytes = self
                .cache
                .get_mut(start..end)
                .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?;
            let modified = self
                .chunk_modified
                .get_mut(chunk)
                .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?;

            bytes.copy_from_slice(to);
            *modified = true;
        }

        Ok(())
    }

    /// Returns the bytes and flags for `chunk_range`, or `None` if it is out of bounds.
    fn slices(&self, chunk_range: Range<usize>) -> Option<(&[u8], &[bool])> {
        Some((
            self.cache.get(node_range_to_byte_range(&chunk_range))?,
            self.chunk_modified.get(chunk_range)?,
        ))
    }

    /// Unconditionally overwrites `chunk` with `to` and flags it as modified.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoBytesForChunk` if the cache has no bytes for `chunk`, or
    /// `Error::NoModifiedFieldForChunk` if it has no flag for `chunk`.
    ///
    /// # Panics
    ///
    /// Panics if `to` is not exactly one chunk long.
    pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> {
        let start = chunk * BYTES_PER_CHUNK;
        let end = start + BYTES_PER_CHUNK;

        let bytes = self
            .cache
            .get_mut(start..end)
            .ok_or_else(|| Error::NoBytesForChunk(chunk))?;
        let modified = self
            .chunk_modified
            .get_mut(chunk)
            .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?;

        bytes.copy_from_slice(to);
        *modified = true;

        Ok(())
    }

    /// Returns the bytes of `chunk`.
    fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> {
        let start = chunk * BYTES_PER_CHUNK;
        let end = start + BYTES_PER_CHUNK;

        self.cache
            .get(start..end)
            .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))
    }

    /// Returns `true` if the cached bytes of `chunk` equal `other`.
    fn chunk_equals(&self, chunk: usize, other: &[u8]) -> Result<bool, Error> {
        Ok(self.get_chunk(chunk)? == other)
    }

    /// Returns the "modified" flag of `chunk`.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoModifiedFieldForChunk` if `chunk` is out of bounds.
    pub fn changed(&self, chunk: usize) -> Result<bool, Error> {
        self.chunk_modified
            .get(chunk)
            .cloned()
            .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))
    }

    /// Returns `true` if either of the two chunks is flagged as modified.
    ///
    /// Both flags are always looked up, so an out-of-bounds child is reported even when the
    /// other child is modified.
    fn either_modified(&self, children: (usize, usize)) -> Result<bool, Error> {
        let left = self.changed(children.0)?;
        let right = self.changed(children.1)?;

        Ok(left || right)
    }

    /// Returns the hash of the two given chunks concatenated in order.
    pub fn hash_children(&self, children: (usize, usize)) -> Result<Vec<u8>, Error> {
        let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2);
        child_bytes.extend_from_slice(self.get_chunk(children.0)?);
        child_bytes.extend_from_slice(self.get_chunk(children.1)?);

        Ok(hash(&child_bytes))
    }

    /// Wraps the tree occupying `chunk_range` with two extra chunks -- one inserted before it
    /// for the length-mixed-in root and one inserted after it for the length -- then mixes in
    /// `length`.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoModifiedFieldForChunk` if `chunk_range.start` is out of bounds.
    pub fn add_length_nodes(
        &mut self,
        chunk_range: Range<usize>,
        length: usize,
    ) -> Result<(), Error> {
        // Flag the tree's root as modified so that `mix_in_length` below always computes the
        // mixed-in root.
        *self
            .chunk_modified
            .get_mut(chunk_range.start)
            .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk_range.start))? = true;

        let byte_range = node_range_to_byte_range(&chunk_range);

        // Add the last node.
        self.cache
            .splice(byte_range.end..byte_range.end, vec![0; HASHSIZE]);
        self.chunk_modified
            .splice(chunk_range.end..chunk_range.end, vec![false]);

        // Add the first node.
        self.cache
            .splice(byte_range.start..byte_range.start, vec![0; HASHSIZE]);
        self.chunk_modified
            .splice(chunk_range.start..chunk_range.start, vec![false]);

        // The tree has moved one chunk to the right because of the node inserted before it.
        self.mix_in_length(chunk_range.start + 1..chunk_range.end + 1, length)?;

        Ok(())
    }

    /// Writes `length` into the chunk at `chunk_range.end` and, if either that chunk or the
    /// tree root at `chunk_range.start` is modified, stores the hash of the two in the chunk
    /// immediately before `chunk_range.start`.
    ///
    /// # Errors
    ///
    /// Returns `Error::NoBytesForChunk` if the mixed-in root must be written but
    /// `chunk_range.start` is `0`, i.e. there is no chunk before the tree to hold it.
    pub fn mix_in_length(&mut self, chunk_range: Range<usize>, length: usize) -> Result<(), Error> {
        // Update the length chunk.
        self.maybe_update_chunk(chunk_range.end, &int_to_bytes32(length as u64))?;

        // Update the mixed-in root if the main root or the length have changed.
        let children = (chunk_range.start, chunk_range.end);
        if self.either_modified(children)? {
            let mixed_in_root = chunk_range
                .start
                .checked_sub(1)
                .ok_or_else(|| Error::NoBytesForChunk(chunk_range.start))?;
            let digest = self.hash_children(children)?;

            self.modify_chunk(mixed_in_root, &digest)?;
        }

        Ok(())
    }

    /// Consumes the cache, returning its bytes, "modified" flags and overlays.
    pub fn into_components(self) -> (Vec<u8>, Vec<bool>, Vec<BTreeOverlay>) {
        (self.cache, self.chunk_modified, self.overlays)
    }
}

View File

@@ -1,5 +1,6 @@
use super::*;
use ethereum_types::H256;
use int_to_bytes::int_to_bytes32;
macro_rules! impl_for_bitsize {
($type: ident, $bit_size: expr) => {

View File

@@ -1,14 +1,10 @@
pub mod cached_tree_hash;
pub mod signed_root;
pub mod standard_tree_hash;
use hashing::hash;
pub mod impls;
/// Number of bytes in a single chunk.
pub const BYTES_PER_CHUNK: usize = 32;
/// Number of bytes in a single hash / tree node, as used when laying out the merkle tree.
pub const HASHSIZE: usize = 32;
/// Number of bytes covered by two adjacent chunks.
/// NOTE(review): the identifier is spelled "CHUNCK"; it is left untouched here because other
/// code refers to it by this name.
pub const MERKLE_HASH_CHUNCK: usize = 2 * BYTES_PER_CHUNK;
pub use cached_tree_hash::{BTreeOverlay, CachedTreeHash, Error, TreeHashCache};
pub use signed_root::SignedRoot;
pub use standard_tree_hash::{merkle_root, TreeHash};
/// Number of bytes covered by two adjacent chunks, i.e. the input size hashed to form one
/// parent node during merkleization.
pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK;
#[derive(Debug, PartialEq, Clone)]
pub enum TreeHashType {
@@ -18,6 +14,78 @@ pub enum TreeHashType {
Container,
}
/// Interface implemented by types that can produce a tree hash root.
///
/// Only the signatures are declared here; the exact semantics of each method are defined by
/// the implementations.
pub trait TreeHash {
/// Returns the `TreeHashType` variant that describes the implementing type.
fn tree_hash_type() -> TreeHashType;
/// Returns the packed encoding of this value as bytes.
// NOTE(review): presumably only meaningful for types that may be packed -- confirm against impls.
fn tree_hash_packed_encoding(&self) -> Vec<u8>;
/// Returns the packing factor of the implementing type.
// NOTE(review): presumably the number of values that fit in one chunk -- confirm against impls.
fn tree_hash_packing_factor() -> usize;
/// Returns the tree hash root of this value as bytes.
fn tree_hash_root(&self) -> Vec<u8>;
}
/// Extension of `TreeHash` for types that can additionally produce a signed root.
pub trait SignedRoot: TreeHash {
/// Returns the signed root of this value as bytes.
// NOTE(review): which fields contribute to the signed root is decided by each impl -- not visible here.
fn signed_root(&self) -> Vec<u8>;
}
/// Returns the merkle root of `bytes`: the first 32 bytes of the tree built by
/// `efficient_merkleize`.
pub fn merkle_root(bytes: &[u8]) -> Vec<u8> {
    // TODO: replace this with a more memory efficient method.
    let tree = efficient_merkleize(bytes);
    tree[..32].to_vec()
}
/// Merkleizes `bytes` and returns the whole tree as one flat byte vector.
///
/// Inputs of at most `HASHSIZE` bytes are returned as a single chunk, right-padded with zeros
/// to `HASHSIZE` bytes. For longer inputs the output consists of the internal nodes
/// (`HASHSIZE` bytes each, root first) followed by `bytes` itself as the leaf data. Leaf chunks
/// that `bytes` does not provide are treated as zero-filled when hashing, but are not
/// materialised in the output.
///
/// # Panics
///
/// Panics if the assembled buffer does not have the expected length, or if `hash` returns a
/// digest that is not `HASHSIZE` bytes long.
pub fn efficient_merkleize(bytes: &[u8]) -> Vec<u8> {
    // If the bytes are just one chunk (or less than one chunk) just return them, zero-padded.
    if bytes.len() <= HASHSIZE {
        let mut chunk = bytes.to_vec();
        chunk.resize(HASHSIZE, 0);
        return chunk;
    }

    let leaves = num_sanitized_leaves(bytes.len());
    let nodes = num_nodes(leaves);
    let internal_nodes = nodes - leaves;

    let num_bytes = std::cmp::max(internal_nodes, 1) * HASHSIZE + bytes.len();

    // Build "zeroed internal nodes + leaf bytes" in a single allocation, copying the input
    // directly instead of going through a temporary `Vec`.
    let mut o: Vec<u8> = Vec::with_capacity(num_bytes);
    o.resize(internal_nodes * HASHSIZE, 0);
    o.extend_from_slice(bytes);

    assert_eq!(o.len(), num_bytes);

    let empty_chunk_hash = hash(&[0; MERKLE_HASH_CHUNK]);

    // `i` is the byte offset of the pair of children being hashed and `j` the byte offset of
    // their parent. Walking backwards guarantees children are final before they are consumed.
    let mut i = nodes * HASHSIZE;
    let mut j = internal_nodes * HASHSIZE;

    while i >= MERKLE_HASH_CHUNK {
        i -= MERKLE_HASH_CHUNK;
        j -= HASHSIZE;

        let parent = if let Some(pair) = o.get(i..i + MERKLE_HASH_CHUNK) {
            // All bytes are available, hash as usual.
            hash(pair)
        } else if let Some(partial) = o.get(i..) {
            // Able to get some of the bytes, pad them out.
            let mut padded = partial.to_vec();
            padded.resize(MERKLE_HASH_CHUNK, 0);
            hash(&padded)
        } else {
            // Unable to get any bytes, use the empty-chunk hash.
            empty_chunk_hash.clone()
        };

        o[j..j + HASHSIZE].copy_from_slice(&parent);
    }

    o
}
fn num_sanitized_leaves(num_bytes: usize) -> usize {
let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE;
leaves.next_power_of_two()

View File

@@ -1,5 +0,0 @@
use crate::TreeHash;
/// Extension of `TreeHash` for types that can additionally produce a signed root.
pub trait SignedRoot: TreeHash {
/// Returns the signed root of this value as bytes.
// NOTE(review): which fields contribute to the signed root is decided by each impl -- not visible here.
fn signed_root(&self) -> Vec<u8>;
}

View File

@@ -1,75 +0,0 @@
use super::*;
use hashing::hash;
use int_to_bytes::int_to_bytes32;
pub use impls::vec_tree_hash_root;
mod impls;
/// Interface implemented by types that can produce a tree hash root.
///
/// Only the signatures are declared here; the exact semantics of each method are defined by
/// the implementations.
pub trait TreeHash {
/// Returns the `TreeHashType` variant that describes the implementing type.
fn tree_hash_type() -> TreeHashType;
/// Returns the packed encoding of this value as bytes.
// NOTE(review): presumably only meaningful for types that may be packed -- confirm against impls.
fn tree_hash_packed_encoding(&self) -> Vec<u8>;
/// Returns the packing factor of the implementing type.
// NOTE(review): presumably the number of values that fit in one chunk -- confirm against impls.
fn tree_hash_packing_factor() -> usize;
/// Returns the tree hash root of this value as bytes.
fn tree_hash_root(&self) -> Vec<u8>;
}
/// Returns the merkle root of `bytes`: the first 32 bytes of the tree built by
/// `efficient_merkleize`.
pub fn merkle_root(bytes: &[u8]) -> Vec<u8> {
    // TODO: replace this with a more memory efficient method.
    let tree = efficient_merkleize(bytes);
    tree[..32].to_vec()
}
/// Merkleizes `bytes` and returns the whole tree as one flat byte vector.
///
/// Inputs of at most `HASHSIZE` bytes are returned as a single chunk, right-padded with zeros
/// to `HASHSIZE` bytes. For longer inputs the output consists of the internal nodes
/// (`HASHSIZE` bytes each, root first) followed by `bytes` itself as the leaf data. Leaf chunks
/// that `bytes` does not provide are treated as zero-filled when hashing, but are not
/// materialised in the output.
///
/// # Panics
///
/// Panics if the assembled buffer does not have the expected length, or if `hash` returns a
/// digest that is not `HASHSIZE` bytes long.
pub fn efficient_merkleize(bytes: &[u8]) -> Vec<u8> {
    // If the bytes are just one chunk (or less than one chunk) just return them, zero-padded.
    if bytes.len() <= HASHSIZE {
        let mut chunk = bytes.to_vec();
        chunk.resize(HASHSIZE, 0);
        return chunk;
    }

    let leaves = num_sanitized_leaves(bytes.len());
    let nodes = num_nodes(leaves);
    let internal_nodes = nodes - leaves;

    let num_bytes = std::cmp::max(internal_nodes, 1) * HASHSIZE + bytes.len();

    // Build "zeroed internal nodes + leaf bytes" in a single allocation, copying the input
    // directly instead of going through a temporary `Vec`.
    let mut o: Vec<u8> = Vec::with_capacity(num_bytes);
    o.resize(internal_nodes * HASHSIZE, 0);
    o.extend_from_slice(bytes);

    assert_eq!(o.len(), num_bytes);

    let empty_chunk_hash = hash(&[0; MERKLE_HASH_CHUNCK]);

    // `i` is the byte offset of the pair of children being hashed and `j` the byte offset of
    // their parent. Walking backwards guarantees children are final before they are consumed.
    let mut i = nodes * HASHSIZE;
    let mut j = internal_nodes * HASHSIZE;

    while i >= MERKLE_HASH_CHUNCK {
        i -= MERKLE_HASH_CHUNCK;
        j -= HASHSIZE;

        let parent = if let Some(pair) = o.get(i..i + MERKLE_HASH_CHUNCK) {
            // All bytes are available, hash as usual.
            hash(pair)
        } else if let Some(partial) = o.get(i..) {
            // Able to get some of the bytes, pad them out.
            let mut padded = partial.to_vec();
            padded.resize(MERKLE_HASH_CHUNCK, 0);
            hash(&padded)
        } else {
            // Unable to get any bytes, use the empty-chunk hash.
            empty_chunk_hash.clone()
        };

        o[j..j + HASHSIZE].copy_from_slice(&parent);
    }

    o
}