Remove "old" item requirement from treehash

This commit is contained in:
Paul Hauner
2019-04-21 12:12:47 +10:00
parent 2ee3b05bd3
commit 4aeadfa60f
7 changed files with 561 additions and 221 deletions

View File

@@ -17,10 +17,13 @@ pub enum Error {
UnableToObtainSlices,
UnableToGrowMerkleTree,
UnableToShrinkMerkleTree,
TreeCannotHaveZeroNodes,
ShouldNeverBePacked(TreeHashType),
BytesAreNotEvenChunks(usize),
NoModifiedFieldForChunk(usize),
NoBytesForChunk(usize),
NoOverlayForIndex(usize),
NotLeafNode(usize),
}
pub trait CachedTreeHash<T>: CachedTreeHashSubTree<T> + Sized {
@@ -36,12 +39,7 @@ pub trait CachedTreeHashSubTree<Item>: TreeHash {
fn new_tree_hash_cache(&self) -> Result<TreeHashCache, Error>;
fn update_tree_hash_cache(
&self,
other: &Item,
cache: &mut TreeHashCache,
chunk: usize,
) -> Result<usize, Error>;
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error>;
}
fn children(parent: usize) -> (usize, usize) {
@@ -123,6 +121,10 @@ fn num_bytes(num_leaves: usize) -> usize {
/// A cached merkle tree held as raw bytes, with per-chunk change flags and the overlays that
/// describe how the bytes are laid out.
pub struct TreeHashCache {
/// Raw bytes of the cached tree(s).
cache: Vec<u8>,
/// One flag per `BYTES_PER_CHUNK`-sized chunk of `cache`; constructors in this file size it as
/// `cache.len() / BYTES_PER_CHUNK`.
chunk_modified: Vec<bool>,
/// Overlays for the trees stored in `cache`, addressed by index (see `overlay_index`).
overlays: Vec<BTreeOverlay>,
/// Cursor into the chunks of `cache`. Every constructor sets it to 0; `update_tree_hash_cache`
/// implementations read it and advance it as they go.
pub chunk_index: usize,
/// Cursor into `overlays`. Every constructor sets it to 0; `update_tree_hash_cache`
/// implementations read it and advance it as they go.
pub overlay_index: usize,
}
impl Into<Vec<u8>> for TreeHashCache {
@@ -139,10 +141,17 @@ impl TreeHashCache {
item.new_tree_hash_cache()
}
pub fn from_elems(cache: Vec<u8>, chunk_modified: Vec<bool>) -> Self {
/// Assembles a `TreeHashCache` directly from its raw parts.
///
/// The supplied vectors are stored as-is; both the chunk cursor and the overlay cursor start
/// at zero.
pub fn from_elems(
    cache: Vec<u8>,
    chunk_modified: Vec<bool>,
    overlays: Vec<BTreeOverlay>,
) -> Self {
    let chunk_index = 0;
    let overlay_index = 0;

    Self {
        overlays,
        chunk_modified,
        cache,
        overlay_index,
        chunk_index,
    }
}
@@ -153,7 +162,7 @@ impl TreeHashCache {
where
T: CachedTreeHashSubTree<T>,
{
let offset_handler = BTreeOverlay::new(item, 0)?;
let overlay = BTreeOverlay::new(item, 0)?;
// Note how many leaves were provided. If it is not a power-of-two, we'll need to pad it out
// later.
@@ -161,7 +170,7 @@ impl TreeHashCache {
// Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill
// all the to-be-built internal nodes with zeros and append the leaves and subtrees.
let internal_node_bytes = offset_handler.num_internal_nodes * BYTES_PER_CHUNK;
let internal_node_bytes = overlay.num_internal_nodes() * BYTES_PER_CHUNK;
let leaves_and_subtrees_bytes = leaves_and_subtrees
.iter()
.fold(0, |acc, t| acc + t.bytes_len());
@@ -169,13 +178,19 @@ impl TreeHashCache {
cache.resize(internal_node_bytes, 0);
// Allocate enough bytes to store all the leaves.
let mut leaves = Vec::with_capacity(offset_handler.num_leaf_nodes * HASHSIZE);
let mut leaves = Vec::with_capacity(overlay.num_leaf_nodes() * HASHSIZE);
let mut overlays = Vec::with_capacity(leaves_and_subtrees.len());
overlays.push(overlay);
// Iterate through all of the leaves/subtrees, adding their root as a leaf node and then
// concatenating their merkle trees.
for t in leaves_and_subtrees {
leaves.append(&mut t.root().ok_or_else(|| Error::NoBytesForRoot)?.to_vec());
cache.append(&mut t.into_merkle_tree());
let (mut bytes, _bools, mut t_overlays) = t.into_components();
cache.append(&mut bytes);
overlays.append(&mut t_overlays);
}
// Pad the leaves to an even power-of-two, using zeros.
@@ -190,10 +205,17 @@ impl TreeHashCache {
Ok(Self {
chunk_modified: vec![false; cache.len() / BYTES_PER_CHUNK],
cache,
overlays,
chunk_index: 0,
overlay_index: 0,
})
}
pub fn from_bytes(bytes: Vec<u8>, initial_modified_state: bool) -> Result<Self, Error> {
pub fn from_bytes(
bytes: Vec<u8>,
initial_modified_state: bool,
overlay: BTreeOverlay,
) -> Result<Self, Error> {
if bytes.len() % BYTES_PER_CHUNK > 0 {
return Err(Error::BytesAreNotEvenChunks(bytes.len()));
}
@@ -201,9 +223,84 @@ impl TreeHashCache {
Ok(Self {
chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK],
cache: bytes,
overlays: vec![overlay],
chunk_index: 0,
overlay_index: 0,
})
}
pub fn get_overlay(
&self,
overlay_index: usize,
chunk_index: usize,
) -> Result<BTreeOverlay, Error> {
let mut overlay = self
.overlays
.get(overlay_index)
.ok_or_else(|| Error::NoOverlayForIndex(overlay_index))?
.clone();
overlay.offset = chunk_index;
Ok(overlay)
}
/// Swaps the overlay stored at `overlay_index` for `new_overlay`, resizing the cached bytes and
/// modified-flags that the old overlay covered so they fit the new tree.
///
/// Returns the overlay that was previously stored at `overlay_index`.
///
/// # Errors
///
/// - `Error::NoOverlayForIndex` if nothing is stored at `overlay_index`.
/// - `Error::UnableToObtainSlices` if the old overlay's chunk range cannot be sliced from the
///   cache.
/// - `Error::UnableToGrowMerkleTree` / `Error::UnableToShrinkMerkleTree` if the resize helper
///   returns `None`.
pub fn replace_overlay(
&mut self,
overlay_index: usize,
new_overlay: BTreeOverlay,
) -> Result<BTreeOverlay, Error> {
// NOTE(review): the stored overlay's own `offset` is used here, whereas `get_overlay`
// overrides the offset with a caller-supplied chunk index -- confirm the stored offset is the
// one intended for `chunk_range()`.
let old_overlay = self
.overlays
.get(overlay_index)
.ok_or_else(|| Error::NoOverlayForIndex(overlay_index))?;
// Get slices of the existing tree from the cache.
let (old_bytes, old_flags) = self
.slices(old_overlay.chunk_range())
.ok_or_else(|| Error::UnableToObtainSlices)?;
// Grow only when the new tree has strictly more leaves; an equal leaf count takes the shrink
// branch.
let (new_bytes, new_bools) = if new_overlay.num_leaf_nodes() > old_overlay.num_leaf_nodes()
{
resize::grow_merkle_cache(
old_bytes,
old_flags,
old_overlay.height(),
new_overlay.height(),
)
.ok_or_else(|| Error::UnableToGrowMerkleTree)?
} else {
resize::shrink_merkle_cache(
old_bytes,
old_flags,
old_overlay.height(),
new_overlay.height(),
new_overlay.total_chunks(),
)
.ok_or_else(|| Error::UnableToShrinkMerkleTree)?
};
// Splice the resized bytes and flags over the chunks the old tree occupied.
self.splice(old_overlay.chunk_range(), new_bytes, new_bools);
// Store the new overlay and hand the previous one back to the caller.
Ok(std::mem::replace(
&mut self.overlays[overlay_index],
new_overlay,
))
}
/// Re-hashes every internal node of `overlay` that has at least one modified child.
///
/// Parents are visited from the highest-indexed one down to the root, so (assuming children
/// always sit at larger indices than their parent) a change in a leaf propagates all the way up
/// in a single pass.
///
/// # Errors
///
/// Propagates any error from `either_modified`, `hash_children` or `modify_chunk`.
pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> {
    for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() {
        if self.either_modified(children)? {
            // Compute the hash first so the shared borrow ends before the cache is mutated.
            let parent_hash = self.hash_children(children)?;
            self.modify_chunk(parent, &parent_hash)?;
        }
    }
    Ok(())
}
/// Returns the length, in bytes, of the underlying `cache` vector.
pub fn bytes_len(&self) -> usize {
self.cache.len()
}
@@ -212,9 +309,7 @@ impl TreeHashCache {
self.cache.get(0..HASHSIZE)
}
pub fn splice(&mut self, chunk_range: Range<usize>, replace_with: Self) {
let (bytes, bools) = replace_with.into_components();
pub fn splice(&mut self, chunk_range: Range<usize>, bytes: Vec<u8>, bools: Vec<bool>) {
// Update the `chunk_modified` vec, marking all spliced-in nodes as changed.
self.chunk_modified.splice(chunk_range.clone(), bools);
self.cache
@@ -278,14 +373,14 @@ impl TreeHashCache {
.ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))
}
pub fn either_modified(&self, children: (&usize, &usize)) -> Result<bool, Error> {
Ok(self.changed(*children.0)? | self.changed(*children.1)?)
/// Reports whether at least one of the two `children` chunks is flagged as changed.
///
/// Both children are always looked up, so an error for either chunk is returned even when the
/// other one is already known to be modified.
pub fn either_modified(&self, children: (usize, usize)) -> Result<bool, Error> {
    let (left, right) = children;
    let left_changed = self.changed(left)?;
    let right_changed = self.changed(right)?;
    Ok(left_changed | right_changed)
}
pub fn hash_children(&self, children: (&usize, &usize)) -> Result<Vec<u8>, Error> {
pub fn hash_children(&self, children: (usize, usize)) -> Result<Vec<u8>, Error> {
let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2);
child_bytes.append(&mut self.get_chunk(*children.0)?.to_vec());
child_bytes.append(&mut self.get_chunk(*children.1)?.to_vec());
child_bytes.append(&mut self.get_chunk(children.0)?.to_vec());
child_bytes.append(&mut self.get_chunk(children.1)?.to_vec());
Ok(hash(&child_bytes))
}
@@ -299,11 +394,7 @@ impl TreeHashCache {
Ok(hash(&bytes))
}
pub fn into_merkle_tree(self) -> Vec<u8> {
self.cache
}
pub fn into_components(self) -> (Vec<u8>, Vec<bool>) {
(self.cache, self.chunk_modified)
/// Consumes the cache and returns its bytes, its per-chunk modified flags and its overlays,
/// in that order. The cursor fields are discarded.
pub fn into_components(self) -> (Vec<u8>, Vec<bool>, Vec<BTreeOverlay>) {
    let bytes = self.cache;
    let flags = self.chunk_modified;
    let overlays = self.overlays;
    (bytes, flags, overlays)
}
}

View File

@@ -1,12 +1,9 @@
use super::*;
#[derive(Debug)]
#[derive(Debug, PartialEq, Clone)]
pub struct BTreeOverlay {
pub num_internal_nodes: usize,
pub num_leaf_nodes: usize,
pub first_node: usize,
pub next_node: usize,
offsets: Vec<usize>,
pub offset: usize,
lengths: Vec<usize>,
}
impl BTreeOverlay {
@@ -17,84 +14,87 @@ impl BTreeOverlay {
item.tree_hash_cache_overlay(initial_offset)
}
pub fn from_lengths(offset: usize, mut lengths: Vec<usize>) -> Result<Self, Error> {
// Extend it to the next power-of-two, if it is not already.
let num_leaf_nodes = if lengths.len().is_power_of_two() {
lengths.len()
pub fn from_lengths(offset: usize, lengths: Vec<usize>) -> Result<Self, Error> {
if lengths.is_empty() {
Err(Error::TreeCannotHaveZeroNodes)
} else {
let num_leaf_nodes = lengths.len().next_power_of_two();
lengths.resize(num_leaf_nodes, 1);
num_leaf_nodes
};
let num_nodes = num_nodes(num_leaf_nodes);
let num_internal_nodes = num_nodes - num_leaf_nodes;
let mut offsets = Vec::with_capacity(num_nodes);
offsets.append(&mut (offset..offset + num_internal_nodes).collect());
let mut next_node = num_internal_nodes + offset;
for i in 0..num_leaf_nodes {
offsets.push(next_node);
next_node += lengths[i];
Ok(Self { offset, lengths })
}
}
Ok(Self {
num_internal_nodes,
num_leaf_nodes,
offsets,
first_node: offset,
next_node,
})
/// Number of leaf nodes in the tree: the count of entries in `lengths`, rounded up to the next
/// power of two.
pub fn num_leaf_nodes(&self) -> usize {
    let real_leaves = self.lengths.len();
    real_leaves.next_power_of_two()
}
/// Number of leaf nodes that exist only to pad the tree out to a power of two, i.e. leaves
/// with no corresponding entry in `lengths`.
fn num_padding_leaves(&self) -> usize {
    let total_leaves = self.num_leaf_nodes();
    let real_leaves = self.lengths.len();
    total_leaves - real_leaves
}
/// Total number of nodes (internal plus leaf) in a binary tree with `num_leaf_nodes()` leaves.
pub fn num_nodes(&self) -> usize {
    let leaves = self.num_leaf_nodes();
    2 * leaves - 1
}
/// Number of internal (non-leaf) nodes: one fewer than the number of leaf nodes.
pub fn num_internal_nodes(&self) -> usize {
    let leaves = self.num_leaf_nodes();
    leaves - 1
}
/// Chunk index of the first node of this tree, which is simply the overlay's `offset`.
fn first_node(&self) -> usize {
self.offset
}
pub fn root(&self) -> usize {
self.first_node
self.first_node()
}
/// Chunk index immediately after the last chunk covered by this overlay.
///
/// The overlay covers, in order: its internal nodes, then each real leaf (which may itself be
/// a subtree spanning `lengths[i]` chunks), then one chunk per padding leaf. All three parts
/// are counted; summing `lengths` alone would leave out the internal nodes and the padding and
/// make `chunk_range()` / `total_chunks()` too small.
pub fn next_node(&self) -> usize {
    self.first_node()
        + self.num_internal_nodes()
        + self.lengths.iter().sum::<usize>()
        + self.num_padding_leaves()
}
pub fn height(&self) -> usize {
self.num_leaf_nodes.trailing_zeros() as usize
self.num_leaf_nodes().trailing_zeros() as usize
}
pub fn chunk_range(&self) -> Range<usize> {
self.first_node..self.next_node
self.first_node()..self.next_node()
}
pub fn total_chunks(&self) -> usize {
self.next_node - self.first_node
self.next_node() - self.first_node()
}
pub fn total_nodes(&self) -> usize {
self.num_internal_nodes + self.num_leaf_nodes
/// Chunk index of the first leaf node, which sits directly after this tree's internal nodes.
pub fn first_leaf_node(&self) -> usize {
    let internal = self.num_internal_nodes();
    self.offset + internal
}
pub fn first_leaf_node(&self) -> Result<usize, Error> {
self.offsets
.get(self.num_internal_nodes)
.cloned()
.ok_or_else(|| Error::NoFirstNode)
pub fn get_leaf_node(&self, i: usize) -> Result<Option<Range<usize>>, Error> {
if i >= self.num_leaf_nodes() {
return Err(Error::NotLeafNode(i));
} else if i >= self.num_leaf_nodes() - self.num_padding_leaves() {
Ok(None)
} else {
let first_node = self.offset + self.lengths.iter().take(i).sum::<usize>();
let last_node = first_node + self.lengths[i];
Ok(Some(first_node..last_node))
}
}
/// Returns an iterator visiting each internal node, providing the left and right child chunks
/// for the node.
pub fn iter_internal_nodes<'a>(
&'a self,
) -> impl DoubleEndedIterator<Item = (&'a usize, (&'a usize, &'a usize))> {
let internal_nodes = &self.offsets[0..self.num_internal_nodes];
internal_nodes.iter().enumerate().map(move |(i, parent)| {
let children = children(i);
(
parent,
(&self.offsets[children.0], &self.offsets[children.1]),
)
})
/// Lists every internal node of the tree together with its two children.
///
/// Each entry is `(parent, (left_child, right_child))`, expressed as chunk indices (i.e. the
/// tree-local indices shifted by `offset`). Entries are ordered by ascending parent index.
pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> {
    let shift = self.offset;
    let internal = self.num_internal_nodes();

    let mut pairs = Vec::with_capacity(internal);
    for parent in 0..internal {
        let (left, right) = children(parent);
        pairs.push((parent + shift, (left + shift, right + shift)));
    }
    pairs
}
/// Returns an iterator visiting each leaf node, providing the chunk for that node.
pub fn iter_leaf_nodes<'a>(&'a self) -> impl DoubleEndedIterator<Item = &'a usize> {
let leaf_nodes = &self.offsets[self.num_internal_nodes..];
leaf_nodes.iter()
/// Returns a `Vec` holding the chunk index of each internal node of the tree, in ascending
/// order starting at `offset`.
pub fn internal_node_chunks(&self) -> Vec<usize> {
    let start = self.offset;
    let end = start + self.num_internal_nodes();
    (start..end).collect()
}
}

View File

@@ -8,6 +8,7 @@ impl CachedTreeHashSubTree<u64> for u64 {
Ok(TreeHashCache::from_bytes(
merkleize(self.to_le_bytes().to_vec()),
false,
self.tree_hash_cache_overlay(0)?,
)?)
}
@@ -15,17 +16,13 @@ impl CachedTreeHashSubTree<u64> for u64 {
BTreeOverlay::from_lengths(chunk_offset, vec![1])
}
fn update_tree_hash_cache(
&self,
other: &Self,
cache: &mut TreeHashCache,
chunk: usize,
) -> Result<usize, Error> {
if self != other {
let leaf = merkleize(self.to_le_bytes().to_vec());
cache.modify_chunk(chunk, &leaf)?;
}
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> {
let leaf = merkleize(self.to_le_bytes().to_vec());
cache.maybe_update_chunk(cache.chunk_index, &leaf)?;
Ok(chunk + 1)
cache.chunk_index += 1;
cache.overlay_index += 1;
Ok(())
}
}

View File

@@ -5,10 +5,14 @@ where
T: CachedTreeHashSubTree<T> + TreeHash,
{
fn new_tree_hash_cache(&self) -> Result<TreeHashCache, Error> {
match T::tree_hash_type() {
TreeHashType::Basic => {
TreeHashCache::from_bytes(merkleize(get_packed_leaves(self)?), false)
}
let overlay = self.tree_hash_cache_overlay(0)?;
let mut cache = match T::tree_hash_type() {
TreeHashType::Basic => TreeHashCache::from_bytes(
merkleize(get_packed_leaves(self)?),
false,
overlay.clone(),
),
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let subtrees = self
.iter()
@@ -17,17 +21,29 @@ where
TreeHashCache::from_leaves_and_subtrees(self, subtrees)
}
}
}?;
// Mix in the length of the list.
let root_node = overlay.root();
cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?;
Ok(cache)
}
fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result<BTreeOverlay, Error> {
let lengths = match T::tree_hash_type() {
TreeHashType::Basic => vec![1; self.len() / T::tree_hash_packing_factor()],
TreeHashType::Basic => {
// Ceil division.
let num_leaves = (self.len() + T::tree_hash_packing_factor() - 1)
/ T::tree_hash_packing_factor();
vec![1; num_leaves]
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let mut lengths = vec![];
for item in self {
lengths.push(BTreeOverlay::new(item, 0)?.total_nodes())
lengths.push(BTreeOverlay::new(item, 0)?.num_nodes())
}
lengths
@@ -37,120 +53,93 @@ where
BTreeOverlay::from_lengths(chunk_offset, lengths)
}
fn update_tree_hash_cache(
&self,
other: &Vec<T>,
cache: &mut TreeHashCache,
chunk: usize,
) -> Result<usize, Error> {
let offset_handler = BTreeOverlay::new(self, chunk)?;
let old_offset_handler = BTreeOverlay::new(other, chunk)?;
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> {
let new_overlay = BTreeOverlay::new(self, cache.chunk_index)?;
let old_overlay = cache
.get_overlay(cache.overlay_index, cache.chunk_index)?
.clone();
if offset_handler.num_leaf_nodes != old_offset_handler.num_leaf_nodes {
let old_offset_handler = BTreeOverlay::new(other, chunk)?;
// Get slices of the existing tree from the cache.
let (old_bytes, old_flags) = cache
.slices(old_offset_handler.chunk_range())
.ok_or_else(|| Error::UnableToObtainSlices)?;
let (new_bytes, new_flags) =
if offset_handler.num_leaf_nodes > old_offset_handler.num_leaf_nodes {
grow_merkle_cache(
old_bytes,
old_flags,
old_offset_handler.height(),
offset_handler.height(),
)
.ok_or_else(|| Error::UnableToGrowMerkleTree)?
} else {
shrink_merkle_cache(
old_bytes,
old_flags,
old_offset_handler.height(),
offset_handler.height(),
offset_handler.total_chunks(),
)
.ok_or_else(|| Error::UnableToShrinkMerkleTree)?
};
// Create a `TreeHashCache` from the raw elements.
let modified_cache = TreeHashCache::from_elems(new_bytes, new_flags);
// Splice the newly created `TreeHashCache` over the existing elements.
cache.splice(old_offset_handler.chunk_range(), modified_cache);
// If the merkle tree required to represent the new list is of a different size to the one
// required for the previous list, then update our cache.
//
// This grows/shrinks the bytes to accommodate the new tree, preserving as much of the tree
// as possible.
if new_overlay.num_leaf_nodes() != old_overlay.num_leaf_nodes() {
cache.replace_overlay(cache.overlay_index, new_overlay.clone())?;
}
match T::tree_hash_type() {
TreeHashType::Basic => {
let leaves = get_packed_leaves(self)?;
let mut buf = vec![0; HASHSIZE];
let item_bytes = HASHSIZE / T::tree_hash_packing_factor();
for (i, chunk) in offset_handler.iter_leaf_nodes().enumerate() {
if let Some(latest) = leaves.get(i * HASHSIZE..(i + 1) * HASHSIZE) {
cache.maybe_update_chunk(*chunk, latest)?;
// Iterate through each of the leaf nodes.
for i in 0..new_overlay.num_leaf_nodes() {
// Iterate through the number of items that may be packed into the leaf node.
for j in 0..T::tree_hash_packing_factor() {
// Create a mut slice that can either be filled with a serialized item or
// padding.
let buf_slice = &mut buf[j * item_bytes..(j + 1) * item_bytes];
// Attempt to get the item for this portion of the chunk. If it exists,
// update `buf` with its serialized bytes. If it doesn't exist, update
// `buf` with padding.
match self.get(i * T::tree_hash_packing_factor() + j) {
Some(item) => {
buf_slice.copy_from_slice(&item.tree_hash_packed_encoding());
}
None => buf_slice.copy_from_slice(&vec![0; item_bytes]),
}
}
}
let first_leaf_chunk = offset_handler.first_leaf_node()?;
cache.splice(
first_leaf_chunk..offset_handler.next_node,
TreeHashCache::from_bytes(leaves, true)?,
);
// Update the chunk if the generated `buf` is not the same as the cache.
let chunk = new_overlay.first_leaf_node() + i;
cache.maybe_update_chunk(chunk, &buf)?;
}
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let mut i = offset_handler.num_leaf_nodes;
for &start_chunk in offset_handler.iter_leaf_nodes().rev() {
i -= 1;
match (other.get(i), self.get(i)) {
// The item existed in the previous list and exists in the current list.
(Some(old), Some(new)) => {
new.update_tree_hash_cache(old, cache, start_chunk)?;
for i in (0..new_overlay.num_leaf_nodes()).rev() {
match (old_overlay.get_leaf_node(i)?, new_overlay.get_leaf_node(i)?) {
// The item existed in the previous list and exists in the current list.
(Some(_old), Some(new)) => {
cache.chunk_index = new.start;
self[i].update_tree_hash_cache(cache)?;
}
// The item existed in the previous list but does not exist in this list.
//
// I.e., the list has been shortened.
// Viz., the list has been shortened.
(Some(old), None) => {
// Splice out the entire tree of the removed node, replacing it with a
// single padding node.
let end_chunk = BTreeOverlay::new(old, start_chunk)?.next_node;
cache.splice(
start_chunk..end_chunk,
TreeHashCache::from_bytes(vec![0; HASHSIZE], true)?,
);
cache.splice(old, vec![0; HASHSIZE], vec![true]);
}
// The item existed in the previous list but does exist in this list.
// The item did not exist in the previous list but does exist in this list.
//
// I.e., the list has been lengthened.
// Viz., the list has been lengthened.
(None, Some(new)) => {
let bytes: Vec<u8> = TreeHashCache::new(new)?.into();
let bytes: Vec<u8> = TreeHashCache::new(&self[i])?.into();
let bools = vec![true; bytes.len() / HASHSIZE];
cache.splice(
start_chunk..start_chunk + 1,
TreeHashCache::from_bytes(bytes, true)?,
);
cache.splice(new.start..new.start + 1, bytes, bools);
}
// The item didn't exist in the old list and doesn't exist in the new list,
// nothing to do.
(None, None) => {}
};
}
}
}
}
for (&parent, children) in offset_handler.iter_internal_nodes().rev() {
if cache.either_modified(children)? {
cache.modify_chunk(parent, &cache.hash_children(children)?)?;
}
}
cache.update_internal_nodes(&new_overlay)?;
// If the root node or the length has changed, mix in the length of the list.
let root_node = offset_handler.root();
if cache.changed(root_node)? | (self.len() != other.len()) {
cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?;
}
// Always update the root node as we don't have a reliable check to know if the list len
// has changed.
let root_node = new_overlay.root();
cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?;
Ok(offset_handler.next_node)
cache.chunk_index = new_overlay.next_node();
Ok(())
}
}