Use hardware acceleration for SHA256 (#2426)

## Proposed Changes

Modify the SHA256 implementation in `eth2_hashing` so that it switches between `ring` and `sha2` to take advantage of [x86_64 SHA extensions](https://en.wikipedia.org/wiki/Intel_SHA_extensions). The extensions are available on modern Intel and AMD CPUs, and seem to provide a considerable speed-up: on my Ryzen 5950X it dropped state tree hashing times by about 30% from 35ms to 25ms (on Prater).

## Additional Info

The extensions became available in the `sha2` crate [last year](https://www.reddit.com/r/rust/comments/hf2vcx/ann_rustcryptos_sha1_and_sha2_now_support/), and are not available in Ring, which uses a [pure Rust implementation of sha2](https://github.com/briansmith/ring/blob/main/src/digest/sha2.rs). Ring is faster on CPUs that lack the extensions, so I've implemented a runtime switch to use `sha2` only when the extensions are available. The runtime switching seems to impose a minuscule penalty (see the benchmarks linked below).
2026-03-06 10:11:44 +00:00 · 2021-07-12 08:47:01 +00:00
parent a7b7134abb
commit 2c691af95b
10 changed files with 222 additions and 86 deletions
--- a/consensus/swap_or_not_shuffle/src/compute_shuffled_index.rs
+++ b/consensus/swap_or_not_shuffle/src/compute_shuffled_index.rs
@@ -1,5 +1,5 @@
 use crate::Hash256;
-use eth2_hashing::{Context, SHA256};
+use eth2_hashing::{Context, Sha256Context};
 use std::cmp::max;

 /// Return `p(index)` in a pseudorandom permutation `p` of `0...list_size-1` with ``seed`` as entropy.
@@ -54,7 +54,7 @@ fn do_round(seed: &[u8], index: usize, pivot: usize, round: u8, list_size: usize
 }

 fn hash_with_round_and_position(seed: &[u8], round: u8, position: usize) -> Hash256 {
-    let mut context = Context::new(&SHA256);
+    let mut context = Context::new();

    context.update(seed);
    context.update(&[round]);
@@ -64,17 +64,17 @@ fn hash_with_round_and_position(seed: &[u8], round: u8, position: usize) -> Hash
     */
    context.update(&(position / 256).to_le_bytes()[0..4]);

-    let digest = context.finish();
+    let digest = context.finalize();
    Hash256::from_slice(digest.as_ref())
 }

 fn hash_with_round(seed: &[u8], round: u8) -> Hash256 {
-    let mut context = Context::new(&SHA256);
+    let mut context = Context::new();

    context.update(seed);
    context.update(&[round]);

-    let digest = context.finish();
+    let digest = context.finalize();
    Hash256::from_slice(digest.as_ref())
 }

--- a/consensus/swap_or_not_shuffle/src/shuffle_list.rs
+++ b/consensus/swap_or_not_shuffle/src/shuffle_list.rs
@@ -1,5 +1,5 @@
 use crate::Hash256;
-use eth2_hashing::{Context, SHA256};
+use eth2_hashing::hash_fixed;
 use std::mem;

 const SEED_SIZE: usize = 32;
@@ -31,12 +31,10 @@ impl Buf {
    /// Returns the new pivot. It is "raw" because it has not modulo the list size (this must be
    /// done by the caller).
    fn raw_pivot(&self) -> u64 {
-        let mut context = Context::new(&SHA256);
-        context.update(&self.0[0..PIVOT_VIEW_SIZE]);
-        let digest = context.finish();
+        let digest = hash_fixed(&self.0[0..PIVOT_VIEW_SIZE]);

        let mut bytes = [0; mem::size_of::<u64>()];
-        bytes[..].copy_from_slice(&digest.as_ref()[0..mem::size_of::<u64>()]);
+        bytes[..].copy_from_slice(&digest[0..mem::size_of::<u64>()]);
        u64::from_le_bytes(bytes)
    }

@@ -47,10 +45,7 @@ impl Buf {

    /// Hash the entire buffer.
    fn hash(&self) -> Hash256 {
-        let mut context = Context::new(&SHA256);
-        context.update(&self.0[..]);
-        let digest = context.finish();
-        Hash256::from_slice(digest.as_ref())
+        Hash256::from_slice(&hash_fixed(&self.0))
    }
 }

--- a/consensus/tree_hash/src/lib.rs
+++ b/consensus/tree_hash/src/lib.rs
@@ -7,8 +7,7 @@ pub use merkle_hasher::{Error, MerkleHasher};
 pub use merkleize_padded::merkleize_padded;
 pub use merkleize_standard::merkleize_standard;

-use eth2_hashing::{Context, SHA256};
-use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
+use eth2_hashing::{hash_fixed, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};

 pub const BYTES_PER_CHUNK: usize = 32;
 pub const HASHSIZE: usize = 32;
@@ -39,11 +38,7 @@ pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 {
        let mut leaves = [0; HASHSIZE * 2];
        leaves[0..bytes.len()].copy_from_slice(bytes);

-        let mut context = Context::new(&SHA256);
-        context.update(&leaves);
-        let digest = context.finish();
-
-        Hash256::from_slice(digest.as_ref())
+        Hash256::from_slice(&hash_fixed(&leaves))
    } else {
        // If there are 3 or more leaves, use `MerkleHasher`.
        let mut hasher = MerkleHasher::with_leaves(leaves);
--- a/consensus/tree_hash/src/merkle_hasher.rs
+++ b/consensus/tree_hash/src/merkle_hasher.rs
@@ -1,5 +1,5 @@
 use crate::{get_zero_hash, Hash256, HASHSIZE};
-use eth2_hashing::{Context, Digest, SHA256};
+use eth2_hashing::{Context, Sha256Context, HASH_LEN};
 use smallvec::{smallvec, SmallVec};
 use std::mem;

@@ -15,7 +15,7 @@ pub enum Error {
 ///
 /// Should be used as a left or right value for some node.
 enum Preimage<'a> {
-    Digest(Digest),
+    Digest([u8; HASH_LEN]),
    Slice(&'a [u8]),
 }

@@ -41,7 +41,7 @@ struct HalfNode {
 impl HalfNode {
    /// Create a new half-node from the given `left` value.
    fn new(id: usize, left: Preimage) -> Self {
-        let mut context = Context::new(&SHA256);
+        let mut context = Context::new();
        context.update(left.as_bytes());

        Self { context, id }
@@ -49,9 +49,9 @@ impl HalfNode {

    /// Complete the half-node by providing a `right` value. Returns a digest of the left and right
    /// nodes.
-    fn finish(mut self, right: Preimage) -> Digest {
+    fn finish(mut self, right: Preimage) -> [u8; HASH_LEN] {
        self.context.update(right.as_bytes());
-        self.context.finish()
+        self.context.finalize()
    }
 }

@@ -124,7 +124,7 @@ pub struct MerkleHasher {
    /// Stores the nodes that are half-complete and awaiting a right node.
    ///
    /// A smallvec of size 8 means we can hash a tree with 256 leaves without allocating on the
-    /// heap. Each half-node is 224 bytes, so this smallvec may store 1,792 bytes on the stack.
+    /// heap. Each half-node is 232 bytes, so this smallvec may store 1856 bytes on the stack.
    half_nodes: SmallVec8<HalfNode>,
    /// The depth of the tree that will be produced.
    ///
@@ -368,7 +368,7 @@ mod test {
    fn context_size() {
        assert_eq!(
            mem::size_of::<HalfNode>(),
-            216 + 8,
+            232,
            "Halfnode size should be as expected"
        );
    }
--- a/consensus/tree_hash/src/merkleize_padded.rs
+++ b/consensus/tree_hash/src/merkleize_padded.rs
@@ -1,5 +1,5 @@
 use super::{get_zero_hash, Hash256, BYTES_PER_CHUNK};
-use eth2_hashing::{hash, hash32_concat};
+use eth2_hashing::{hash32_concat, hash_fixed};

 /// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
 /// leaves.
@@ -79,7 +79,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
        // Hash two chunks, creating a parent chunk.
        let hash = match bytes.get(start..start + BYTES_PER_CHUNK * 2) {
            // All bytes are available, hash as usual.
-            Some(slice) => hash(slice),
+            Some(slice) => hash_fixed(slice),
            // Unable to get all the bytes, get a small slice and pad it out.
            None => {
                let mut preimage = bytes
@@ -87,7 +87,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
                    .expect("`i` can only be larger than zero if there are bytes to read")
                    .to_vec();
                preimage.resize(BYTES_PER_CHUNK * 2, 0);
-                hash(&preimage)
+                hash_fixed(&preimage)
            }
        };