mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-21 22:04:44 +00:00
Use hardware acceleration for SHA256 (#2426)
## Proposed Changes Modify the SHA256 implementation in `eth2_hashing` so that it switches between `ring` and `sha2` to take advantage of [x86_64 SHA extensions](https://en.wikipedia.org/wiki/Intel_SHA_extensions). The extensions are available on modern Intel and AMD CPUs, and seem to provide a considerable speed-up: on my Ryzen 5950X it dropped state tree hashing times by about 30% from 35ms to 25ms (on Prater). ## Additional Info The extensions became available in the `sha2` crate [last year](https://www.reddit.com/r/rust/comments/hf2vcx/ann_rustcryptos_sha1_and_sha2_now_support/), and are not available in Ring, which uses a [pure Rust implementation of sha2](https://github.com/briansmith/ring/blob/main/src/digest/sha2.rs). Ring is faster on CPUs that lack the extensions so I've implemented a runtime switch to use `sha2` only when the extensions are available. The runtime switching seems to impose a miniscule penalty (see the benchmarks linked below).
This commit is contained in:
@@ -1,61 +1,205 @@
|
||||
//! Provides a simple hash function utilizing `ring::digest::SHA256`.
|
||||
//! Optimized SHA256 for use in Ethereum 2.0.
|
||||
//!
|
||||
//! The purpose of this crate is to provide an abstraction to whatever hash function Ethereum
|
||||
//! 2.0 is using. The hash function has been subject to change during the specification process, so
|
||||
//! defining it once in this crate makes it easy to replace.
|
||||
//! The initial purpose of this crate was to provide an abstraction over the hash function used in
|
||||
//! Ethereum 2.0. The hash function changed during the specification process, so defining it once in
|
||||
//! this crate made it easy to replace.
|
||||
//!
|
||||
//! Now this crate serves primarily as a wrapper over two SHA256 crates: `sha2` and `ring` –
|
||||
//! which it switches between at runtime based on the availability of SHA intrinsics.
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub use ring::digest::{digest, Context, Digest, SHA256};
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
use sha2::{Digest, Sha256};
|
||||
pub use self::DynamicContext as Context;
|
||||
use sha2::Digest;
|
||||
|
||||
#[cfg(feature = "zero_hash_cache")]
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
/// Returns the digest of `input`.
|
||||
///
|
||||
/// Uses `ring::digest::SHA256`.
|
||||
/// Length of a SHA256 hash in bytes.
|
||||
pub const HASH_LEN: usize = 32;
|
||||
|
||||
/// Returns the digest of `input` using the best available implementation.
|
||||
pub fn hash(input: &[u8]) -> Vec<u8> {
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
let h = digest(&SHA256, input).as_ref().into();
|
||||
DynamicImpl::best().hash(input)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
let h = Sha256::digest(input).as_ref().into();
|
||||
|
||||
h
|
||||
/// Hash function returning a fixed-size array (to save on allocations).
|
||||
///
|
||||
/// Uses the best available implementation based on CPU features.
|
||||
pub fn hash_fixed(input: &[u8]) -> [u8; HASH_LEN] {
|
||||
DynamicImpl::best().hash_fixed(input)
|
||||
}
|
||||
|
||||
/// Compute the hash of two slices concatenated.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if either `h1` or `h2` are not 32 bytes in length.
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(h1);
|
||||
context.update(h2);
|
||||
|
||||
let mut output = [0; 32];
|
||||
output[..].copy_from_slice(context.finish().as_ref());
|
||||
output
|
||||
let mut ctxt = DynamicContext::new();
|
||||
ctxt.update(h1);
|
||||
ctxt.update(h2);
|
||||
ctxt.finalize()
|
||||
}
|
||||
|
||||
/// Compute the hash of two slices concatenated.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if either `h1` or `h2` are not 32 bytes in length.
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
|
||||
let mut preimage = [0; 64];
|
||||
preimage[0..32].copy_from_slice(h1);
|
||||
preimage[32..64].copy_from_slice(h2);
|
||||
/// Context trait for abstracting over implementation contexts.
|
||||
pub trait Sha256Context {
|
||||
fn new() -> Self;
|
||||
|
||||
let mut output = [0; 32];
|
||||
output[..].copy_from_slice(&hash(&preimage));
|
||||
output
|
||||
fn update(&mut self, bytes: &[u8]);
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN];
|
||||
}
|
||||
|
||||
/// Top-level trait implemented by both `sha2` and `ring` implementations.
|
||||
pub trait Sha256 {
|
||||
type Context: Sha256Context;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8>;
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN];
|
||||
}
|
||||
|
||||
/// Implementation of SHA256 using the `sha2` crate (fastest on CPUs with SHA extensions).
|
||||
struct Sha2CrateImpl;
|
||||
|
||||
impl Sha256Context for sha2::Sha256 {
|
||||
fn new() -> Self {
|
||||
sha2::Digest::new()
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
sha2::Digest::update(self, bytes)
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
sha2::Digest::finalize(self).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for Sha2CrateImpl {
|
||||
type Context = sha2::Sha256;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
Self::Context::digest(input).into_iter().collect()
|
||||
}
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
Self::Context::digest(input).into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Implementation of SHA256 using the `ring` crate (fastest on CPUs without SHA extensions).
|
||||
pub struct RingImpl;
|
||||
|
||||
impl Sha256Context for ring::digest::Context {
|
||||
fn new() -> Self {
|
||||
Self::new(&ring::digest::SHA256)
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
self.update(bytes)
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
let mut output = [0; HASH_LEN];
|
||||
output.copy_from_slice(self.finish().as_ref());
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for RingImpl {
|
||||
type Context = ring::digest::Context;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
ring::digest::digest(&ring::digest::SHA256, input)
|
||||
.as_ref()
|
||||
.into()
|
||||
}
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
let mut ctxt = Self::Context::new(&ring::digest::SHA256);
|
||||
ctxt.update(input);
|
||||
ctxt.finalize()
|
||||
}
|
||||
}
|
||||
|
||||
/// Default dynamic implementation that switches between available implementations.
|
||||
pub enum DynamicImpl {
|
||||
Sha2,
|
||||
Ring,
|
||||
}
|
||||
|
||||
// Runtime latch for detecting the availability of SHA extensions on x86_64.
|
||||
//
|
||||
// Inspired by the runtime switch within the `sha2` crate itself.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
cpufeatures::new!(x86_sha_extensions, "sha", "sse2", "ssse3", "sse4.1");
|
||||
|
||||
#[inline(always)]
|
||||
pub fn have_sha_extensions() -> bool {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
return x86_sha_extensions::get();
|
||||
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
return false;
|
||||
}
|
||||
|
||||
impl DynamicImpl {
|
||||
/// Choose the best available implementation based on the currently executing CPU.
|
||||
#[inline(always)]
|
||||
pub fn best() -> Self {
|
||||
if have_sha_extensions() {
|
||||
Self::Sha2
|
||||
} else {
|
||||
Self::Ring
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for DynamicImpl {
|
||||
type Context = DynamicContext;
|
||||
|
||||
#[inline(always)]
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
match self {
|
||||
Self::Sha2 => Sha2CrateImpl.hash(input),
|
||||
Self::Ring => RingImpl.hash(input),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
match self {
|
||||
Self::Sha2 => Sha2CrateImpl.hash_fixed(input),
|
||||
Self::Ring => RingImpl.hash_fixed(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Context encapsulating all implemenation contexts.
|
||||
///
|
||||
/// This enum ends up being 8 bytes larger than the largest inner context.
|
||||
pub enum DynamicContext {
|
||||
Sha2(sha2::Sha256),
|
||||
Ring(ring::digest::Context),
|
||||
}
|
||||
|
||||
impl Sha256Context for DynamicContext {
|
||||
fn new() -> Self {
|
||||
match DynamicImpl::best() {
|
||||
DynamicImpl::Sha2 => Self::Sha2(Sha256Context::new()),
|
||||
DynamicImpl::Ring => Self::Ring(Sha256Context::new()),
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
match self {
|
||||
Self::Sha2(ctxt) => Sha256Context::update(ctxt, bytes),
|
||||
Self::Ring(ctxt) => Sha256Context::update(ctxt, bytes),
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
match self {
|
||||
Self::Sha2(ctxt) => Sha256Context::finalize(ctxt),
|
||||
Self::Ring(ctxt) => Sha256Context::finalize(ctxt),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The max index that can be used with `ZERO_HASHES`.
|
||||
|
||||
Reference in New Issue
Block a user