From 7d792e615cfaf8afb9eb342a1b6260dfd513b16e Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 27 Mar 2025 13:25:50 +1100 Subject: [PATCH] Fix xdelta3 output buffer issue (#7174) * Fix xdelta3 output buffer issue * Fix buckets * Update commit hash to `main` * Tag TODO(hdiff) * Update cargo lock --- Cargo.lock | 2 +- Cargo.toml | 2 +- beacon_node/store/src/hdiff.rs | 43 ++++++++++++++++++++++++++------ beacon_node/store/src/metrics.rs | 7 ++++++ 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac4248319f..b86ee106e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10760,7 +10760,7 @@ dependencies = [ [[package]] name = "xdelta3" version = "0.1.5" -source = "git+http://github.com/sigp/xdelta3-rs?rev=50d63cdf1878e5cf3538e9aae5eed34a22c64e4a#50d63cdf1878e5cf3538e9aae5eed34a22c64e4a" +source = "git+http://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a" dependencies = [ "bindgen", "cc", diff --git a/Cargo.toml b/Cargo.toml index 3df158e5a5..49ea6a1108 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -289,7 +289,7 @@ validator_metrics = { path = "validator_client/validator_metrics" } validator_store = { path = "validator_client/validator_store" } validator_test_rig = { path = "testing/validator_test_rig" } warp_utils = { path = "common/warp_utils" } -xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "50d63cdf1878e5cf3538e9aae5eed34a22c64e4a" } +xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" } zstd = "0.13" [profile.maxperf] diff --git a/beacon_node/store/src/hdiff.rs b/beacon_node/store/src/hdiff.rs index a29e680eb5..a659c65452 100644 --- a/beacon_node/store/src/hdiff.rs +++ b/beacon_node/store/src/hdiff.rs @@ -21,8 +21,8 @@ static EMPTY_PUBKEY: LazyLock = LazyLock::new(PublicKeyBytes::em pub enum Error { InvalidHierarchy, DiffDeletionsNotSupported, - UnableToComputeDiff, - UnableToApplyDiff, + 
UnableToComputeDiff(xdelta3::Error), + UnableToApplyDiff(xdelta3::Error), BalancesIncompleteChunk, Compression(std::io::Error), InvalidSszState(ssz::DecodeError), @@ -323,9 +323,15 @@ impl BytesDiff { } pub fn compute_xdelta(source_bytes: &[u8], target_bytes: &[u8]) -> Result { - let bytes = xdelta3::encode(target_bytes, source_bytes) - .ok_or(Error::UnableToComputeDiff) - .unwrap(); + // TODO(hdiff): Use a smaller estimate for the output diff buffer size, currently the + // xdelta3 lib will use 2x the size of the source plus the target length, which is 4x the + // size of the hdiff buffer. In practice, diffs are almost always smaller than buffers (by a + // significant factor), so this is 4-16x larger than necessary in a temporary allocation. + // + // We should use an estimated size that *should* be enough, and then dynamically increase it + // if we hit an insufficient space error. + let bytes = + xdelta3::encode(target_bytes, source_bytes).map_err(Error::UnableToComputeDiff)?; Ok(Self { bytes }) } @@ -334,8 +340,31 @@ impl BytesDiff { } pub fn apply_xdelta(&self, source: &[u8], target: &mut Vec) -> Result<(), Error> { - *target = xdelta3::decode(&self.bytes, source).ok_or(Error::UnableToApplyDiff)?; - Ok(()) + // TODO(hdiff): Dynamic buffer allocation. This is a stopgap until we implement a schema + // change to store the output buffer size inside the `BytesDiff`. + let mut output_length = ((source.len() + self.bytes.len()) * 3) / 2; + let mut num_resizes = 0; + loop { + match xdelta3::decode_with_output_len(&self.bytes, source, output_length as u32) { + Ok(result_buffer) => { + *target = result_buffer; + + metrics::observe( + &metrics::BEACON_HDIFF_BUFFER_APPLY_RESIZES, + num_resizes as f64, + ); + return Ok(()); + } + Err(xdelta3::Error::InsufficientOutputLength) => { + // Double the output buffer length and try again.
+ output_length *= 2; + num_resizes += 1; + } + Err(err) => { + return Err(Error::UnableToApplyDiff(err)); + } + } + } } /// Byte size of this instance diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index 6f9f667917..5da73c3cad 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -202,6 +202,13 @@ pub static BEACON_HDIFF_BUFFER_CLONE_TIMES: LazyLock> = LazyLo "Time required to clone hierarchical diff buffer bytes", ) }); +pub static BEACON_HDIFF_BUFFER_APPLY_RESIZES: LazyLock> = LazyLock::new(|| { + try_create_histogram_with_buckets( + "store_hdiff_buffer_apply_resizes", + "Number of times during diff application that the output buffer had to be resized before decoding succeeded", + Ok(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) + ) +}); /* * Beacon Block */