Fix xdelta3 output buffer issue (#7174)

* Fix xdelta3 output buffer issue

* Fix buckets

* Update commit hash to `main`

* Tag TODO(hdiff)

* Update cargo lock
This commit is contained in:
Michael Sproul
2025-03-27 13:25:50 +11:00
committed by GitHub
parent 0875326cb6
commit 7d792e615c
4 changed files with 45 additions and 9 deletions

2
Cargo.lock generated
View File

@@ -10760,7 +10760,7 @@ dependencies = [
[[package]]
name = "xdelta3"
version = "0.1.5"
source = "git+http://github.com/sigp/xdelta3-rs?rev=50d63cdf1878e5cf3538e9aae5eed34a22c64e4a#50d63cdf1878e5cf3538e9aae5eed34a22c64e4a"
source = "git+http://github.com/sigp/xdelta3-rs?rev=4db64086bb02e9febb584ba93b9d16bb2ae3825a#4db64086bb02e9febb584ba93b9d16bb2ae3825a"
dependencies = [
"bindgen",
"cc",

View File

@@ -289,7 +289,7 @@ validator_metrics = { path = "validator_client/validator_metrics" }
validator_store = { path = "validator_client/validator_store" }
validator_test_rig = { path = "testing/validator_test_rig" }
warp_utils = { path = "common/warp_utils" }
xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "50d63cdf1878e5cf3538e9aae5eed34a22c64e4a" }
xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "4db64086bb02e9febb584ba93b9d16bb2ae3825a" }
zstd = "0.13"
[profile.maxperf]

View File

@@ -21,8 +21,8 @@ static EMPTY_PUBKEY: LazyLock<PublicKeyBytes> = LazyLock::new(PublicKeyBytes::em
pub enum Error {
InvalidHierarchy,
DiffDeletionsNotSupported,
UnableToComputeDiff,
UnableToApplyDiff,
UnableToComputeDiff(xdelta3::Error),
UnableToApplyDiff(xdelta3::Error),
BalancesIncompleteChunk,
Compression(std::io::Error),
InvalidSszState(ssz::DecodeError),
@@ -323,9 +323,15 @@ impl BytesDiff {
}
pub fn compute_xdelta(source_bytes: &[u8], target_bytes: &[u8]) -> Result<Self, Error> {
let bytes = xdelta3::encode(target_bytes, source_bytes)
.ok_or(Error::UnableToComputeDiff)
.unwrap();
// TODO(hdiff): Use a smaller estimate for the output diff buffer size, currently the
// xdelta3 lib will use 2x the size of the source plus the target length, which is 4x the
// size of the hdiff buffer. In practice, diffs are almost always smaller than buffers (by a
// significant factor), so this is 4-16x larger than necessary in a temporary allocation.
//
// We should use an estimated size that *should* be enough, and then dynamically increase it
// if we hit an insufficient space error.
let bytes =
xdelta3::encode(target_bytes, source_bytes).map_err(Error::UnableToComputeDiff)?;
Ok(Self { bytes })
}
@@ -334,8 +340,31 @@ impl BytesDiff {
}
pub fn apply_xdelta(&self, source: &[u8], target: &mut Vec<u8>) -> Result<(), Error> {
*target = xdelta3::decode(&self.bytes, source).ok_or(Error::UnableToApplyDiff)?;
Ok(())
// TODO(hdiff): Dynamic buffer allocation. This is a stopgap until we implement a schema
// change to store the output buffer size inside the `BytesDiff`.
let mut output_length = ((source.len() + self.bytes.len()) * 3) / 2;
let mut num_resizes = 0;
loop {
match xdelta3::decode_with_output_len(&self.bytes, source, output_length as u32) {
Ok(result_buffer) => {
*target = result_buffer;
metrics::observe(
&metrics::BEACON_HDIFF_BUFFER_APPLY_RESIZES,
num_resizes as f64,
);
return Ok(());
}
Err(xdelta3::Error::InsufficientOutputLength) => {
// Double the output buffer length and try again.
output_length *= 2;
num_resizes += 1;
}
Err(err) => {
return Err(Error::UnableToApplyDiff(err));
}
}
}
}
/// Byte size of this instance

View File

@@ -202,6 +202,13 @@ pub static BEACON_HDIFF_BUFFER_CLONE_TIMES: LazyLock<Result<Histogram>> = LazyLo
"Time required to clone hierarchical diff buffer bytes",
)
});
pub static BEACON_HDIFF_BUFFER_APPLY_RESIZES: LazyLock<Result<Histogram>> = LazyLock::new(|| {
try_create_histogram_with_buckets(
"store_hdiff_buffer_apply_resizes",
"Number of times during diff application that the output buffer had to be resized before decoding succeeded",
Ok(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
)
});
/*
* Beacon Block
*/