Fix the getBlobs metric and ensure it is recorded promptly to prevent miscounts (#7188)

From testing conducted by Sunnyside Labs, they noticed that the "expected blobs" are quite low on bandwidth constrained nodes. This observation revealed that we don't record the `beacon_blobs_from_el_expected_total` metric at all if the EL doesn't return any response. The fetch blobs function returns without recording the metric. To fix this, I've moved `BLOBS_FROM_EL_EXPECTED_TOTAL` and `BLOBS_FROM_EL_RECEIVED_TOTAL` to as early as possible, to make the metric more accurate.
2026-03-19 21:04:41 +00:00 · 2025-04-04 20:01:39 +11:00
parent 57abffcd99
commit 6a75f24ab1
2 changed files with 24 additions and 22 deletions
--- a/beacon_node/beacon_chain/src/fetch_blobs.rs
+++ b/beacon_node/beacon_chain/src/fetch_blobs.rs
@@ -13,7 +13,7 @@ use crate::observed_data_sidecars::DoNotObserve;
 use crate::{metrics, AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError};
 use execution_layer::json_structures::BlobAndProofV1;
 use execution_layer::Error as ExecutionLayerError;
-use metrics::{inc_counter, inc_counter_by, TryExt};
+use metrics::{inc_counter, TryExt};
 use ssz_types::FixedVector;
 use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash;
 use std::sync::Arc;
@@ -73,13 +73,20 @@ pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
        .as_ref()
        .ok_or(FetchEngineBlobError::ExecutionLayerMissing)?;

+    metrics::observe(&metrics::BLOBS_FROM_EL_EXPECTED, num_expected_blobs as f64);
    debug!(num_expected_blobs, "Fetching blobs from the EL");
    let response = execution_layer
        .get_blobs(versioned_hashes)
        .await
+        .inspect_err(|_| {
+            inc_counter(&metrics::BLOBS_FROM_EL_ERROR_TOTAL);
+        })
        .map_err(FetchEngineBlobError::RequestFailed)?;

-    if response.is_empty() || response.iter().all(|opt| opt.is_none()) {
+    let num_fetched_blobs = response.iter().filter(|opt| opt.is_some()).count();
+    metrics::observe(&metrics::BLOBS_FROM_EL_RECEIVED, num_fetched_blobs as f64);
+
+    if num_fetched_blobs == 0 {
        debug!(num_expected_blobs, "No blobs fetched from the EL");
        inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
        return Ok(None);
@@ -99,20 +106,6 @@ pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
        &chain.spec,
    )?;

-    let num_fetched_blobs = fixed_blob_sidecar_list
-        .iter()
-        .filter(|b| b.is_some())
-        .count();
-
-    inc_counter_by(
-        &metrics::BLOBS_FROM_EL_EXPECTED_TOTAL,
-        num_expected_blobs as u64,
-    );
-    inc_counter_by(
-        &metrics::BLOBS_FROM_EL_RECEIVED_TOTAL,
-        num_fetched_blobs as u64,
-    );
-
    // Gossip verify blobs before publishing. This prevents blobs with invalid KZG proofs from
    // the EL making it into the data availability checker. We do not immediately add these
    // blobs to the observed blobs/columns cache because we want to allow blobs/columns to arrive on gossip