mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-03 00:31:50 +00:00
Fix the getBlobs metric and ensure it is recorded promptly to prevent miscounts (#7188)
From testing conducted by Sunnyside Labs, they noticed that the "expected blobs" are quite low on bandwidth constrained nodes. This observation revealed that we don't record the `beacon_blobs_from_el_expected_total` metric at all if the EL doesn't return any response. The fetch blobs function returns without recording the metric. To fix this, I've moved `BLOBS_FROM_EL_EXPECTED_TOTAL` and `BLOBS_FROM_EL_RECEIVED_TOTAL` to as early as possible, to make the metric more accurate.
This commit is contained in:
@@ -13,7 +13,7 @@ use crate::observed_data_sidecars::DoNotObserve;
|
|||||||
use crate::{metrics, AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError};
|
use crate::{metrics, AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError};
|
||||||
use execution_layer::json_structures::BlobAndProofV1;
|
use execution_layer::json_structures::BlobAndProofV1;
|
||||||
use execution_layer::Error as ExecutionLayerError;
|
use execution_layer::Error as ExecutionLayerError;
|
||||||
use metrics::{inc_counter, inc_counter_by, TryExt};
|
use metrics::{inc_counter, TryExt};
|
||||||
use ssz_types::FixedVector;
|
use ssz_types::FixedVector;
|
||||||
use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash;
|
use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -73,13 +73,20 @@ pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.ok_or(FetchEngineBlobError::ExecutionLayerMissing)?;
|
.ok_or(FetchEngineBlobError::ExecutionLayerMissing)?;
|
||||||
|
|
||||||
|
metrics::observe(&metrics::BLOBS_FROM_EL_EXPECTED, num_expected_blobs as f64);
|
||||||
debug!(num_expected_blobs, "Fetching blobs from the EL");
|
debug!(num_expected_blobs, "Fetching blobs from the EL");
|
||||||
let response = execution_layer
|
let response = execution_layer
|
||||||
.get_blobs(versioned_hashes)
|
.get_blobs(versioned_hashes)
|
||||||
.await
|
.await
|
||||||
|
.inspect_err(|_| {
|
||||||
|
inc_counter(&metrics::BLOBS_FROM_EL_ERROR_TOTAL);
|
||||||
|
})
|
||||||
.map_err(FetchEngineBlobError::RequestFailed)?;
|
.map_err(FetchEngineBlobError::RequestFailed)?;
|
||||||
|
|
||||||
if response.is_empty() || response.iter().all(|opt| opt.is_none()) {
|
let num_fetched_blobs = response.iter().filter(|opt| opt.is_some()).count();
|
||||||
|
metrics::observe(&metrics::BLOBS_FROM_EL_RECEIVED, num_fetched_blobs as f64);
|
||||||
|
|
||||||
|
if num_fetched_blobs == 0 {
|
||||||
debug!(num_expected_blobs, "No blobs fetched from the EL");
|
debug!(num_expected_blobs, "No blobs fetched from the EL");
|
||||||
inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
|
inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL);
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
@@ -99,20 +106,6 @@ pub async fn fetch_and_process_engine_blobs<T: BeaconChainTypes>(
|
|||||||
&chain.spec,
|
&chain.spec,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let num_fetched_blobs = fixed_blob_sidecar_list
|
|
||||||
.iter()
|
|
||||||
.filter(|b| b.is_some())
|
|
||||||
.count();
|
|
||||||
|
|
||||||
inc_counter_by(
|
|
||||||
&metrics::BLOBS_FROM_EL_EXPECTED_TOTAL,
|
|
||||||
num_expected_blobs as u64,
|
|
||||||
);
|
|
||||||
inc_counter_by(
|
|
||||||
&metrics::BLOBS_FROM_EL_RECEIVED_TOTAL,
|
|
||||||
num_fetched_blobs as u64,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Gossip verify blobs before publishing. This prevents blobs with invalid KZG proofs from
|
// Gossip verify blobs before publishing. This prevents blobs with invalid KZG proofs from
|
||||||
// the EL making it into the data availability checker. We do not immediately add these
|
// the EL making it into the data availability checker. We do not immediately add these
|
||||||
// blobs to the observed blobs/columns cache because we want to allow blobs/columns to arrive on gossip
|
// blobs to the observed blobs/columns cache because we want to allow blobs/columns to arrive on gossip
|
||||||
|
|||||||
@@ -1662,28 +1662,37 @@ pub static DATA_COLUMN_SIDECAR_GOSSIP_VERIFICATION_TIMES: LazyLock<Result<Histog
|
|||||||
pub static BLOBS_FROM_EL_HIT_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
pub static BLOBS_FROM_EL_HIT_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||||
try_create_int_counter(
|
try_create_int_counter(
|
||||||
"beacon_blobs_from_el_hit_total",
|
"beacon_blobs_from_el_hit_total",
|
||||||
"Number of blob batches fetched from the execution layer",
|
"Number of non-empty blob batches fetched from the execution layer",
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static BLOBS_FROM_EL_MISS_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
pub static BLOBS_FROM_EL_MISS_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||||
try_create_int_counter(
|
try_create_int_counter(
|
||||||
"beacon_blobs_from_el_miss_total",
|
"beacon_blobs_from_el_miss_total",
|
||||||
"Number of blob batches failed to fetch from the execution layer",
|
"Number of empty blob responses from the execution layer",
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static BLOBS_FROM_EL_EXPECTED_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
pub static BLOBS_FROM_EL_ERROR_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
||||||
try_create_int_counter(
|
try_create_int_counter(
|
||||||
"beacon_blobs_from_el_expected_total",
|
"beacon_blobs_from_el_error_total",
|
||||||
|
"Number of failed blob fetches from the execution layer",
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static BLOBS_FROM_EL_EXPECTED: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||||
|
try_create_histogram_with_buckets(
|
||||||
|
"beacon_blobs_from_el_expected",
|
||||||
"Number of blobs expected from the execution layer",
|
"Number of blobs expected from the execution layer",
|
||||||
|
Ok(vec![0.0, 3.0, 6.0, 9.0, 12.0, 18.0, 24.0, 30.0]),
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static BLOBS_FROM_EL_RECEIVED_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
|
pub static BLOBS_FROM_EL_RECEIVED: LazyLock<Result<Histogram>> = LazyLock::new(|| {
|
||||||
try_create_int_counter(
|
try_create_histogram_with_buckets(
|
||||||
"beacon_blobs_from_el_received_total",
|
"beacon_blobs_from_el_received_total",
|
||||||
"Number of blobs fetched from the execution layer",
|
"Number of blobs fetched from the execution layer",
|
||||||
|
linear_buckets(0.0, 4.0, 20),
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user