Add BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE metric (#7935)

Similar to `BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL` but this metric also records the work type.

This is useful for identifying the task a worker is running when it gets stuck due to a deadlock or some other cause, which is usually difficult to debug in production / release mode.
This commit is contained in:
Jimmy Chen
2025-08-26 16:46:14 +10:00
committed by GitHub
parent 78b4cca46b
commit 3e78034de6
2 changed files with 15 additions and 9 deletions

View File

@@ -1403,11 +1403,6 @@ impl<E: EthSpec> BeaconProcessor<E> {
}
};
metrics::set_gauge(
&metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL,
self.current_workers as i64,
);
if let Some(modified_queue_id) = modified_queue_id {
let queue_len = match modified_queue_id {
WorkType::GossipAttestation => attestation_queue.len(),
@@ -1520,6 +1515,11 @@ impl<E: EthSpec> BeaconProcessor<E> {
&[work.str_id()],
);
metrics::inc_gauge_vec(
&metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE,
&[work_id],
);
// Wrap the `idle_tx` in a struct that will fire the idle message whenever it is dropped.
//
// This helps ensure that the worker is always freed in the case of an early exit or panic.
@@ -1688,6 +1688,11 @@ pub struct SendOnDrop {
impl Drop for SendOnDrop {
fn drop(&mut self) {
metrics::dec_gauge_vec(
&metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE,
&[self.work_type.clone().into()],
);
if let Err(e) = self.tx.try_send(self.work_type.clone()) {
warn!(
msg = "did not free worker, shutdown may be underway",

View File

@@ -42,11 +42,12 @@ pub static BEACON_PROCESSOR_WORKERS_SPAWNED_TOTAL: LazyLock<Result<IntCounter>>
"The number of workers ever spawned by the gossip processing pool.",
)
});
pub static BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL: LazyLock<Result<IntGauge>> =
pub static BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE: LazyLock<Result<IntGaugeVec>> =
LazyLock::new(|| {
try_create_int_gauge(
"beacon_processor_workers_active_total",
"Count of active workers in the gossip processing pool.",
try_create_int_gauge_vec(
"beacon_processor_workers_active_gauge_by_type",
"Int gauge of the number of active workers per work type",
&["type"],
)
});
pub static BEACON_PROCESSOR_IDLE_EVENTS_TOTAL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {