From 3e78034de6cc73160eb9400d51860b13db55f94f Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Tue, 26 Aug 2025 16:46:14 +1000 Subject: [PATCH] Add `BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE` metric (#7935) Similar to `BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL` but this metric also records the work type. This is useful for identifying the task when a worker is stuck due to a deadlock or something else, which is usually difficult to debug in production / release mode. --- beacon_node/beacon_processor/src/lib.rs | 15 ++++++++++----- beacon_node/beacon_processor/src/metrics.rs | 9 +++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index d318fc8c97..ab9ab045f4 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -1403,11 +1403,6 @@ impl BeaconProcessor { } }; - metrics::set_gauge( - &metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL, - self.current_workers as i64, - ); - if let Some(modified_queue_id) = modified_queue_id { let queue_len = match modified_queue_id { WorkType::GossipAttestation => attestation_queue.len(), @@ -1520,6 +1515,11 @@ impl BeaconProcessor { &[work.str_id()], ); + metrics::inc_gauge_vec( + &metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE, + &[work_id], + ); + // Wrap the `idle_tx` in a struct that will fire the idle message whenever it is dropped. // // This helps ensure that the worker is always freed in the case of an early exit or panic. 
@@ -1688,6 +1688,11 @@ pub struct SendOnDrop { impl Drop for SendOnDrop { fn drop(&mut self) { + metrics::dec_gauge_vec( + &metrics::BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE, + &[self.work_type.clone().into()], + ); + if let Err(e) = self.tx.try_send(self.work_type.clone()) { warn!( msg = "did not free worker, shutdown may be underway", diff --git a/beacon_node/beacon_processor/src/metrics.rs b/beacon_node/beacon_processor/src/metrics.rs index 2c27b78f63..3770473df5 100644 --- a/beacon_node/beacon_processor/src/metrics.rs +++ b/beacon_node/beacon_processor/src/metrics.rs @@ -42,11 +42,12 @@ pub static BEACON_PROCESSOR_WORKERS_SPAWNED_TOTAL: LazyLock> "The number of workers ever spawned by the gossip processing pool.", ) }); -pub static BEACON_PROCESSOR_WORKERS_ACTIVE_TOTAL: LazyLock> = +pub static BEACON_PROCESSOR_WORKERS_ACTIVE_GAUGE_BY_TYPE: LazyLock> = LazyLock::new(|| { - try_create_int_gauge( - "beacon_processor_workers_active_total", - "Count of active workers in the gossip processing pool.", + try_create_int_gauge_vec( + "beacon_processor_workers_active_gauge_by_type", + "Int gauge of the number of active workers per work type", + &["type"], ) }); pub static BEACON_PROCESSOR_IDLE_EVENTS_TOTAL: LazyLock> = LazyLock::new(|| {