Misc. dependency cleanup (#6810)

* remove ensure_dir_exists (2 deps saved)

* group UNHANDLED_ERRORs into a generic (2 deps saved)

* Introduce separate `health_metrics` crate

* separate health_metrics crate

* remove metrics from warp_utils

* move ProcessHealth::observe and SystemHealth::observe to health_metrics

* fix errors

* nitpick `Cargo.toml`s

---------

Co-authored-by: Daniel Knopik <daniel@dknopik.de>
# Conflicts:
#	Cargo.toml
This commit is contained in:
Daniel Knopik
2025-01-16 02:48:50 +01:00
committed by GitHub
parent b1a19a8b20
commit 669932aa67
43 changed files with 303 additions and 315 deletions

View File

@@ -3,7 +3,6 @@
pub mod cors;
pub mod json;
pub mod metrics;
pub mod query;
pub mod reject;
pub mod task;

View File

@@ -1,189 +0,0 @@
use eth2::lighthouse::{ProcessHealth, SystemHealth};
use metrics::*;
use std::sync::LazyLock;
/// Gauge `process_num_threads`: number of threads used by the current process.
///
/// Like every gauge in this file, the value is created on first access and stores the
/// `Result` returned by the gauge constructor.
pub static PROCESS_NUM_THREADS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "process_num_threads",
        "Number of threads used by the current process",
    )
});

/// Gauge `process_resident_memory_bytes`: resident memory used by the current process.
pub static PROCESS_RES_MEM: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "process_resident_memory_bytes",
        "Resident memory used by the current process",
    )
});

/// Gauge `process_virtual_memory_bytes`: virtual memory used by the current process.
pub static PROCESS_VIRT_MEM: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "process_virtual_memory_bytes",
        "Virtual memory used by the current process",
    )
});

/// Gauge `process_shared_memory_bytes`: shared memory used by the current process.
pub static PROCESS_SHR_MEM: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "process_shared_memory_bytes",
        "Shared memory used by the current process",
    )
});

/// Gauge `process_cpu_seconds_total`: total cpu time taken by the current process.
pub static PROCESS_SECONDS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "process_cpu_seconds_total",
        "Total cpu time taken by the current process",
    )
});
/// Gauge `system_virt_mem_total_bytes`: total system virtual memory.
pub static SYSTEM_VIRT_MEM_TOTAL: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("system_virt_mem_total_bytes", "Total system virtual memory")
});

/// Gauge `system_virt_mem_available_bytes`: available system virtual memory.
pub static SYSTEM_VIRT_MEM_AVAILABLE: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "system_virt_mem_available_bytes",
        "Available system virtual memory",
    )
});

/// Gauge `system_virt_mem_used_bytes`: used system virtual memory.
pub static SYSTEM_VIRT_MEM_USED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("system_virt_mem_used_bytes", "Used system virtual memory")
});

/// Gauge `system_virt_mem_free_bytes`: free system virtual memory.
pub static SYSTEM_VIRT_MEM_FREE: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("system_virt_mem_free_bytes", "Free system virtual memory")
});
/// Gauge `system_virt_mem_cached_bytes`: cached system virtual memory.
///
/// The help text is specific to this metric so that it is not confused with the
/// `system_virt_mem_used_bytes` gauge in scrape output.
///
/// NOTE(review): no `set_gauge` call in this file writes to this gauge — confirm whether
/// it is meant to be populated.
pub static SYSTEM_VIRT_MEM_CACHED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("system_virt_mem_cached_bytes", "Cached system virtual memory")
});

/// Gauge `system_virt_mem_buffer_bytes`: buffered system virtual memory.
///
/// The help text is specific to this metric so that it is not confused with the
/// `system_virt_mem_free_bytes` gauge in scrape output.
///
/// NOTE(review): no `set_gauge` call in this file writes to this gauge — confirm whether
/// it is meant to be populated.
pub static SYSTEM_VIRT_MEM_BUFFERS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("system_virt_mem_buffer_bytes", "Buffered system virtual memory")
});
/// Float gauge `system_virt_mem_percentage`: percentage of used virtual memory.
pub static SYSTEM_VIRT_MEM_PERCENTAGE: LazyLock<Result<Gauge>> = LazyLock::new(|| {
    try_create_float_gauge(
        "system_virt_mem_percentage",
        "Percentage of used virtual memory",
    )
});

/// Float gauge `system_loadavg_1`: load average over 1 minute.
pub static SYSTEM_LOADAVG_1: LazyLock<Result<Gauge>> =
    LazyLock::new(|| try_create_float_gauge("system_loadavg_1", "Loadavg over 1 minute"));

/// Float gauge `system_loadavg_5`: load average over 5 minutes.
pub static SYSTEM_LOADAVG_5: LazyLock<Result<Gauge>> =
    LazyLock::new(|| try_create_float_gauge("system_loadavg_5", "Loadavg over 5 minutes"));

/// Float gauge `system_loadavg_15`: load average over 15 minutes.
pub static SYSTEM_LOADAVG_15: LazyLock<Result<Gauge>> =
    LazyLock::new(|| try_create_float_gauge("system_loadavg_15", "Loadavg over 15 minutes"));
/// Gauge `cpu_cores`: number of physical cpu cores.
pub static CPU_CORES: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("cpu_cores", "Number of physical cpu cores"));

/// Gauge `cpu_threads`: number of logical cpu cores.
pub static CPU_THREADS: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("cpu_threads", "Number of logical cpu cores"));

/// Gauge `cpu_system_seconds_total`: total time spent in kernel mode.
pub static CPU_SYSTEM_SECONDS_TOTAL: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "cpu_system_seconds_total",
        "Total time spent in kernel mode",
    )
});

/// Gauge `cpu_user_seconds_total`: total time spent in user mode.
pub static CPU_USER_SECONDS_TOTAL: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge("cpu_user_seconds_total", "Total time spent in user mode")
});

/// Gauge `cpu_iowait_seconds_total`: total time spent waiting for io.
pub static CPU_IOWAIT_SECONDS_TOTAL: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "cpu_iowait_seconds_total",
        "Total time spent waiting for io",
    )
});

/// Gauge `cpu_idle_seconds_total`: total time spent idle.
pub static CPU_IDLE_SECONDS_TOTAL: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("cpu_idle_seconds_total", "Total time spent idle"));
/// Gauge `disk_node_bytes_total`: total capacity of disk.
pub static DISK_BYTES_TOTAL: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("disk_node_bytes_total", "Total capacity of disk"));

/// Gauge `disk_node_bytes_free`: free space in disk.
pub static DISK_BYTES_FREE: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("disk_node_bytes_free", "Free space in disk"));

/// Gauge `disk_node_reads_total`: number of disk reads.
pub static DISK_READS: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("disk_node_reads_total", "Number of disk reads"));

/// Gauge `disk_node_writes_total`: number of disk writes.
pub static DISK_WRITES: LazyLock<Result<IntGauge>> =
    LazyLock::new(|| try_create_int_gauge("disk_node_writes_total", "Number of disk writes"));

/// Gauge `network_node_bytes_total_received`: total bytes received over all network
/// interfaces.
pub static NETWORK_BYTES_RECEIVED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "network_node_bytes_total_received",
        "Total bytes received over all network interfaces",
    )
});

/// Gauge `network_node_bytes_total_transmit`: total bytes sent over all network interfaces.
pub static NETWORK_BYTES_SENT: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "network_node_bytes_total_transmit",
        "Total bytes sent over all network interfaces",
    )
});

/// Gauge `misc_node_boot_ts_seconds`: boot time as a unix epoch timestamp.
///
/// NOTE(review): no `set_gauge` call in this file writes to this gauge — confirm whether
/// it is populated elsewhere.
pub static BOOT_TIME: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
    try_create_int_gauge(
        "misc_node_boot_ts_seconds",
        "Boot time as unix epoch timestamp",
    )
});
/// Refreshes all health gauges: the process-level metrics first, then the system-level ones.
pub fn scrape_health_metrics() {
    scrape_process_health_metrics();
    scrape_system_health_metrics();
}
/// Copies a fresh `ProcessHealth::observe()` reading into the `PROCESS_*` gauges.
///
/// When the observation fails the error is discarded and no gauge is touched. That is the
/// desired behaviour, because `Health` is not supported on every platform.
pub fn scrape_process_health_metrics() {
    let Ok(process) = ProcessHealth::observe() else {
        return;
    };

    set_gauge(&PROCESS_NUM_THREADS, process.pid_num_threads);
    set_gauge(&PROCESS_RES_MEM, process.pid_mem_resident_set_size as i64);
    set_gauge(&PROCESS_VIRT_MEM, process.pid_mem_virtual_memory_size as i64);
    set_gauge(&PROCESS_SHR_MEM, process.pid_mem_shared_memory_size as i64);
    set_gauge(&PROCESS_SECONDS, process.pid_process_seconds_total as i64);
}
/// Copies a fresh `SystemHealth::observe()` reading into the system-wide gauges
/// (memory, load average, CPU, disk and network).
///
/// When the observation fails the error is discarded and no gauge is touched. That is the
/// desired behaviour, because `Health` is not supported on every platform.
///
/// NOTE(review): `SYSTEM_VIRT_MEM_CACHED`, `SYSTEM_VIRT_MEM_BUFFERS` and `BOOT_TIME` are
/// declared in this file but are not written here — confirm whether that is intentional.
pub fn scrape_system_health_metrics() {
    let Ok(system) = SystemHealth::observe() else {
        return;
    };

    // Virtual memory.
    set_gauge(&SYSTEM_VIRT_MEM_TOTAL, system.sys_virt_mem_total as i64);
    set_gauge(
        &SYSTEM_VIRT_MEM_AVAILABLE,
        system.sys_virt_mem_available as i64,
    );
    set_gauge(&SYSTEM_VIRT_MEM_USED, system.sys_virt_mem_used as i64);
    set_gauge(&SYSTEM_VIRT_MEM_FREE, system.sys_virt_mem_free as i64);
    set_float_gauge(
        &SYSTEM_VIRT_MEM_PERCENTAGE,
        system.sys_virt_mem_percent as f64,
    );

    // Load averages.
    set_float_gauge(&SYSTEM_LOADAVG_1, system.sys_loadavg_1);
    set_float_gauge(&SYSTEM_LOADAVG_5, system.sys_loadavg_5);
    set_float_gauge(&SYSTEM_LOADAVG_15, system.sys_loadavg_15);

    // CPU topology and time counters.
    set_gauge(&CPU_CORES, system.cpu_cores as i64);
    set_gauge(&CPU_THREADS, system.cpu_threads as i64);
    set_gauge(
        &CPU_SYSTEM_SECONDS_TOTAL,
        system.system_seconds_total as i64,
    );
    set_gauge(&CPU_USER_SECONDS_TOTAL, system.user_seconds_total as i64);
    set_gauge(
        &CPU_IOWAIT_SECONDS_TOTAL,
        system.iowait_seconds_total as i64,
    );
    set_gauge(&CPU_IDLE_SECONDS_TOTAL, system.idle_seconds_total as i64);

    // Disk.
    set_gauge(&DISK_BYTES_TOTAL, system.disk_node_bytes_total as i64);
    set_gauge(&DISK_BYTES_FREE, system.disk_node_bytes_free as i64);
    set_gauge(&DISK_READS, system.disk_node_reads_total as i64);
    set_gauge(&DISK_WRITES, system.disk_node_writes_total as i64);

    // Network.
    set_gauge(
        &NETWORK_BYTES_RECEIVED,
        system.network_node_bytes_total_received as i64,
    );
    set_gauge(
        &NETWORK_BYTES_SENT,
        system.network_node_bytes_total_transmit as i64,
    );
}

View File

@@ -2,6 +2,7 @@ use eth2::types::{ErrorMessage, Failure, IndexedErrorMessage};
use std::convert::Infallible;
use std::error::Error;
use std::fmt;
use std::fmt::Debug;
use warp::{http::StatusCode, reject::Reject, reply::Response, Reply};
#[derive(Debug)]
@@ -19,15 +20,6 @@ pub fn server_sent_event_error(s: String) -> ServerSentEventError {
ServerSentEventError(s)
}
#[derive(Debug)]
pub struct BeaconChainError(pub beacon_chain::BeaconChainError);
impl Reject for BeaconChainError {}
pub fn beacon_chain_error(e: beacon_chain::BeaconChainError) -> warp::reject::Rejection {
warp::reject::custom(BeaconChainError(e))
}
#[derive(Debug)]
pub struct BeaconStateError(pub types::BeaconStateError);
@@ -47,21 +39,12 @@ pub fn arith_error(e: safe_arith::ArithError) -> warp::reject::Rejection {
}
#[derive(Debug)]
pub struct SlotProcessingError(pub state_processing::SlotProcessingError);
pub struct UnhandledError(pub Box<dyn Debug + Send + Sync + 'static>);
impl Reject for SlotProcessingError {}
impl Reject for UnhandledError {}
pub fn slot_processing_error(e: state_processing::SlotProcessingError) -> warp::reject::Rejection {
warp::reject::custom(SlotProcessingError(e))
}
#[derive(Debug)]
pub struct BlockProductionError(pub beacon_chain::BlockProductionError);
impl Reject for BlockProductionError {}
pub fn block_production_error(e: beacon_chain::BlockProductionError) -> warp::reject::Rejection {
warp::reject::custom(BlockProductionError(e))
pub fn unhandled_error<D: Debug + Send + Sync + 'static>(e: D) -> warp::reject::Rejection {
warp::reject::custom(UnhandledError(Box::new(e)))
}
#[derive(Debug)]
@@ -191,16 +174,7 @@ pub async fn handle_rejection(err: warp::Rejection) -> Result<impl warp::Reply,
} else if let Some(e) = err.find::<warp::reject::InvalidQuery>() {
code = StatusCode::BAD_REQUEST;
message = format!("BAD_REQUEST: invalid query: {}", e);
} else if let Some(e) = err.find::<crate::reject::BeaconChainError>() {
code = StatusCode::INTERNAL_SERVER_ERROR;
message = format!("UNHANDLED_ERROR: {:?}", e.0);
} else if let Some(e) = err.find::<crate::reject::BeaconStateError>() {
code = StatusCode::INTERNAL_SERVER_ERROR;
message = format!("UNHANDLED_ERROR: {:?}", e.0);
} else if let Some(e) = err.find::<crate::reject::SlotProcessingError>() {
code = StatusCode::INTERNAL_SERVER_ERROR;
message = format!("UNHANDLED_ERROR: {:?}", e.0);
} else if let Some(e) = err.find::<crate::reject::BlockProductionError>() {
} else if let Some(e) = err.find::<crate::reject::UnhandledError>() {
code = StatusCode::INTERNAL_SERVER_ERROR;
message = format!("UNHANDLED_ERROR: {:?}", e.0);
} else if let Some(e) = err.find::<crate::reject::CustomNotFound>() {