mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-03 00:31:50 +00:00
Monitoring service api (#2251)
## Issue Addressed N/A ## Proposed Changes Adds a client-side API for collecting system and process metrics and pushing them to a monitoring service.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use eth2::lighthouse::Health;
|
||||
use eth2::lighthouse::{ProcessHealth, SystemHealth};
|
||||
use lighthouse_metrics::*;
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
@@ -14,6 +14,10 @@ lazy_static::lazy_static! {
|
||||
"process_virtual_memory_bytes",
|
||||
"Virtual memory used by the current process"
|
||||
);
|
||||
pub static ref PROCESS_SECONDS: Result<IntGauge> = try_create_int_gauge(
|
||||
"process_cpu_seconds_total",
|
||||
"Total cpu time taken by the current process"
|
||||
);
|
||||
pub static ref SYSTEM_VIRT_MEM_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("system_virt_mem_total_bytes", "Total system virtual memory");
|
||||
pub static ref SYSTEM_VIRT_MEM_AVAILABLE: Result<IntGauge> = try_create_int_gauge(
|
||||
@@ -24,6 +28,10 @@ lazy_static::lazy_static! {
|
||||
try_create_int_gauge("system_virt_mem_used_bytes", "Used system virtual memory");
|
||||
pub static ref SYSTEM_VIRT_MEM_FREE: Result<IntGauge> =
|
||||
try_create_int_gauge("system_virt_mem_free_bytes", "Free system virtual memory");
|
||||
/// Gauge for the cached portion of system virtual memory, in bytes.
// The help text used to read "Used system virtual memory" (copied from the
// used-memory gauge), which mislabelled this metric for anyone reading it.
pub static ref SYSTEM_VIRT_MEM_CACHED: Result<IntGauge> =
    try_create_int_gauge("system_virt_mem_cached_bytes", "Cached system virtual memory");
|
||||
/// Gauge for the buffer portion of system virtual memory, in bytes.
// The help text used to read "Free system virtual memory" (copied from the
// free-memory gauge), which mislabelled this metric for anyone reading it.
pub static ref SYSTEM_VIRT_MEM_BUFFERS: Result<IntGauge> =
    try_create_int_gauge("system_virt_mem_buffer_bytes", "Buffer system virtual memory");
|
||||
pub static ref SYSTEM_VIRT_MEM_PERCENTAGE: Result<Gauge> = try_create_float_gauge(
|
||||
"system_virt_mem_percentage",
|
||||
"Percentage of used virtual memory"
|
||||
@@ -34,15 +42,62 @@ lazy_static::lazy_static! {
|
||||
try_create_float_gauge("system_loadavg_5", "Loadavg over 5 minutes");
|
||||
pub static ref SYSTEM_LOADAVG_15: Result<Gauge> =
|
||||
try_create_float_gauge("system_loadavg_15", "Loadavg over 15 minutes");
|
||||
|
||||
pub static ref CPU_CORES: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_cores", "Number of physical cpu cores");
|
||||
pub static ref CPU_THREADS: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_threads", "Number of logical cpu cores");
|
||||
|
||||
pub static ref CPU_SYSTEM_SECONDS_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_system_seconds_total", "Total time spent in kernel mode");
|
||||
pub static ref CPU_USER_SECONDS_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_user_seconds_total", "Total time spent in user mode");
|
||||
pub static ref CPU_IOWAIT_SECONDS_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_iowait_seconds_total", "Total time spent waiting for io");
|
||||
pub static ref CPU_IDLE_SECONDS_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("cpu_idle_seconds_total", "Total time spent idle");
|
||||
|
||||
pub static ref DISK_BYTES_TOTAL: Result<IntGauge> =
|
||||
try_create_int_gauge("disk_node_bytes_total", "Total capacity of disk");
|
||||
|
||||
pub static ref DISK_BYTES_FREE: Result<IntGauge> =
|
||||
try_create_int_gauge("disk_node_bytes_free", "Free space in disk");
|
||||
|
||||
pub static ref DISK_READS: Result<IntGauge> =
|
||||
try_create_int_gauge("disk_node_reads_total", "Number of disk reads");
|
||||
|
||||
pub static ref DISK_WRITES: Result<IntGauge> =
|
||||
try_create_int_gauge("disk_node_writes_total", "Number of disk writes");
|
||||
|
||||
pub static ref NETWORK_BYTES_RECEIVED: Result<IntGauge> =
|
||||
try_create_int_gauge("network_node_bytes_total_received", "Total bytes received over all network interfaces");
|
||||
pub static ref NETWORK_BYTES_SENT: Result<IntGauge> =
|
||||
try_create_int_gauge("network_node_bytes_total_transmit", "Total bytes sent over all network interfaces");
|
||||
|
||||
pub static ref BOOT_TIME: Result<IntGauge> =
|
||||
try_create_int_gauge("misc_node_boot_ts_seconds", "Boot time as unix epoch timestamp");
|
||||
}
|
||||
|
||||
pub fn scrape_health_metrics() {
|
||||
scrape_process_health_metrics();
|
||||
scrape_system_health_metrics();
|
||||
}
|
||||
|
||||
pub fn scrape_process_health_metrics() {
|
||||
// This will silently fail if we are unable to observe the health. This is desired behaviour
|
||||
// since we don't support `Health` for all platforms.
|
||||
if let Ok(health) = Health::observe() {
|
||||
if let Ok(health) = ProcessHealth::observe() {
|
||||
set_gauge(&PROCESS_NUM_THREADS, health.pid_num_threads as i64);
|
||||
set_gauge(&PROCESS_RES_MEM, health.pid_mem_resident_set_size as i64);
|
||||
set_gauge(&PROCESS_VIRT_MEM, health.pid_mem_virtual_memory_size as i64);
|
||||
set_gauge(&PROCESS_SECONDS, health.pid_process_seconds_total as i64);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scrape_system_health_metrics() {
|
||||
// This will silently fail if we are unable to observe the health. This is desired behaviour
|
||||
// since we don't support `Health` for all platforms.
|
||||
if let Ok(health) = SystemHealth::observe() {
|
||||
set_gauge(&SYSTEM_VIRT_MEM_TOTAL, health.sys_virt_mem_total as i64);
|
||||
set_gauge(
|
||||
&SYSTEM_VIRT_MEM_AVAILABLE,
|
||||
@@ -57,5 +112,34 @@ pub fn scrape_health_metrics() {
|
||||
set_float_gauge(&SYSTEM_LOADAVG_1, health.sys_loadavg_1);
|
||||
set_float_gauge(&SYSTEM_LOADAVG_5, health.sys_loadavg_5);
|
||||
set_float_gauge(&SYSTEM_LOADAVG_15, health.sys_loadavg_15);
|
||||
|
||||
set_gauge(&CPU_CORES, health.cpu_cores as i64);
|
||||
set_gauge(&CPU_THREADS, health.cpu_threads as i64);
|
||||
|
||||
set_gauge(
|
||||
&CPU_SYSTEM_SECONDS_TOTAL,
|
||||
health.system_seconds_total as i64,
|
||||
);
|
||||
set_gauge(&CPU_USER_SECONDS_TOTAL, health.user_seconds_total as i64);
|
||||
set_gauge(
|
||||
&CPU_IOWAIT_SECONDS_TOTAL,
|
||||
health.iowait_seconds_total as i64,
|
||||
);
|
||||
set_gauge(&CPU_IDLE_SECONDS_TOTAL, health.idle_seconds_total as i64);
|
||||
|
||||
set_gauge(&DISK_BYTES_TOTAL, health.disk_node_bytes_total as i64);
|
||||
|
||||
set_gauge(&DISK_BYTES_FREE, health.disk_node_bytes_free as i64);
|
||||
set_gauge(&DISK_READS, health.disk_node_reads_total as i64);
|
||||
set_gauge(&DISK_WRITES, health.disk_node_writes_total as i64);
|
||||
|
||||
set_gauge(
|
||||
&NETWORK_BYTES_RECEIVED,
|
||||
health.network_node_bytes_total_received as i64,
|
||||
);
|
||||
set_gauge(
|
||||
&NETWORK_BYTES_SENT,
|
||||
health.network_node_bytes_total_transmit as i64,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user