Add metrics to VC (#1954)

## Issue Addressed

NA

## Proposed Changes

- Adds an HTTP server to the VC which provides Prometheus metrics.
- Moves the health metrics into the `lighthouse_metrics` crate so it can be shared between BN/VC.
- Sprinkles some metrics around the VC.
- Updates the book to indicate that we now have VC metrics.
- Shifts the "waiting for genesis" logic to later in `ProductionValidatorClient::new_from_cli`
  - This is worth attention during the review.

## Additional Info

- ~~`clippy` has some new lints that are failing. I'll deal with that in another PR.~~
This commit is contained in:
Paul Hauner
2020-11-26 01:10:51 +00:00
parent 50558e61f7
commit 26741944b1
18 changed files with 571 additions and 73 deletions

View File

@@ -1,10 +1,6 @@
//! This crate provides a HTTP server that is solely dedicated to serving the `/metrics` endpoint.
//!
//! For other endpoints, see the `http_api` crate.
#[macro_use]
extern crate lazy_static;
mod metrics;
use beacon_chain::{BeaconChain, BeaconChainTypes};

View File

@@ -1,45 +1,9 @@
use crate::Context;
use beacon_chain::BeaconChainTypes;
use eth2::lighthouse::Health;
use lighthouse_metrics::{Encoder, TextEncoder};
pub use lighthouse_metrics::*;
// Prometheus gauges for process- and system-level health readings.
//
// Each static stores the `Result` returned by `try_create_int_gauge` or
// `try_create_float_gauge`; a failed creation is kept as the `Err` value rather than
// being unwrapped here. `gather_prometheus_metrics` in this file writes to these gauges
// whenever `Health::observe()` succeeds.
lazy_static! {
// Per-process gauges: thread count, resident memory and virtual memory.
pub static ref PROCESS_NUM_THREADS: Result<IntGauge> = try_create_int_gauge(
"process_num_threads",
"Number of threads used by the current process"
);
pub static ref PROCESS_RES_MEM: Result<IntGauge> = try_create_int_gauge(
"process_resident_memory_bytes",
"Resident memory used by the current process"
);
pub static ref PROCESS_VIRT_MEM: Result<IntGauge> = try_create_int_gauge(
"process_virtual_memory_bytes",
"Virtual memory used by the current process"
);
// System-wide virtual memory gauges: total, available, used, free and percentage used.
pub static ref SYSTEM_VIRT_MEM_TOTAL: Result<IntGauge> =
try_create_int_gauge("system_virt_mem_total_bytes", "Total system virtual memory");
pub static ref SYSTEM_VIRT_MEM_AVAILABLE: Result<IntGauge> = try_create_int_gauge(
"system_virt_mem_available_bytes",
"Available system virtual memory"
);
pub static ref SYSTEM_VIRT_MEM_USED: Result<IntGauge> =
try_create_int_gauge("system_virt_mem_used_bytes", "Used system virtual memory");
pub static ref SYSTEM_VIRT_MEM_FREE: Result<IntGauge> =
try_create_int_gauge("system_virt_mem_free_bytes", "Free system virtual memory");
// The percentage is a float gauge, unlike the integer byte counts above.
pub static ref SYSTEM_VIRT_MEM_PERCENTAGE: Result<Gauge> = try_create_float_gauge(
"system_virt_mem_percentage",
"Percentage of used virtual memory"
);
// System load averages over 1, 5 and 15 minutes (float gauges).
pub static ref SYSTEM_LOADAVG_1: Result<Gauge> =
try_create_float_gauge("system_loadavg_1", "Loadavg over 1 minute");
pub static ref SYSTEM_LOADAVG_5: Result<Gauge> =
try_create_float_gauge("system_loadavg_5", "Loadavg over 5 minutes");
pub static ref SYSTEM_LOADAVG_15: Result<Gauge> =
try_create_float_gauge("system_loadavg_15", "Loadavg over 15 minutes");
}
pub fn gather_prometheus_metrics<T: BeaconChainTypes>(
ctx: &Context<T>,
) -> std::result::Result<String, String> {
@@ -75,27 +39,7 @@ pub fn gather_prometheus_metrics<T: BeaconChainTypes>(
eth2_libp2p::scrape_discovery_metrics();
// This will silently fail if we are unable to observe the health. This is desired behaviour
// since we don't support `Health` for all platforms.
if let Ok(health) = Health::observe() {
set_gauge(&PROCESS_NUM_THREADS, health.pid_num_threads as i64);
set_gauge(&PROCESS_RES_MEM, health.pid_mem_resident_set_size as i64);
set_gauge(&PROCESS_VIRT_MEM, health.pid_mem_virtual_memory_size as i64);
set_gauge(&SYSTEM_VIRT_MEM_TOTAL, health.sys_virt_mem_total as i64);
set_gauge(
&SYSTEM_VIRT_MEM_AVAILABLE,
health.sys_virt_mem_available as i64,
);
set_gauge(&SYSTEM_VIRT_MEM_USED, health.sys_virt_mem_used as i64);
set_gauge(&SYSTEM_VIRT_MEM_FREE, health.sys_virt_mem_free as i64);
set_float_gauge(
&SYSTEM_VIRT_MEM_PERCENTAGE,
health.sys_virt_mem_percent as f64,
);
set_float_gauge(&SYSTEM_LOADAVG_1, health.sys_loadavg_1);
set_float_gauge(&SYSTEM_LOADAVG_5, health.sys_loadavg_5);
set_float_gauge(&SYSTEM_LOADAVG_15, health.sys_loadavg_15);
}
warp_utils::metrics::scrape_health_metrics();
encoder
.encode(&lighthouse_metrics::gather(), &mut buffer)