mirror of
https://github.com/sigp/lighthouse.git
synced 2026-04-27 01:33:33 +00:00
Add metrics to VC (#1954)
## Issue Addressed NA ## Proposed Changes - Adds a HTTP server to the VC which provides Prometheus metrics. - Moves the health metrics into the `lighthouse_metrics` crate so it can be shared between BN/VC. - Sprinkle some metrics around the VC. - Update the book to indicate that we now have VC metrics. - Shifts the "waiting for genesis" logic later in the `ProductionValidatorClient::new_from_cli` - This is worth attention during the review. ## Additional Info - ~~`clippy` has some new lints that are failing. I'll deal with that in another PR.~~
This commit is contained in:
146
validator_client/src/http_metrics/metrics.rs
Normal file
146
validator_client/src/http_metrics/metrics.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
use super::Context;
|
||||
use slot_clock::SlotClock;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use types::EthSpec;
|
||||
|
||||
pub const SUCCESS: &str = "success";
|
||||
pub const SLASHABLE: &str = "slashable";
|
||||
pub const SAME_DATA: &str = "same_data";
|
||||
pub const UNREGISTERED: &str = "unregistered";
|
||||
pub const FULL_UPDATE: &str = "full_update";
|
||||
pub const BEACON_BLOCK: &str = "beacon_block";
|
||||
pub const ATTESTATIONS: &str = "attestations";
|
||||
pub const AGGREGATES: &str = "aggregates";
|
||||
pub const CURRENT_EPOCH: &str = "current_epoch";
|
||||
pub const NEXT_EPOCH: &str = "next_epoch";
|
||||
|
||||
pub use lighthouse_metrics::*;
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
pub static ref GENESIS_DISTANCE: Result<IntGauge> = try_create_int_gauge(
|
||||
"vc_genesis_distance_seconds",
|
||||
"Distance between now and genesis time"
|
||||
);
|
||||
pub static ref ENABLED_VALIDATORS_COUNT: Result<IntGauge> = try_create_int_gauge(
|
||||
"vc_validators_enabled_count",
|
||||
"Number of enabled validators"
|
||||
);
|
||||
pub static ref TOTAL_VALIDATORS_COUNT: Result<IntGauge> = try_create_int_gauge(
|
||||
"vc_validators_total_count",
|
||||
"Number of total validators (enabled and disabled)"
|
||||
);
|
||||
|
||||
pub static ref SIGNED_BLOCKS_TOTAL: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"vc_signed_beacon_blocks_total",
|
||||
"Total count of attempted block signings",
|
||||
&["status"]
|
||||
);
|
||||
pub static ref SIGNED_ATTESTATIONS_TOTAL: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"vc_signed_attestations_total",
|
||||
"Total count of attempted Attestation signings",
|
||||
&["status"]
|
||||
);
|
||||
pub static ref SIGNED_AGGREGATES_TOTAL: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"vc_signed_aggregates_total",
|
||||
"Total count of attempted SignedAggregateAndProof signings",
|
||||
&["status"]
|
||||
);
|
||||
pub static ref SIGNED_SELECTION_PROOFS_TOTAL: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"vc_signed_selection_proofs_total",
|
||||
"Total count of attempted SelectionProof signings",
|
||||
&["status"]
|
||||
);
|
||||
pub static ref DUTIES_SERVICE_TIMES: Result<HistogramVec> = try_create_histogram_vec(
|
||||
"vc_duties_service_task_times_seconds",
|
||||
"Duration to perform duties service tasks",
|
||||
&["task"]
|
||||
);
|
||||
pub static ref FORK_SERVICE_TIMES: Result<HistogramVec> = try_create_histogram_vec(
|
||||
"vc_fork_service_task_times_seconds",
|
||||
"Duration to perform fork service tasks",
|
||||
&["task"]
|
||||
);
|
||||
pub static ref ATTESTATION_SERVICE_TIMES: Result<HistogramVec> = try_create_histogram_vec(
|
||||
"vc_attestation_service_task_times_seconds",
|
||||
"Duration to perform attestation service tasks",
|
||||
&["task"]
|
||||
);
|
||||
pub static ref BLOCK_SERVICE_TIMES: Result<HistogramVec> = try_create_histogram_vec(
|
||||
"vc_beacon_block_service_task_times_seconds",
|
||||
"Duration to perform beacon block service tasks",
|
||||
&["task"]
|
||||
);
|
||||
pub static ref PROPOSER_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"vc_beacon_block_proposer_count",
|
||||
"Number of beacon block proposers on this host",
|
||||
&["task"]
|
||||
);
|
||||
pub static ref ATTESTER_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"vc_beacon_attester_count",
|
||||
"Number of attesters on this host",
|
||||
&["task"]
|
||||
);
|
||||
}
|
||||
|
||||
pub fn gather_prometheus_metrics<T: EthSpec>(
|
||||
ctx: &Context<T>,
|
||||
) -> std::result::Result<String, String> {
|
||||
let mut buffer = vec![];
|
||||
let encoder = TextEncoder::new();
|
||||
|
||||
{
|
||||
let shared = ctx.shared.read();
|
||||
|
||||
if let Some(genesis_time) = shared.genesis_time {
|
||||
if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) {
|
||||
let distance = now.as_secs() as i64 - genesis_time as i64;
|
||||
set_gauge(&GENESIS_DISTANCE, distance);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(validator_store) = &shared.validator_store {
|
||||
let initialized_validators_lock = validator_store.initialized_validators();
|
||||
let initialized_validators = initialized_validators_lock.read();
|
||||
|
||||
set_gauge(
|
||||
&ENABLED_VALIDATORS_COUNT,
|
||||
initialized_validators.num_enabled() as i64,
|
||||
);
|
||||
set_gauge(
|
||||
&TOTAL_VALIDATORS_COUNT,
|
||||
initialized_validators.num_total() as i64,
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(duties_service) = &shared.duties_service {
|
||||
if let Some(slot) = duties_service.slot_clock.now() {
|
||||
let current_epoch = slot.epoch(T::slots_per_epoch());
|
||||
let next_epoch = current_epoch + 1;
|
||||
|
||||
set_int_gauge(
|
||||
&PROPOSER_COUNT,
|
||||
&[CURRENT_EPOCH],
|
||||
duties_service.proposer_count(current_epoch) as i64,
|
||||
);
|
||||
set_int_gauge(
|
||||
&ATTESTER_COUNT,
|
||||
&[CURRENT_EPOCH],
|
||||
duties_service.attester_count(current_epoch) as i64,
|
||||
);
|
||||
set_int_gauge(
|
||||
&ATTESTER_COUNT,
|
||||
&[NEXT_EPOCH],
|
||||
duties_service.attester_count(next_epoch) as i64,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
warp_utils::metrics::scrape_health_metrics();
|
||||
|
||||
encoder
|
||||
.encode(&lighthouse_metrics::gather(), &mut buffer)
|
||||
.unwrap();
|
||||
|
||||
String::from_utf8(buffer).map_err(|e| format!("Failed to encode prometheus info: {:?}", e))
|
||||
}
|
||||
149
validator_client/src/http_metrics/mod.rs
Normal file
149
validator_client/src/http_metrics/mod.rs
Normal file
@@ -0,0 +1,149 @@
|
||||
//! This crate provides a HTTP server that is solely dedicated to serving the `/metrics` endpoint.
|
||||
//!
|
||||
//! For other endpoints, see the `http_api` crate.
|
||||
pub mod metrics;
|
||||
|
||||
use crate::{DutiesService, ValidatorStore};
|
||||
use lighthouse_version::version_with_platform;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slog::{crit, info, Logger};
|
||||
use slot_clock::SystemTimeSlotClock;
|
||||
use std::future::Future;
|
||||
use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4};
|
||||
use std::sync::Arc;
|
||||
use types::EthSpec;
|
||||
use warp::{http::Response, Filter};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Warp(warp::Error),
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl From<warp::Error> for Error {
|
||||
fn from(e: warp::Error) -> Self {
|
||||
Error::Warp(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Error {
|
||||
fn from(e: String) -> Self {
|
||||
Error::Other(e)
|
||||
}
|
||||
}
|
||||
|
||||
/// Contains objects which have shared access from inside/outside of the metrics server.
|
||||
pub struct Shared<T: EthSpec> {
|
||||
pub validator_store: Option<ValidatorStore<SystemTimeSlotClock, T>>,
|
||||
pub duties_service: Option<DutiesService<SystemTimeSlotClock, T>>,
|
||||
pub genesis_time: Option<u64>,
|
||||
}
|
||||
|
||||
/// A wrapper around all the items required to spawn the HTTP server.
|
||||
///
|
||||
/// The server will gracefully handle the case where any fields are `None`.
|
||||
pub struct Context<T: EthSpec> {
|
||||
pub config: Config,
|
||||
pub shared: RwLock<Shared<T>>,
|
||||
pub log: Logger,
|
||||
}
|
||||
|
||||
/// Configuration for the HTTP server.
|
||||
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
pub enabled: bool,
|
||||
pub listen_addr: Ipv4Addr,
|
||||
pub listen_port: u16,
|
||||
pub allow_origin: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
listen_addr: Ipv4Addr::new(127, 0, 0, 1),
|
||||
listen_port: 5064,
|
||||
allow_origin: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a server that will serve requests using information from `ctx`.
|
||||
///
|
||||
/// The server will shut down gracefully when the `shutdown` future resolves.
|
||||
///
|
||||
/// ## Returns
|
||||
///
|
||||
/// This function will bind the server to the provided address and then return a tuple of:
|
||||
///
|
||||
/// - `SocketAddr`: the address that the HTTP server will listen on.
|
||||
/// - `Future`: the actual server future that will need to be awaited.
|
||||
///
|
||||
/// ## Errors
|
||||
///
|
||||
/// Returns an error if the server is unable to bind or there is another error during
|
||||
/// configuration.
|
||||
pub fn serve<T: EthSpec>(
|
||||
ctx: Arc<Context<T>>,
|
||||
shutdown: impl Future<Output = ()> + Send + Sync + 'static,
|
||||
) -> Result<(SocketAddr, impl Future<Output = ()>), Error> {
|
||||
let config = &ctx.config;
|
||||
let log = ctx.log.clone();
|
||||
|
||||
// Configure CORS.
|
||||
let cors_builder = {
|
||||
let builder = warp::cors()
|
||||
.allow_method("GET")
|
||||
.allow_headers(vec!["Content-Type"]);
|
||||
|
||||
warp_utils::cors::set_builder_origins(
|
||||
builder,
|
||||
config.allow_origin.as_deref(),
|
||||
(config.listen_addr, config.listen_port),
|
||||
)?
|
||||
};
|
||||
|
||||
// Sanity check.
|
||||
if !config.enabled {
|
||||
crit!(log, "Cannot start disabled metrics HTTP server");
|
||||
return Err(Error::Other(
|
||||
"A disabled metrics server should not be started".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let inner_ctx = ctx.clone();
|
||||
let routes = warp::get()
|
||||
.and(warp::path("metrics"))
|
||||
.map(move || inner_ctx.clone())
|
||||
.and_then(|ctx: Arc<Context<T>>| async move {
|
||||
Ok::<_, warp::Rejection>(
|
||||
metrics::gather_prometheus_metrics(&ctx)
|
||||
.map(|body| Response::builder().status(200).body(body).unwrap())
|
||||
.unwrap_or_else(|e| {
|
||||
Response::builder()
|
||||
.status(500)
|
||||
.body(format!("Unable to gather metrics: {:?}", e))
|
||||
.unwrap()
|
||||
}),
|
||||
)
|
||||
})
|
||||
// Add a `Server` header.
|
||||
.map(|reply| warp::reply::with_header(reply, "Server", &version_with_platform()))
|
||||
.with(cors_builder.build());
|
||||
|
||||
let (listening_socket, server) = warp::serve(routes).try_bind_with_graceful_shutdown(
|
||||
SocketAddrV4::new(config.listen_addr, config.listen_port),
|
||||
async {
|
||||
shutdown.await;
|
||||
},
|
||||
)?;
|
||||
|
||||
info!(
|
||||
log,
|
||||
"Metrics HTTP server started";
|
||||
"listen_address" => listening_socket.to_string(),
|
||||
);
|
||||
|
||||
Ok((listening_socket, server))
|
||||
}
|
||||
Reference in New Issue
Block a user