mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-02 16:21:42 +00:00
Monitoring service api (#2251)
## Issue Addressed

N/A

## Proposed Changes

Adds a client-side API for collecting system and process metrics and pushing them to a monitoring service.
This commit is contained in:
@@ -64,4 +64,5 @@ scrypt = { version = "0.5.0", default-features = false }
|
||||
lighthouse_metrics = { path = "../common/lighthouse_metrics" }
|
||||
lazy_static = "1.4.0"
|
||||
fallback = { path = "../common/fallback" }
|
||||
monitoring_api = { path = "../common/monitoring_api" }
|
||||
sensitive_url = { path = "../common/sensitive_url" }
|
||||
|
||||
@@ -302,7 +302,7 @@ impl<T: SlotClock, E: EthSpec> BeaconNodeFallback<T, E> {
|
||||
}
|
||||
|
||||
/// The count of candidates, regardless of their state.
|
||||
pub async fn num_total(&self) -> usize {
|
||||
pub fn num_total(&self) -> usize {
|
||||
self.candidates.len()
|
||||
}
|
||||
|
||||
@@ -317,6 +317,17 @@ impl<T: SlotClock, E: EthSpec> BeaconNodeFallback<T, E> {
|
||||
n
|
||||
}
|
||||
|
||||
/// The count of synced and ready fallbacks excluding the primary beacon node candidate.
|
||||
pub async fn num_synced_fallback(&self) -> usize {
|
||||
let mut n = 0;
|
||||
for candidate in self.candidates.iter().skip(1) {
|
||||
if candidate.status(RequireSynced::Yes).await.is_ok() {
|
||||
n += 1
|
||||
}
|
||||
}
|
||||
n
|
||||
}
|
||||
|
||||
/// The count of candidates that are online and compatible, but not necessarily synced.
|
||||
pub async fn num_available(&self) -> usize {
|
||||
let mut n = 0;
|
||||
|
||||
@@ -181,4 +181,19 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
|
||||
address of this server (e.g., http://localhost:5064).")
|
||||
.takes_value(true),
|
||||
)
|
||||
/*
|
||||
* Explorer metrics
|
||||
*/
|
||||
.arg(
|
||||
Arg::with_name("monitoring-endpoint")
|
||||
.long("monitoring-endpoint")
|
||||
.value_name("ADDRESS")
|
||||
.help("Enables the monitoring service for sending system metrics to a remote endpoint. \
|
||||
This can be used to monitor your setup on certain services (e.g. beaconcha.in). \
|
||||
This flag sets the endpoint where the beacon node metrics will be sent. \
|
||||
Note: This will send information to a remote sever which may identify and associate your \
|
||||
validators, IP address and other personal information. Always use a HTTPS connection \
|
||||
and never provide an untrusted URL.")
|
||||
.takes_value(true),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -43,6 +43,8 @@ pub struct Config {
|
||||
pub http_api: http_api::Config,
|
||||
/// Configuration for the HTTP metrics server.
|
||||
pub http_metrics: http_metrics::Config,
|
||||
/// Configuration for sending metrics to a remote explorer endpoint.
|
||||
pub monitoring_api: Option<monitoring_api::Config>,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -70,6 +72,7 @@ impl Default for Config {
|
||||
graffiti_file: None,
|
||||
http_api: <_>::default(),
|
||||
http_metrics: <_>::default(),
|
||||
monitoring_api: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -233,6 +236,16 @@ impl Config {
|
||||
|
||||
config.http_metrics.allow_origin = Some(allow_origin.to_string());
|
||||
}
|
||||
/*
|
||||
* Explorer metrics
|
||||
*/
|
||||
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
|
||||
config.monitoring_api = Some(monitoring_api::Config {
|
||||
db_path: None,
|
||||
freezer_db_path: None,
|
||||
monitoring_endpoint: monitoring_endpoint.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
@@ -108,6 +108,16 @@ lazy_static::lazy_static! {
|
||||
"The number of beacon node requests for each endpoint",
|
||||
&["endpoint"]
|
||||
);
|
||||
|
||||
/// Gauge holding the number of configured eth2 fallback beacon nodes.
pub static ref ETH2_FALLBACK_CONFIGURED: Result<IntGauge> = try_create_int_gauge(
|
||||
"sync_eth2_fallback_configured",
|
||||
"The number of configured eth2 fallbacks",
|
||||
);
|
||||
|
||||
/// Gauge used as a flag: 1 when at least one synced eth2 fallback node is connected,
/// 0 otherwise.
pub static ref ETH2_FALLBACK_CONNECTED: Result<IntGauge> = try_create_int_gauge(
    "sync_eth2_fallback_connected",
    "Set to 1 if connected to at least one synced eth2 fallback node, otherwise set to 0",
);
|
||||
}
|
||||
|
||||
pub fn gather_prometheus_metrics<T: EthSpec>(
|
||||
@@ -126,20 +136,6 @@ pub fn gather_prometheus_metrics<T: EthSpec>(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(validator_store) = &shared.validator_store {
|
||||
let initialized_validators_lock = validator_store.initialized_validators();
|
||||
let initialized_validators = initialized_validators_lock.read();
|
||||
|
||||
set_gauge(
|
||||
&ENABLED_VALIDATORS_COUNT,
|
||||
initialized_validators.num_enabled() as i64,
|
||||
);
|
||||
set_gauge(
|
||||
&TOTAL_VALIDATORS_COUNT,
|
||||
initialized_validators.num_total() as i64,
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(duties_service) = &shared.duties_service {
|
||||
if let Some(slot) = duties_service.slot_clock.now() {
|
||||
let current_epoch = slot.epoch(T::slots_per_epoch());
|
||||
|
||||
@@ -14,6 +14,7 @@ use account_utils::{
|
||||
ZeroizeString,
|
||||
};
|
||||
use eth2_keystore::Keystore;
|
||||
use lighthouse_metrics::set_gauge;
|
||||
use lockfile::{Lockfile, LockfileError};
|
||||
use slog::{debug, error, info, warn, Logger};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
@@ -609,6 +610,16 @@ impl InitializedValidators {
|
||||
} else {
|
||||
debug!(log, "Key cache not modified");
|
||||
}
|
||||
|
||||
// Update the enabled and total validator counts
|
||||
set_gauge(
|
||||
&crate::http_metrics::metrics::ENABLED_VALIDATORS_COUNT,
|
||||
self.num_enabled() as i64,
|
||||
);
|
||||
set_gauge(
|
||||
&crate::http_metrics::metrics::TOTAL_VALIDATORS_COUNT,
|
||||
self.num_total() as i64,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ pub mod http_api;
|
||||
|
||||
pub use cli::cli_app;
|
||||
pub use config::Config;
|
||||
use lighthouse_metrics::set_gauge;
|
||||
use monitoring_api::{MonitoringHttpClient, ProcessType};
|
||||
|
||||
use crate::beacon_node_fallback::{
|
||||
start_fallback_updater_service, BeaconNodeFallback, CandidateBeaconNode, RequireSynced,
|
||||
@@ -125,6 +127,17 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
|
||||
None
|
||||
};
|
||||
|
||||
// Start the explorer client which periodically sends validator process
|
||||
// and system metrics to the configured endpoint.
|
||||
if let Some(monitoring_config) = &config.monitoring_api {
|
||||
let monitoring_client =
|
||||
MonitoringHttpClient::new(monitoring_config, context.log().clone())?;
|
||||
monitoring_client.auto_update(
|
||||
context.executor.clone(),
|
||||
vec![ProcessType::Validator, ProcessType::System],
|
||||
);
|
||||
};
|
||||
|
||||
let mut validator_defs = ValidatorDefinitions::open_or_create(&config.validator_dir)
|
||||
.map_err(|e| format!("Unable to open or create validator definitions: {:?}", e))?;
|
||||
|
||||
@@ -225,10 +238,19 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
|
||||
})
|
||||
.collect::<Result<Vec<BeaconNodeHttpClient>, String>>()?;
|
||||
|
||||
let num_nodes = beacon_nodes.len();
|
||||
let candidates = beacon_nodes
|
||||
.into_iter()
|
||||
.map(CandidateBeaconNode::new)
|
||||
.collect();
|
||||
|
||||
// Set the count for beacon node fallbacks excluding the primary beacon node
|
||||
set_gauge(
|
||||
&http_metrics::metrics::ETH2_FALLBACK_CONFIGURED,
|
||||
num_nodes.saturating_sub(1) as i64,
|
||||
);
|
||||
// Initialize the number of connected, synced fallbacks to 0.
|
||||
set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 0);
|
||||
let mut beacon_nodes: BeaconNodeFallback<_, T> =
|
||||
BeaconNodeFallback::new(candidates, context.eth2_config.spec.clone(), log.clone());
|
||||
|
||||
@@ -409,7 +431,7 @@ async fn init_from_beacon_node<E: EthSpec>(
|
||||
loop {
|
||||
beacon_nodes.update_unready_candidates().await;
|
||||
let num_available = beacon_nodes.num_available().await;
|
||||
let num_total = beacon_nodes.num_total().await;
|
||||
let num_total = beacon_nodes.num_total();
|
||||
if num_available > 0 {
|
||||
info!(
|
||||
context.log(),
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::http_metrics;
|
||||
use crate::{DutiesService, ProductionValidatorClient};
|
||||
use lighthouse_metrics::set_gauge;
|
||||
use slog::{error, info, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
use tokio::time::{sleep, Duration};
|
||||
@@ -39,7 +41,7 @@ async fn notify<T: SlotClock + 'static, E: EthSpec>(
|
||||
) {
|
||||
let num_available = duties_service.beacon_nodes.num_available().await;
|
||||
let num_synced = duties_service.beacon_nodes.num_synced().await;
|
||||
let num_total = duties_service.beacon_nodes.num_total().await;
|
||||
let num_total = duties_service.beacon_nodes.num_total();
|
||||
if num_synced > 0 {
|
||||
info!(
|
||||
log,
|
||||
@@ -57,6 +59,12 @@ async fn notify<T: SlotClock + 'static, E: EthSpec>(
|
||||
"synced" => num_synced,
|
||||
)
|
||||
}
|
||||
let num_synced_fallback = duties_service.beacon_nodes.num_synced_fallback().await;
|
||||
if num_synced_fallback > 0 {
|
||||
set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 1);
|
||||
} else {
|
||||
set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 0);
|
||||
}
|
||||
|
||||
if let Some(slot) = duties_service.slot_clock.now() {
|
||||
let epoch = slot.epoch(E::slots_per_epoch());
|
||||
|
||||
Reference in New Issue
Block a user