mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-15 02:42:38 +00:00
Monitoring service api (#2251)
## Issue Addressed N/A ## Proposed Changes Adds a client-side API for collecting system and process metrics and pushing them to a monitoring service.
This commit is contained in:
@@ -44,4 +44,5 @@ hyper = "0.14.4"
|
||||
lighthouse_version = { path = "../common/lighthouse_version" }
|
||||
hex = "0.4.2"
|
||||
slasher = { path = "../slasher" }
|
||||
monitoring_api = { path = "../common/monitoring_api" }
|
||||
sensitive_url = { path = "../common/sensitive_url" }
|
||||
|
||||
@@ -44,3 +44,4 @@ http_api = { path = "../http_api" }
|
||||
http_metrics = { path = "../http_metrics" }
|
||||
slasher = { path = "../../slasher" }
|
||||
slasher_service = { path = "../../slasher/service" }
|
||||
monitoring_api = {path = "../../common/monitoring_api"}
|
||||
|
||||
@@ -14,6 +14,7 @@ use environment::RuntimeContext;
|
||||
use eth1::{Config as Eth1Config, Service as Eth1Service};
|
||||
use eth2_libp2p::NetworkGlobals;
|
||||
use genesis::{interop_genesis_state, Eth1GenesisService};
|
||||
use monitoring_api::{MonitoringHttpClient, ProcessType};
|
||||
use network::{NetworkConfig, NetworkMessage, NetworkService};
|
||||
use slasher::Slasher;
|
||||
use slasher_service::SlasherService;
|
||||
@@ -374,6 +375,22 @@ where
|
||||
SlasherService::new(beacon_chain, network_send).run(&context.executor)
|
||||
}
|
||||
|
||||
/// Start the monitoring client which periodically sends beacon
|
||||
/// and system metrics to the configured endpoint.
///
/// Returns an error if no runtime context has been set on the builder or if
/// `MonitoringHttpClient::new` fails; on success the builder is returned unchanged.
|
||||
pub fn monitoring_client(self, config: &monitoring_api::Config) -> Result<Self, String> {
|
||||
let context = self
|
||||
.runtime_context
|
||||
.as_ref()
|
||||
.ok_or("monitoring_client requires a runtime_context")?
|
||||
.service_context("monitoring_client".into());
|
||||
let monitoring_client = MonitoringHttpClient::new(config, context.log().clone())?;
|
||||
monitoring_client.auto_update(
|
||||
context.executor,
|
||||
vec![ProcessType::BeaconNode, ProcessType::System],
|
||||
);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Immediately starts the service that periodically logs information each slot.
|
||||
pub fn notifier(self) -> Result<Self, String> {
|
||||
let context = self
|
||||
|
||||
@@ -66,6 +66,7 @@ pub struct Config {
|
||||
pub eth1: eth1::Config,
|
||||
pub http_api: http_api::Config,
|
||||
pub http_metrics: http_metrics::Config,
|
||||
pub monitoring_api: Option<monitoring_api::Config>,
|
||||
pub slasher: Option<slasher::Config>,
|
||||
}
|
||||
|
||||
@@ -87,6 +88,7 @@ impl Default for Config {
|
||||
graffiti: Graffiti::default(),
|
||||
http_api: <_>::default(),
|
||||
http_metrics: <_>::default(),
|
||||
monitoring_api: None,
|
||||
slasher: None,
|
||||
validator_monitor_auto: false,
|
||||
validator_monitor_pubkeys: vec![],
|
||||
|
||||
@@ -6,4 +6,14 @@ lazy_static! {
|
||||
"sync_slots_per_second",
|
||||
"The number of blocks being imported per second"
|
||||
);
|
||||
|
||||
pub static ref IS_SYNCED: Result<IntGauge> = try_create_int_gauge(
|
||||
"sync_eth2_synced",
|
||||
"Metric to check if the beacon chain is synced to head. 0 if not synced and non-zero if synced"
|
||||
);
|
||||
|
||||
pub static ref NOTIFIER_HEAD_SLOT: Result<IntGauge> = try_create_int_gauge(
|
||||
"notifier_head_slot",
|
||||
"The head slot sourced from the beacon chain notifier"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -77,6 +77,9 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
};
|
||||
|
||||
let head_slot = head_info.slot;
|
||||
|
||||
metrics::set_gauge(&metrics::NOTIFIER_HEAD_SLOT, head_slot.as_u64() as i64);
|
||||
|
||||
let current_slot = match beacon_chain.slot() {
|
||||
Ok(slot) => slot,
|
||||
Err(e) => {
|
||||
@@ -123,6 +126,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
|
||||
// Log if we are syncing
|
||||
if sync_state.is_syncing() {
|
||||
metrics::set_gauge(&metrics::IS_SYNCED, 0);
|
||||
let distance = format!(
|
||||
"{} slots ({})",
|
||||
head_distance.as_u64(),
|
||||
@@ -151,6 +155,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
);
|
||||
}
|
||||
} else if sync_state.is_synced() {
|
||||
metrics::set_gauge(&metrics::IS_SYNCED, 1);
|
||||
let block_info = if current_slot > head_slot {
|
||||
" … empty".to_string()
|
||||
} else {
|
||||
@@ -167,6 +172,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
|
||||
"slot" => current_slot,
|
||||
);
|
||||
} else {
|
||||
metrics::set_gauge(&metrics::IS_SYNCED, 0);
|
||||
info!(
|
||||
log,
|
||||
"Searching for peers";
|
||||
|
||||
@@ -26,4 +26,23 @@ lazy_static! {
|
||||
pub static ref ENDPOINT_REQUESTS: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"eth1_endpoint_requests", "The number of eth1 requests for each endpoint", &["endpoint"]
|
||||
);
|
||||
|
||||
/*
|
||||
* Eth1 rpc connection
|
||||
*/
|
||||
|
||||
pub static ref ETH1_CONNECTED: Result<IntGauge> = try_create_int_gauge(
|
||||
"sync_eth1_connected", "Set to 1 if connected to an eth1 node, otherwise set to 0"
|
||||
);
|
||||
|
||||
pub static ref ETH1_FALLBACK_CONFIGURED: Result<IntGauge> = try_create_int_gauge(
|
||||
"sync_eth1_fallback_configured", "Number of configured eth1 fallbacks"
|
||||
);
|
||||
|
||||
// Note: This metric only checks if an eth1 fallback is configured, not if it is connected and synced.
|
||||
// Checking for liveness of the fallback would require moving away from lazy checking of fallbacks.
|
||||
pub static ref ETH1_FALLBACK_CONNECTED: Result<IntGauge> = try_create_int_gauge(
|
||||
"eth1_sync_fallback_connected", "Set to 1 if an eth1 fallback is connected, otherwise set to 0"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@@ -94,6 +94,9 @@ impl EndpointsCache {
|
||||
&crate::metrics::ENDPOINT_ERRORS,
|
||||
&[&endpoint.0.to_string()],
|
||||
);
|
||||
crate::metrics::set_gauge(&metrics::ETH1_CONNECTED, 0);
|
||||
} else {
|
||||
crate::metrics::set_gauge(&metrics::ETH1_CONNECTED, 1);
|
||||
}
|
||||
state
|
||||
}
|
||||
@@ -730,6 +733,7 @@ impl Service {
|
||||
|
||||
let mut interval = interval_at(Instant::now(), update_interval);
|
||||
|
||||
let num_fallbacks = self.config().endpoints.len() - 1;
|
||||
let update_future = async move {
|
||||
loop {
|
||||
interval.tick().await;
|
||||
@@ -737,6 +741,15 @@ impl Service {
|
||||
}
|
||||
};
|
||||
|
||||
// Set the number of configured eth1 fallback servers
|
||||
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONFIGURED, num_fallbacks as i64);
|
||||
// Since we lazily update eth1 fallbacks, it's not possible to know the connection status of a fallback.
|
||||
// Hence, we set it to 1 if we have at least one configured fallback.
|
||||
if num_fallbacks > 0 {
|
||||
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONNECTED, 1);
|
||||
} else {
|
||||
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONNECTED, 0);
|
||||
}
|
||||
handle.spawn(update_future, "eth1");
|
||||
}
|
||||
|
||||
|
||||
@@ -232,6 +232,23 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
|
||||
.takes_value(true),
|
||||
)
|
||||
|
||||
/*
|
||||
* Monitoring metrics
|
||||
*/
|
||||
|
||||
.arg(
|
||||
Arg::with_name("monitoring-endpoint")
|
||||
.long("monitoring-endpoint")
|
||||
.value_name("ADDRESS")
|
||||
.help("Enables the monitoring service for sending system metrics to a remote endpoint. \
|
||||
This can be used to monitor your setup on certain services (e.g. beaconcha.in). \
|
||||
This flag sets the endpoint where the beacon node metrics will be sent. \
|
||||
Note: This will send information to a remote sever which may identify and associate your \
|
||||
validators, IP address and other personal information. Always use a HTTPS connection \
|
||||
and never provide an untrusted URL.")
|
||||
.takes_value(true),
|
||||
)
|
||||
|
||||
/*
|
||||
* Standard staking flags
|
||||
*/
|
||||
|
||||
@@ -136,6 +136,17 @@ pub fn get_config<E: EthSpec>(
|
||||
client_config.http_metrics.allow_origin = Some(allow_origin.to_string());
|
||||
}
|
||||
|
||||
/*
|
||||
* Monitoring metrics
|
||||
*/
|
||||
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
|
||||
client_config.monitoring_api = Some(monitoring_api::Config {
|
||||
db_path: None,
|
||||
freezer_db_path: None,
|
||||
monitoring_endpoint: monitoring_endpoint.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Log a warning indicating an open HTTP server if it wasn't specified explicitly
|
||||
// (e.g. using the --staking flag).
|
||||
if cli_args.is_present("staking") {
|
||||
|
||||
@@ -63,14 +63,14 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
|
||||
let log = context.log().clone();
|
||||
let datadir = client_config.create_data_dir()?;
|
||||
let db_path = client_config.create_db_path()?;
|
||||
let freezer_db_path_res = client_config.create_freezer_db_path();
|
||||
let freezer_db_path = client_config.create_freezer_db_path()?;
|
||||
let executor = context.executor.clone();
|
||||
|
||||
let builder = ClientBuilder::new(context.eth_spec_instance.clone())
|
||||
.runtime_context(context)
|
||||
.chain_spec(spec)
|
||||
.http_api_config(client_config.http_api.clone())
|
||||
.disk_store(&datadir, &db_path, &freezer_db_path_res?, store_config)?;
|
||||
.disk_store(&datadir, &db_path, &freezer_db_path, store_config)?;
|
||||
|
||||
let builder = if let Some(slasher_config) = client_config.slasher.clone() {
|
||||
let slasher = Arc::new(
|
||||
@@ -82,6 +82,14 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
|
||||
builder
|
||||
};
|
||||
|
||||
let builder = if let Some(monitoring_config) = &mut client_config.monitoring_api {
|
||||
monitoring_config.db_path = Some(db_path);
|
||||
monitoring_config.freezer_db_path = Some(freezer_db_path);
|
||||
builder.monitoring_client(monitoring_config)?
|
||||
} else {
|
||||
builder
|
||||
};
|
||||
|
||||
let builder = builder
|
||||
.beacon_chain_builder(client_genesis, client_config.clone())
|
||||
.await?;
|
||||
|
||||
@@ -21,7 +21,7 @@ mod impls;
|
||||
mod leveldb_store;
|
||||
mod memory_store;
|
||||
pub mod metadata;
|
||||
mod metrics;
|
||||
pub mod metrics;
|
||||
mod partial_beacon_state;
|
||||
|
||||
pub mod iter;
|
||||
|
||||
Reference in New Issue
Block a user