mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-22 06:14:38 +00:00
Fix block processing blowup, upgrade metrics (#500)
* Renamed fork_choice::process_attestation_from_block * Processing attestation in fork choice * Retrieving state from store and checking signature * Looser check on beacon state validity. * Cleaned up get_attestation_state * Expanded fork choice api to provide latest validator message. * Checking if the an attestation contains a latest message * Correct process_attestation error handling. * Copy paste error in comment fixed. * Tidy ancestor iterators * Getting attestation slot via helper method * Refactored attestation creation in test utils * Revert "Refactored attestation creation in test utils" This reverts commit 4d277fe4239a7194758b18fb5c00dfe0b8231306. * Integration tests for free attestation processing * Implicit conflicts resolved. * formatting * Do first pass on Grants code * Add another attestation processing test * Tidy attestation processing * Remove old code fragment * Add non-compiling half finished changes * Simplify, fix bugs, add tests for chain iters * Remove attestation processing from op pool * Fix bug with fork choice, tidy * Fix overly restrictive check in fork choice. * Ensure committee cache is build during attn proc * Ignore unknown blocks at fork choice * Various minor fixes * Make fork choice write lock in to read lock * Remove unused method * Tidy comments * Fix attestation prod. target roots change * Fix compile error in store iters * Reject any attestation prior to finalization * Begin metrics refactor * Move beacon_chain to new metrics structure. * Make metrics not panic if already defined * Use global prometheus gather at rest api * Unify common metric fns into a crate * Add heavy metering to block processing * Remove hypen from prometheus metric name * Add more beacon chain metrics * Add beacon chain persistence metric * Prune op pool on finalization * Add extra prom beacon chain metrics * Prefix BeaconChain metrics with "beacon_" * Add more store metrics * Add basic metrics to libp2p * Add metrics to HTTP server * Remove old `http_server` crate * Update metrics names to be more like standard * Fix broken beacon chain metrics, add slot clock metrics * Add lighthouse_metrics gather fn * Remove http args * Fix wrong state given to op pool prune * Make prom metric names more consistent * Add more metrics, tidy existing metrics * Fix store block read metrics * Tidy attestation metrics * Fix minor PR comments * Allow travis failures on beta (see desc) There's a non-backward compatible change in `cargo fmt`. Stable and beta do not agree. * Tidy `lighthouse_metrics` docs * Fix typo
This commit is contained in:
11
eth2/utils/lighthouse_metrics/Cargo.toml
Normal file
11
eth2/utils/lighthouse_metrics/Cargo.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "lighthouse_metrics"
|
||||
version = "0.1.0"
|
||||
authors = ["Paul Hauner <paul@paulhauner.com>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
lazy_static = "1.3.0"
|
||||
prometheus = "^0.6"
|
||||
129
eth2/utils/lighthouse_metrics/src/lib.rs
Normal file
129
eth2/utils/lighthouse_metrics/src/lib.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! A wrapper around the `prometheus` crate that provides a global, `lazy_static` metrics registry
|
||||
//! and functions to add and use the following components (more info at
|
||||
//! [Prometheus docs](https://prometheus.io/docs/concepts/metric_types/)):
|
||||
//!
|
||||
//! - `Histogram`: used with `start_timer(..)` and `stop_timer(..)` to record durations (e.g.,
|
||||
//! block processing time).
|
||||
//! - `IncCounter`: used to represent an ideally ever-growing, never-shrinking integer (e.g.,
|
||||
//! number of block processing requests).
|
||||
//! - `IntGauge`: used to represent an varying integer (e.g., number of attestations per block).
|
||||
//!
|
||||
//! ## Important
|
||||
//!
|
||||
//! Metrics will fail if two items have the same `name`. All metrics must have a unique `name`.
|
||||
//! Because we use a global registry there is no namespace per crate, it's one big global space.
|
||||
//!
|
||||
//! See the [Prometheus naming best practices](https://prometheus.io/docs/practices/naming/) when
|
||||
//! choosing metric names.
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```rust
|
||||
//! #[macro_use]
|
||||
//! extern crate lazy_static;
|
||||
//! use lighthouse_metrics::*;
|
||||
//!
|
||||
//! // These metrics are "magically" linked to the global registry defined in `lighthouse_metrics`.
|
||||
//! lazy_static! {
|
||||
//! pub static ref RUN_COUNT: Result<IntCounter> = try_create_int_counter(
|
||||
//! "runs_total",
|
||||
//! "Total number of runs"
|
||||
//! );
|
||||
//! pub static ref CURRENT_VALUE: Result<IntGauge> = try_create_int_gauge(
|
||||
//! "current_value",
|
||||
//! "The current value"
|
||||
//! );
|
||||
//! pub static ref RUN_TIME: Result<Histogram> =
|
||||
//! try_create_histogram("run_seconds", "Time taken (measured to high precision)");
|
||||
//! }
|
||||
//!
|
||||
//!
|
||||
//! fn main() {
|
||||
//! for i in 0..100 {
|
||||
//! inc_counter(&RUN_COUNT);
|
||||
//! let timer = start_timer(&RUN_TIME);
|
||||
//!
|
||||
//! for j in 0..10 {
|
||||
//! set_gauge(&CURRENT_VALUE, j);
|
||||
//! println!("Howdy partner");
|
||||
//! }
|
||||
//!
|
||||
//! stop_timer(timer);
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use prometheus::{HistogramOpts, HistogramTimer, Opts};
|
||||
|
||||
pub use prometheus::{Histogram, IntCounter, IntGauge, Result};
|
||||
|
||||
/// Collect all the metrics for reporting.
|
||||
pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
|
||||
prometheus::gather()
|
||||
}
|
||||
|
||||
/// Attempts to crate an `IntCounter`, returning `Err` if the registry does not accept the counter
|
||||
/// (potentially due to naming conflict).
|
||||
pub fn try_create_int_counter(name: &str, help: &str) -> Result<IntCounter> {
|
||||
let opts = Opts::new(name, help);
|
||||
let counter = IntCounter::with_opts(opts)?;
|
||||
prometheus::register(Box::new(counter.clone()))?;
|
||||
Ok(counter)
|
||||
}
|
||||
|
||||
/// Attempts to crate an `IntGauge`, returning `Err` if the registry does not accept the counter
|
||||
/// (potentially due to naming conflict).
|
||||
pub fn try_create_int_gauge(name: &str, help: &str) -> Result<IntGauge> {
|
||||
let opts = Opts::new(name, help);
|
||||
let gauge = IntGauge::with_opts(opts)?;
|
||||
prometheus::register(Box::new(gauge.clone()))?;
|
||||
Ok(gauge)
|
||||
}
|
||||
|
||||
/// Attempts to crate a `Histogram`, returning `Err` if the registry does not accept the counter
|
||||
/// (potentially due to naming conflict).
|
||||
pub fn try_create_histogram(name: &str, help: &str) -> Result<Histogram> {
|
||||
let opts = HistogramOpts::new(name, help);
|
||||
let histogram = Histogram::with_opts(opts)?;
|
||||
prometheus::register(Box::new(histogram.clone()))?;
|
||||
Ok(histogram)
|
||||
}
|
||||
|
||||
/// Starts a timer for the given `Histogram`, stopping when it gets dropped or given to `stop_timer(..)`.
|
||||
pub fn start_timer(histogram: &Result<Histogram>) -> Option<HistogramTimer> {
|
||||
if let Ok(histogram) = histogram {
|
||||
Some(histogram.start_timer())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops a timer created with `start_timer(..)`.
|
||||
pub fn stop_timer(timer: Option<HistogramTimer>) {
|
||||
timer.map(|t| t.observe_duration());
|
||||
}
|
||||
|
||||
pub fn inc_counter(counter: &Result<IntCounter>) {
|
||||
if let Ok(counter) = counter {
|
||||
counter.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inc_counter_by(counter: &Result<IntCounter>, value: i64) {
|
||||
if let Ok(counter) = counter {
|
||||
counter.inc_by(value);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_gauge(gauge: &Result<IntGauge>, value: i64) {
|
||||
if let Ok(gauge) = gauge {
|
||||
gauge.set(value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the value of a `Histogram` manually.
|
||||
pub fn observe(histogram: &Result<Histogram>, value: f64) {
|
||||
if let Ok(histogram) = histogram {
|
||||
histogram.observe(value);
|
||||
}
|
||||
}
|
||||
@@ -6,3 +6,5 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
types = { path = "../../types" }
|
||||
lazy_static = "1.3.0"
|
||||
lighthouse_metrics = { path = "../lighthouse_metrics" }
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
mod metrics;
|
||||
mod system_time_slot_clock;
|
||||
mod testing_slot_clock;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
pub use crate::system_time_slot_clock::{Error as SystemTimeSlotClockError, SystemTimeSlotClock};
|
||||
pub use crate::testing_slot_clock::{Error as TestingSlotClockError, TestingSlotClock};
|
||||
use std::time::Duration;
|
||||
pub use metrics::scrape_for_metrics;
|
||||
pub use types::Slot;
|
||||
|
||||
pub trait SlotClock: Send + Sync + Sized {
|
||||
@@ -17,4 +23,6 @@ pub trait SlotClock: Send + Sync + Sized {
|
||||
fn present_slot(&self) -> Result<Option<Slot>, Self::Error>;
|
||||
|
||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Self::Error>;
|
||||
|
||||
fn slot_duration_millis(&self) -> u64;
|
||||
}
|
||||
|
||||
32
eth2/utils/slot_clock/src/metrics.rs
Normal file
32
eth2/utils/slot_clock/src/metrics.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use crate::SlotClock;
|
||||
pub use lighthouse_metrics::*;
|
||||
use types::{EthSpec, Slot};
|
||||
|
||||
lazy_static! {
|
||||
pub static ref PRESENT_SLOT: Result<IntGauge> =
|
||||
try_create_int_gauge("slotclock_present_slot", "The present wall-clock slot");
|
||||
pub static ref PRESENT_EPOCH: Result<IntGauge> =
|
||||
try_create_int_gauge("slotclock_present_epoch", "The present wall-clock epoch");
|
||||
pub static ref SLOTS_PER_EPOCH: Result<IntGauge> =
|
||||
try_create_int_gauge("slotclock_slots_per_epoch", "Slots per epoch (constant)");
|
||||
pub static ref MILLISECONDS_PER_SLOT: Result<IntGauge> = try_create_int_gauge(
|
||||
"slotclock_slot_time_milliseconds",
|
||||
"The duration in milliseconds between each slot"
|
||||
);
|
||||
}
|
||||
|
||||
/// Update the global metrics `DEFAULT_REGISTRY` with info from the slot clock.
|
||||
pub fn scrape_for_metrics<T: EthSpec, U: SlotClock>(clock: &U) {
|
||||
let present_slot = match clock.present_slot() {
|
||||
Ok(Some(slot)) => slot,
|
||||
_ => Slot::new(0),
|
||||
};
|
||||
|
||||
set_gauge(&PRESENT_SLOT, present_slot.as_u64() as i64);
|
||||
set_gauge(
|
||||
&PRESENT_EPOCH,
|
||||
present_slot.epoch(T::slots_per_epoch()).as_u64() as i64,
|
||||
);
|
||||
set_gauge(&SLOTS_PER_EPOCH, T::slots_per_epoch() as i64);
|
||||
set_gauge(&MILLISECONDS_PER_SLOT, clock.slot_duration_millis() as i64);
|
||||
}
|
||||
@@ -52,6 +52,10 @@ impl SlotClock for SystemTimeSlotClock {
|
||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
||||
duration_to_next_slot(self.genesis_seconds, self.slot_duration_seconds)
|
||||
}
|
||||
|
||||
fn slot_duration_millis(&self) -> u64 {
|
||||
self.slot_duration_seconds * 1000
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SystemTimeError> for Error {
|
||||
|
||||
@@ -40,6 +40,10 @@ impl SlotClock for TestingSlotClock {
|
||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
||||
Ok(Some(Duration::from_secs(1)))
|
||||
}
|
||||
|
||||
fn slot_duration_millis(&self) -> u64 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user