mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-22 22:34:45 +00:00
Improve tokio task execution (#1181)
* Add logging on shutdown
* Replace tokio::spawn with handle.spawn
* Upgrade tokio
* Add a task executor
* Beacon chain tasks use task executor
* Validator client tasks use task executor
* Rename runtime_handle to executor
* Add duration histograms; minor fixes
* Cleanup
* Fix logs
* Fix tests
* Remove random file
* Get enr dependency instead of libp2p
* Address some review comments
* Libp2p takes a TaskExecutor
* Ugly fix libp2p tests
* Move TaskExecutor to own file
* Upgrade Dockerfile rust version
* Minor fixes
* Revert "Ugly fix libp2p tests"
This reverts commit 58d4bb690f.
* Pretty fix libp2p tests
* Add spawn_without_exit; change Counter to Gauge
* Tidy
* Move log from RuntimeContext to TaskExecutor
* Fix errors
* Replace histogram with int_gauge for async tasks
* Fix todo
* Fix memory leak in test by exiting all spawned tasks at the end
This commit is contained in:
@@ -6,7 +6,7 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33.0"
|
||||
tokio = "0.2.20"
|
||||
tokio = "0.2.21"
|
||||
slog = { version = "2.5.2", features = ["max_level_trace"] }
|
||||
sloggers = "1.0.0"
|
||||
types = { "path" = "../../consensus/types" }
|
||||
@@ -20,6 +20,9 @@ ctrlc = { version = "3.1.4", features = ["termination"] }
|
||||
futures = "0.3.5"
|
||||
parking_lot = "0.10.2"
|
||||
slog-json = "2.3.0"
|
||||
exit-future = "0.2.0"
|
||||
lazy_static = "1.4.0"
|
||||
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
|
||||
|
||||
[dev-dependencies]
|
||||
beacon_node = { path = "../../beacon_node" }
|
||||
|
||||
128
lighthouse/environment/src/executor.rs
Normal file
128
lighthouse/environment/src/executor.rs
Normal file
@@ -0,0 +1,128 @@
|
||||
use crate::metrics;
|
||||
use futures::prelude::*;
|
||||
use slog::{debug, trace};
|
||||
use tokio::runtime::Handle;
|
||||
|
||||
/// A wrapper over a runtime handle which can spawn async and blocking tasks.
|
||||
#[derive(Clone)]
|
||||
pub struct TaskExecutor {
|
||||
/// The handle to the runtime on which tasks are spawned
|
||||
pub(crate) handle: Handle,
|
||||
/// The receiver exit future which on receiving shuts down the task
|
||||
pub(crate) exit: exit_future::Exit,
|
||||
pub(crate) log: slog::Logger,
|
||||
}
|
||||
|
||||
impl TaskExecutor {
|
||||
/// Create a new task executor.
|
||||
///
|
||||
/// Note: this function is mainly useful in tests. A `TaskExecutor` should be normally obtained from
|
||||
/// a [`RuntimeContext`](struct.RuntimeContext.html)
|
||||
pub fn new(handle: Handle, exit: exit_future::Exit, log: slog::Logger) -> Self {
|
||||
Self { handle, exit, log }
|
||||
}
|
||||
|
||||
/// Spawn a future on the tokio runtime wrapped in an `exit_future::Exit`. The task is canceled
|
||||
/// when the corresponding exit_future `Signal` is fired/dropped.
|
||||
///
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
pub fn spawn(&self, task: impl Future<Output = ()> + Send + 'static, name: &'static str) {
|
||||
let exit = self.exit.clone();
|
||||
let log = self.log.clone();
|
||||
|
||||
if let Some(int_gauge) = metrics::get_int_gauge(&metrics::ASYNC_TASKS_COUNT, &[name]) {
|
||||
// Task is shutdown before it completes if `exit` receives
|
||||
let int_gauge_1 = int_gauge.clone();
|
||||
let future = future::select(Box::pin(task), exit).then(move |either| {
|
||||
match either {
|
||||
future::Either::Left(_) => trace!(log, "Async task completed"; "task" => name),
|
||||
future::Either::Right(_) => {
|
||||
debug!(log, "Async task shutdown, exit received"; "task" => name)
|
||||
}
|
||||
}
|
||||
int_gauge_1.dec();
|
||||
futures::future::ready(())
|
||||
});
|
||||
|
||||
int_gauge.inc();
|
||||
self.handle.spawn(future);
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn a future on the tokio runtime. This function does not wrap the task in an `exit_future::Exit`
|
||||
/// like [spawn](#method.spawn).
|
||||
/// The caller of this function is responsible for wrapping up the task with an `exit_future::Exit` to
|
||||
/// ensure that the task gets canceled appropriately.
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
///
|
||||
/// This is useful in cases where the future to be spawned needs to do additional cleanup work when
|
||||
/// the task is completed/canceled (e.g. writing local variables to disk) or the task is created from
|
||||
/// some framework which does its own cleanup (e.g. a hyper server).
|
||||
pub fn spawn_without_exit(
|
||||
&self,
|
||||
task: impl Future<Output = ()> + Send + 'static,
|
||||
name: &'static str,
|
||||
) {
|
||||
if let Some(int_gauge) = metrics::get_int_gauge(&metrics::ASYNC_TASKS_COUNT, &[name]) {
|
||||
let int_gauge_1 = int_gauge.clone();
|
||||
let future = task.then(move |_| {
|
||||
int_gauge_1.dec();
|
||||
futures::future::ready(())
|
||||
});
|
||||
|
||||
int_gauge.inc();
|
||||
self.handle.spawn(future);
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn a blocking task on a dedicated tokio thread pool wrapped in an exit future.
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
pub fn spawn_blocking<F>(&self, task: F, name: &'static str)
|
||||
where
|
||||
F: FnOnce() -> () + Send + 'static,
|
||||
{
|
||||
let exit = self.exit.clone();
|
||||
let log = self.log.clone();
|
||||
|
||||
if let Some(metric) = metrics::get_histogram(&metrics::BLOCKING_TASKS_HISTOGRAM, &[name]) {
|
||||
if let Some(int_gauge) = metrics::get_int_gauge(&metrics::BLOCKING_TASKS_COUNT, &[name])
|
||||
{
|
||||
let int_gauge_1 = int_gauge.clone();
|
||||
let timer = metric.start_timer();
|
||||
let join_handle = self.handle.spawn_blocking(task);
|
||||
|
||||
let future = future::select(join_handle, exit).then(move |either| {
|
||||
match either {
|
||||
future::Either::Left(_) => {
|
||||
trace!(log, "Blocking task completed"; "task" => name)
|
||||
}
|
||||
future::Either::Right(_) => {
|
||||
debug!(log, "Blocking task shutdown, exit received"; "task" => name)
|
||||
}
|
||||
}
|
||||
timer.observe_duration();
|
||||
int_gauge_1.dec();
|
||||
futures::future::ready(())
|
||||
});
|
||||
|
||||
int_gauge.inc();
|
||||
self.handle.spawn(future);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the underlying runtime handle.
|
||||
pub fn runtime_handle(&self) -> Handle {
|
||||
self.handle.clone()
|
||||
}
|
||||
|
||||
/// Returns a copy of the `exit_future::Exit`.
|
||||
pub fn exit(&self) -> exit_future::Exit {
|
||||
self.exit.clone()
|
||||
}
|
||||
|
||||
/// Returns a reference to the logger.
|
||||
pub fn log(&self) -> &slog::Logger {
|
||||
&self.log
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,8 @@
|
||||
use eth2_config::Eth2Config;
|
||||
use eth2_testnet_config::Eth2TestnetConfig;
|
||||
use futures::channel::oneshot;
|
||||
|
||||
pub use executor::TaskExecutor;
|
||||
use slog::{info, o, Drain, Level, Logger};
|
||||
use sloggers::{null::NullLoggerBuilder, Build};
|
||||
use std::cell::RefCell;
|
||||
@@ -17,8 +19,10 @@ use std::ffi::OsStr;
|
||||
use std::fs::{rename as FsRename, OpenOptions};
|
||||
use std::path::PathBuf;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::runtime::{Builder as RuntimeBuilder, Handle, Runtime};
|
||||
use tokio::runtime::{Builder as RuntimeBuilder, Runtime};
|
||||
use types::{EthSpec, InteropEthSpec, MainnetEthSpec, MinimalEthSpec};
|
||||
mod executor;
|
||||
mod metrics;
|
||||
|
||||
pub const ETH2_CONFIG_FILENAME: &str = "eth2-spec.toml";
|
||||
|
||||
@@ -172,10 +176,13 @@ impl<E: EthSpec> EnvironmentBuilder<E> {
|
||||
|
||||
/// Consumes the builder, returning an `Environment`.
|
||||
pub fn build(self) -> Result<Environment<E>, String> {
|
||||
let (signal, exit) = exit_future::signal();
|
||||
Ok(Environment {
|
||||
runtime: self
|
||||
.runtime
|
||||
.ok_or_else(|| "Cannot build environment without runtime".to_string())?,
|
||||
signal: Some(signal),
|
||||
exit,
|
||||
log: self
|
||||
.log
|
||||
.ok_or_else(|| "Cannot build environment without log".to_string())?,
|
||||
@@ -192,8 +199,7 @@ impl<E: EthSpec> EnvironmentBuilder<E> {
|
||||
/// `Runtime`, instead it only has access to a `Runtime`.
|
||||
#[derive(Clone)]
|
||||
pub struct RuntimeContext<E: EthSpec> {
|
||||
pub runtime_handle: Handle,
|
||||
pub log: Logger,
|
||||
pub executor: TaskExecutor,
|
||||
pub eth_spec_instance: E,
|
||||
pub eth2_config: Eth2Config,
|
||||
}
|
||||
@@ -204,8 +210,11 @@ impl<E: EthSpec> RuntimeContext<E> {
|
||||
/// The generated service will have the `service_name` in all it's logs.
|
||||
pub fn service_context(&self, service_name: String) -> Self {
|
||||
Self {
|
||||
runtime_handle: self.runtime_handle.clone(),
|
||||
log: self.log.new(o!("service" => service_name)),
|
||||
executor: TaskExecutor {
|
||||
handle: self.executor.handle.clone(),
|
||||
exit: self.executor.exit.clone(),
|
||||
log: self.executor.log.new(o!("service" => service_name)),
|
||||
},
|
||||
eth_spec_instance: self.eth_spec_instance.clone(),
|
||||
eth2_config: self.eth2_config.clone(),
|
||||
}
|
||||
@@ -215,12 +224,19 @@ impl<E: EthSpec> RuntimeContext<E> {
|
||||
pub fn eth2_config(&self) -> &Eth2Config {
|
||||
&self.eth2_config
|
||||
}
|
||||
|
||||
/// Returns a reference to the logger for this service.
|
||||
pub fn log(&self) -> &slog::Logger {
|
||||
self.executor.log()
|
||||
}
|
||||
}
|
||||
|
||||
/// An environment where Lighthouse services can run. Used to start a production beacon node or
|
||||
/// validator client, or to run tests that involve logging and async task execution.
|
||||
pub struct Environment<E: EthSpec> {
|
||||
runtime: Runtime,
|
||||
signal: Option<exit_future::Signal>,
|
||||
exit: exit_future::Exit,
|
||||
log: Logger,
|
||||
eth_spec_instance: E,
|
||||
pub eth2_config: Eth2Config,
|
||||
@@ -239,8 +255,11 @@ impl<E: EthSpec> Environment<E> {
|
||||
/// Returns a `Context` where no "service" has been added to the logger output.
|
||||
pub fn core_context(&mut self) -> RuntimeContext<E> {
|
||||
RuntimeContext {
|
||||
runtime_handle: self.runtime.handle().clone(),
|
||||
log: self.log.clone(),
|
||||
executor: TaskExecutor {
|
||||
exit: self.exit.clone(),
|
||||
handle: self.runtime().handle().clone(),
|
||||
log: self.log.clone(),
|
||||
},
|
||||
eth_spec_instance: self.eth_spec_instance.clone(),
|
||||
eth2_config: self.eth2_config.clone(),
|
||||
}
|
||||
@@ -249,8 +268,11 @@ impl<E: EthSpec> Environment<E> {
|
||||
/// Returns a `Context` where the `service_name` is added to the logger output.
|
||||
pub fn service_context(&mut self, service_name: String) -> RuntimeContext<E> {
|
||||
RuntimeContext {
|
||||
runtime_handle: self.runtime.handle().clone(),
|
||||
log: self.log.new(o!("service" => service_name)),
|
||||
executor: TaskExecutor {
|
||||
exit: self.exit.clone(),
|
||||
handle: self.runtime().handle().clone(),
|
||||
log: self.log.new(o!("service" => service_name.clone())),
|
||||
},
|
||||
eth_spec_instance: self.eth_spec_instance.clone(),
|
||||
eth2_config: self.eth2_config.clone(),
|
||||
}
|
||||
@@ -279,6 +301,13 @@ impl<E: EthSpec> Environment<E> {
|
||||
.shutdown_timeout(std::time::Duration::from_secs(2))
|
||||
}
|
||||
|
||||
/// Fire exit signal which shuts down all spawned services
|
||||
pub fn fire_signal(&mut self) {
|
||||
if let Some(signal) = self.signal.take() {
|
||||
let _ = signal.fire();
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the logger (and all child loggers) to log to a file.
|
||||
pub fn log_to_json_file(
|
||||
&mut self,
|
||||
|
||||
21
lighthouse/environment/src/metrics.rs
Normal file
21
lighthouse/environment/src/metrics.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
//! Handles async task metrics.
|
||||
use lazy_static::lazy_static;
|
||||
pub use lighthouse_metrics::*;
|
||||
|
||||
lazy_static! {
    /// Gauge of async tasks, labelled by task name.
    pub static ref ASYNC_TASKS_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
        "async_tasks_count",
        "Total number of async tasks spawned using spawn",
        &["async_task_count"]
    );
    /// Gauge of blocking tasks, labelled by task name.
    pub static ref BLOCKING_TASKS_COUNT: Result<IntGaugeVec> = try_create_int_gauge_vec(
        "blocking_tasks_count",
        "Total number of blocking tasks spawned using spawn_blocking",
        &["blocking_task_count"]
    );
    /// Histogram of the time taken by blocking tasks, labelled by task name.
    pub static ref BLOCKING_TASKS_HISTOGRAM: Result<HistogramVec> = try_create_histogram_vec(
        "blocking_tasks_histogram",
        "Time taken by blocking tasks",
        &["blocking_task_hist"]
    );
}
|
||||
Reference in New Issue
Block a user