mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-02 16:21:42 +00:00
Instrument tracing spans for block processing and import (#7816)
#7815 - removes all existing spans, so some span fields that appear in logs like `service_name` may be lost. - instruments a few key code paths in the beacon node, starting from **root spans** named below: * Gossip block and blobs * `process_gossip_data_column_sidecar` * `process_gossip_blob` * `process_gossip_block` * Rpc block and blobs * `process_rpc_block` * `process_rpc_blobs` * `process_rpc_custody_columns` * Rpc blocks (range and backfill) * `process_chain_segment` * `PendingComponents` lifecycle * `pending_components` To test locally: * Run Grafana and Tempo with https://github.com/sigp/lighthouse-metrics/pull/57 * Run Lighthouse BN with `--telemetry-collector-url http://localhost:4317` Some captured traces can be found here: https://hackmd.io/@jimmygchen/r1sLOxPPeg Removing the old spans seems to have reduced the memory usage quite a lot - I think we were using them on long-running tasks and excessively: <img width="910" height="495" alt="image" src="https://github.com/user-attachments/assets/5208bbe4-53b2-4ead-bc71-0b782c788669" />
This commit is contained in:
@@ -5,7 +5,7 @@ use futures::channel::mpsc::Sender;
|
||||
use futures::prelude::*;
|
||||
use std::sync::Weak;
|
||||
use tokio::runtime::{Handle, Runtime};
|
||||
use tracing::{debug, instrument};
|
||||
use tracing::debug;
|
||||
|
||||
pub use tokio::task::JoinHandle;
|
||||
|
||||
@@ -81,6 +81,8 @@ pub struct TaskExecutor {
|
||||
signal_tx: Sender<ShutdownReason>,
|
||||
|
||||
/// The name of the service for inclusion in the logger output.
|
||||
// FIXME(sproul): delete?
|
||||
#[allow(dead_code)]
|
||||
service_name: String,
|
||||
}
|
||||
|
||||
@@ -92,7 +94,6 @@ impl TaskExecutor {
|
||||
/// This function should only be used during testing. In production, prefer to obtain an
|
||||
/// instance of `Self` via a `environment::RuntimeContext` (see the `lighthouse/environment`
|
||||
/// crate).
|
||||
#[instrument(parent = None,fields(service = service_name), name = "task_executor", skip_all)]
|
||||
pub fn new<T: Into<HandleProvider>>(
|
||||
handle: T,
|
||||
exit: async_channel::Receiver<()>,
|
||||
@@ -108,7 +109,6 @@ impl TaskExecutor {
|
||||
}
|
||||
|
||||
/// Clones the task executor adding a service name.
|
||||
#[instrument(parent = None, fields(service = service_name), name = "task_executor", skip_all)]
|
||||
pub fn clone_with_name(&self, service_name: String) -> Self {
|
||||
TaskExecutor {
|
||||
handle_provider: self.handle_provider.clone(),
|
||||
@@ -124,7 +124,6 @@ impl TaskExecutor {
|
||||
/// The purpose of this function is to create a compile error if some function which previously
|
||||
/// returned `()` starts returning something else. Such a case may otherwise result in
|
||||
/// accidental error suppression.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn spawn_ignoring_error(
|
||||
&self,
|
||||
task: impl Future<Output = Result<(), ()>> + Send + 'static,
|
||||
@@ -136,7 +135,6 @@ impl TaskExecutor {
|
||||
/// Spawn a task to monitor the completion of another task.
|
||||
///
|
||||
/// If the other task exits by panicking, then the monitor task will shut down the executor.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
fn spawn_monitor<R: Send>(
|
||||
&self,
|
||||
task_handle: impl Future<Output = Result<R, tokio::task::JoinError>> + Send + 'static,
|
||||
@@ -175,7 +173,6 @@ impl TaskExecutor {
|
||||
/// of a panic, the executor will be shut down via `self.signal_tx`.
|
||||
///
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn spawn(&self, task: impl Future<Output = ()> + Send + 'static, name: &'static str) {
|
||||
if let Some(task_handle) = self.spawn_handle(task, name) {
|
||||
self.spawn_monitor(task_handle, name)
|
||||
@@ -191,7 +188,6 @@ impl TaskExecutor {
|
||||
/// This is useful in cases where the future to be spawned needs to do additional cleanup work when
|
||||
/// the task is completed/canceled (e.g. writing local variables to disk) or the task is created from
|
||||
/// some framework which does its own cleanup (e.g. a hyper server).
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn spawn_without_exit(
|
||||
&self,
|
||||
task: impl Future<Output = ()> + Send + 'static,
|
||||
@@ -235,7 +231,6 @@ impl TaskExecutor {
|
||||
/// The task is cancelled when the corresponding async-channel is dropped.
|
||||
///
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn spawn_handle<R: Send + 'static>(
|
||||
&self,
|
||||
task: impl Future<Output = R> + Send + 'static,
|
||||
@@ -283,7 +278,6 @@ impl TaskExecutor {
|
||||
/// The Future returned behaves like the standard JoinHandle which can return an error if the
|
||||
/// task failed.
|
||||
/// This function generates prometheus metrics on number of tasks and task duration.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn spawn_blocking_handle<F, R>(
|
||||
&self,
|
||||
task: F,
|
||||
@@ -332,7 +326,6 @@ impl TaskExecutor {
|
||||
/// a `tokio` context present in the thread-local storage due to some `rayon` funkiness. Talk to
|
||||
/// @paulhauner if you plan to use this function in production. He has put metrics in here to
|
||||
/// track any use of it, so don't think you can pull a sneaky one on him.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn block_on_dangerous<F: Future>(
|
||||
&self,
|
||||
future: F,
|
||||
@@ -368,7 +361,6 @@ impl TaskExecutor {
|
||||
}
|
||||
|
||||
/// Returns a `Handle` to the current runtime.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn handle(&self) -> Option<Handle> {
|
||||
self.handle_provider.handle()
|
||||
}
|
||||
@@ -383,7 +375,6 @@ impl TaskExecutor {
|
||||
}
|
||||
|
||||
/// Get a channel to request shutting down.
|
||||
#[instrument(parent = None, fields(service = self.service_name), name = "task_executor", skip_all)]
|
||||
pub fn shutdown_sender(&self) -> Sender<ShutdownReason> {
|
||||
self.signal_tx.clone()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user