mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-06 10:11:44 +00:00
[Merge] Block validator duties when EL is not ready (#2672)
* Reject some HTTP endpoints when EL is not ready
* Restrict more endpoints
* Add watchdog task
* Change scheduling
* Update to new schedule
* Add "syncing" concept
* Remove RequireSynced
* Add is_merge_complete to head_info
* Cache latest_head in Engines
* Call consensus_forkchoiceUpdate on startup
This commit is contained in:
@@ -2,32 +2,38 @@
|
||||
|
||||
use crate::engine_api::{EngineApi, Error as EngineApiError};
|
||||
use futures::future::join_all;
|
||||
use slog::{crit, error, info, warn, Logger};
|
||||
use slog::{crit, debug, error, info, warn, Logger};
|
||||
use std::future::Future;
|
||||
use tokio::sync::RwLock;
|
||||
use types::Hash256;
|
||||
|
||||
/// Stores the remembered state of an engine.
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
enum EngineState {
|
||||
Online,
|
||||
Synced,
|
||||
Offline,
|
||||
Syncing,
|
||||
}
|
||||
|
||||
impl EngineState {
|
||||
fn set_online(&mut self) {
|
||||
*self = EngineState::Online
|
||||
}
|
||||
#[derive(Copy, Clone, PartialEq, Debug)]
|
||||
pub struct ForkChoiceHead {
|
||||
pub head_block_hash: Hash256,
|
||||
pub finalized_block_hash: Hash256,
|
||||
}
|
||||
|
||||
fn set_offline(&mut self) {
|
||||
*self = EngineState::Offline
|
||||
}
|
||||
/// Used to enable/disable logging on some tasks.
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
pub enum Logging {
|
||||
Enabled,
|
||||
Disabled,
|
||||
}
|
||||
|
||||
fn is_online(&self) -> bool {
|
||||
*self == EngineState::Online
|
||||
}
|
||||
|
||||
fn is_offline(&self) -> bool {
|
||||
*self == EngineState::Offline
|
||||
impl Logging {
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
match self {
|
||||
Logging::Enabled => true,
|
||||
Logging::Disabled => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +59,7 @@ impl<T> Engine<T> {
|
||||
/// manner.
|
||||
pub struct Engines<T> {
|
||||
pub engines: Vec<Engine<T>>,
|
||||
pub latest_head: RwLock<Option<ForkChoiceHead>>,
|
||||
pub log: Logger,
|
||||
}
|
||||
|
||||
@@ -63,45 +70,112 @@ pub enum EngineError {
|
||||
}
|
||||
|
||||
impl<T: EngineApi> Engines<T> {
|
||||
pub async fn set_latest_head(&self, latest_head: ForkChoiceHead) {
|
||||
*self.latest_head.write().await = Some(latest_head);
|
||||
}
|
||||
|
||||
async fn send_latest_head(&self, engine: &Engine<T>) {
|
||||
let latest_head: Option<ForkChoiceHead> = *self.latest_head.read().await;
|
||||
if let Some(head) = latest_head {
|
||||
info!(
|
||||
self.log,
|
||||
"Issuing forkchoiceUpdated";
|
||||
"head" => ?head,
|
||||
"id" => &engine.id,
|
||||
);
|
||||
|
||||
if let Err(e) = engine
|
||||
.api
|
||||
.forkchoice_updated(head.head_block_hash, head.finalized_block_hash)
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
self.log,
|
||||
"Failed to issue latest head to engine";
|
||||
"error" => ?e,
|
||||
"id" => &engine.id,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
debug!(
|
||||
self.log,
|
||||
"No head, not sending to engine";
|
||||
"id" => &engine.id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if there is at least one engine with a "synced" status.
|
||||
pub async fn any_synced(&self) -> bool {
|
||||
for engine in &self.engines {
|
||||
if *engine.state.read().await == EngineState::Synced {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Run the `EngineApi::upcheck` function on all nodes which are not currently synced.
|
||||
///
|
||||
/// This can be used to try to recover any offline or syncing nodes.
|
||||
async fn upcheck_offline(&self) {
|
||||
pub async fn upcheck_not_synced(&self, logging: Logging) {
|
||||
let upcheck_futures = self.engines.iter().map(|engine| async move {
|
||||
let mut state = engine.state.write().await;
|
||||
if state.is_offline() {
|
||||
let mut state_lock = engine.state.write().await;
|
||||
if *state_lock != EngineState::Synced {
|
||||
match engine.api.upcheck().await {
|
||||
Ok(()) => {
|
||||
info!(
|
||||
self.log,
|
||||
"Execution engine online";
|
||||
"id" => &engine.id
|
||||
);
|
||||
state.set_online()
|
||||
if logging.is_enabled() {
|
||||
info!(
|
||||
self.log,
|
||||
"Execution engine online";
|
||||
"id" => &engine.id
|
||||
);
|
||||
}
|
||||
|
||||
// Send the node our latest head.
|
||||
self.send_latest_head(engine).await;
|
||||
|
||||
*state_lock = EngineState::Synced
|
||||
}
|
||||
Err(EngineApiError::IsSyncing) => {
|
||||
if logging.is_enabled() {
|
||||
warn!(
|
||||
self.log,
|
||||
"Execution engine syncing";
|
||||
"id" => &engine.id
|
||||
)
|
||||
}
|
||||
|
||||
// Send the node our latest head, it may assist with syncing.
|
||||
self.send_latest_head(engine).await;
|
||||
|
||||
*state_lock = EngineState::Syncing
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
self.log,
|
||||
"Execution engine offline";
|
||||
"error" => ?e,
|
||||
"id" => &engine.id
|
||||
)
|
||||
if logging.is_enabled() {
|
||||
warn!(
|
||||
self.log,
|
||||
"Execution engine offline";
|
||||
"error" => ?e,
|
||||
"id" => &engine.id
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*state
|
||||
*state_lock
|
||||
});
|
||||
|
||||
let num_online = join_all(upcheck_futures)
|
||||
let num_synced = join_all(upcheck_futures)
|
||||
.await
|
||||
.into_iter()
|
||||
.filter(|state: &EngineState| state.is_online())
|
||||
.filter(|state: &EngineState| *state == EngineState::Synced)
|
||||
.count();
|
||||
|
||||
if num_online == 0 {
|
||||
if num_synced == 0 && logging.is_enabled() {
|
||||
crit!(
|
||||
self.log,
|
||||
"No execution engines online";
|
||||
"No synced execution engines";
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -120,7 +194,7 @@ impl<T: EngineApi> Engines<T> {
|
||||
Ok(result) => Ok(result),
|
||||
Err(mut first_errors) => {
|
||||
// Try to recover some nodes.
|
||||
self.upcheck_offline().await;
|
||||
self.upcheck_not_synced(Logging::Enabled).await;
|
||||
// Retry the call on all nodes.
|
||||
match self.first_success_without_retry(func).await {
|
||||
Ok(result) => Ok(result),
|
||||
@@ -146,8 +220,8 @@ impl<T: EngineApi> Engines<T> {
|
||||
let mut errors = vec![];
|
||||
|
||||
for engine in &self.engines {
|
||||
let engine_online = engine.state.read().await.is_online();
|
||||
if engine_online {
|
||||
let engine_synced = *engine.state.read().await == EngineState::Synced;
|
||||
if engine_synced {
|
||||
match func(engine).await {
|
||||
Ok(result) => return Ok(result),
|
||||
Err(error) => {
|
||||
@@ -157,7 +231,7 @@ impl<T: EngineApi> Engines<T> {
|
||||
"error" => ?error,
|
||||
"id" => &engine.id
|
||||
);
|
||||
engine.state.write().await.set_offline();
|
||||
*engine.state.write().await = EngineState::Offline;
|
||||
errors.push(EngineError::Api {
|
||||
id: engine.id.clone(),
|
||||
error,
|
||||
@@ -174,7 +248,8 @@ impl<T: EngineApi> Engines<T> {
|
||||
Err(errors)
|
||||
}
|
||||
|
||||
/// Runs `func` on all nodes concurrently, returning all results.
|
||||
/// Runs `func` on all nodes concurrently, returning all results. Any nodes that are offline
|
||||
/// will be ignored; however, all synced or syncing nodes will receive the broadcast.
|
||||
///
|
||||
/// This function might try to run `func` twice. If all nodes return an error on the first time
|
||||
/// it runs, it will try to upcheck all offline nodes and then run the function again.
|
||||
@@ -195,7 +270,7 @@ impl<T: EngineApi> Engines<T> {
|
||||
}
|
||||
|
||||
if any_offline {
|
||||
self.upcheck_offline().await;
|
||||
self.upcheck_not_synced(Logging::Enabled).await;
|
||||
self.broadcast_without_retry(func).await
|
||||
} else {
|
||||
first_results
|
||||
@@ -213,8 +288,8 @@ impl<T: EngineApi> Engines<T> {
|
||||
{
|
||||
let func = &func;
|
||||
let futures = self.engines.iter().map(|engine| async move {
|
||||
let engine_online = engine.state.read().await.is_online();
|
||||
if engine_online {
|
||||
let is_offline = *engine.state.read().await == EngineState::Offline;
|
||||
if !is_offline {
|
||||
func(engine).await.map_err(|error| {
|
||||
error!(
|
||||
self.log,
|
||||
|
||||
Reference in New Issue
Block a user