From 507bb9dad46a02e6331d936d5a9974306d00ba80 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Mon, 19 Sep 2022 07:58:49 +0000 Subject: [PATCH] Refined payload pruning (#3587) ## Proposed Changes Improve the payload pruning feature in several ways: - Payload pruning is now entirely optional. It is enabled by default but can be disabled with `--prune-payloads false`. The previous `--prune-payloads-on-startup` flag from #3565 is removed. - Initial payload pruning on startup now runs in a background thread. This thread will always load the split state, which is a small fraction of its total work (up to ~300ms) and then backtrack from that state. This pruning process ran in 2m5s on one Prater node with good I/O and 16m on a node with slower I/O. - To work with the optional payload pruning the database function `try_load_full_block` will now attempt to load execution payloads for finalized slots _if_ pruning is currently disabled. This gives users an opt-out for the extensive traffic between the CL and EL for reconstructing payloads. ## Additional Info If the `prune-payloads` flag is toggled on and off then the on-startup check may not see any payloads to delete and fail to clean them up. In this case the `lighthouse db prune_payloads` command should be used to force a manual sweep of the database. --- beacon_node/beacon_chain/src/builder.rs | 21 ++++--- beacon_node/src/cli.rs | 8 ++- beacon_node/src/config.rs | 6 +- beacon_node/store/src/config.rs | 6 +- beacon_node/store/src/hot_cold_store.rs | 75 +++++++++++++++++++------ lighthouse/tests/beacon_node.rs | 8 +-- 6 files changed, 85 insertions(+), 39 deletions(-) diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 45375ec01e..916ebd2359 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -266,13 +266,6 @@ where self.genesis_time = Some(genesis_state.genesis_time()); - // Prune finalized execution payloads. - if store.get_config().prune_payloads_on_init { - store - .try_prune_execution_payloads(false) - .map_err(|e| format!("Error pruning execution payloads: {e:?}"))?; - } - self.op_pool = Some( store .get_item::>(&OP_POOL_DB_KEY) @@ -863,6 +856,20 @@ where beacon_chain.store_migrator.process_reconstruction(); } + // Prune finalized execution payloads in the background. + if beacon_chain.store.get_config().prune_payloads { + let store = beacon_chain.store.clone(); + let log = log.clone(); + beacon_chain.task_executor.spawn_blocking( + move || { + if let Err(e) = store.try_prune_execution_payloads(false) { + error!(log, "Error pruning payloads in background"; "error" => ?e); + } + }, + "prune_payloads_background", + ); + } + Ok(beacon_chain) } } diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index d9d5c715b4..9b5f65622a 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -516,9 +516,11 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .default_value("true") ) .arg( - Arg::with_name("prune-payloads-on-startup") - .long("prune-payloads-on-startup") - .help("Check for execution payloads to prune on start-up.") + Arg::with_name("prune-payloads") + .long("prune-payloads") + .help("Prune execution payloads from Lighthouse's database. This saves space but \ + imposes load on the execution client, as payloads need to be \ + reconstructed and sent to syncing peers.") .takes_value(true) .default_value("true") ) diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 368fce573d..982cb82ed4 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -358,10 +358,8 @@ pub fn get_config( .map_err(|_| "auto-compact-db takes a boolean".to_string())?; } - if let Some(prune_payloads_on_init) = - clap_utils::parse_optional(cli_args, "prune-payloads-on-startup")? - { - client_config.store.prune_payloads_on_init = prune_payloads_on_init; + if let Some(prune_payloads) = clap_utils::parse_optional(cli_args, "prune-payloads")? { + client_config.store.prune_payloads = prune_payloads; } /* diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index 9bc9ee8a45..027b8152ee 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -21,8 +21,8 @@ pub struct StoreConfig { pub compact_on_init: bool, /// Whether to compact the database during database pruning. pub compact_on_prune: bool, - /// Whether to try pruning execution payloads on initialization. - pub prune_payloads_on_init: bool, + /// Whether to prune payloads on initialization and finalization. + pub prune_payloads: bool, } /// Variant of `StoreConfig` that gets written to disk. Contains immutable configuration params. @@ -45,7 +45,7 @@ impl Default for StoreConfig { block_cache_size: DEFAULT_BLOCK_CACHE_SIZE, compact_on_init: false, compact_on_prune: true, - prune_payloads_on_init: true, + prune_payloads: true, } } } diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 80de674e9a..aff2be4cf1 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -21,6 +21,7 @@ use crate::{ get_key_for_col, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, PartialBeaconState, StoreItem, StoreOp, }; +use itertools::process_results; use leveldb::iterator::LevelDBIterator; use lru::LruCache; use parking_lot::{Mutex, RwLock}; @@ -334,8 +335,10 @@ impl, Cold: ItemStore> HotColdDB }; // If the block is after the split point then we should have the full execution payload - // stored in the database. Otherwise, just return the blinded block. - // Hold the split lock so that it can't change. + // stored in the database. If it isn't but payload pruning is disabled, try to load it + // on-demand. + // + // Hold the split lock so that it can't change while loading the payload. let split = self.split.read_recursive(); let block = if blinded_block.message().execution_payload().is_err() @@ -348,6 +351,18 @@ impl, Cold: ItemStore> HotColdDB self.block_cache.lock().put(*block_root, full_block.clone()); DatabaseBlock::Full(full_block) + } else if !self.config.prune_payloads { + // If payload pruning is disabled there's a chance we may have the payload of + // this finalized block. Attempt to load it but don't error in case it's missing. + if let Some(payload) = self.get_execution_payload(block_root)? { + DatabaseBlock::Full( + blinded_block + .try_into_full_block(Some(payload)) + .ok_or(Error::AddPayloadLogicError)?, + ) + } else { + DatabaseBlock::Blinded(blinded_block) + } } else { DatabaseBlock::Blinded(blinded_block) }; @@ -388,7 +403,9 @@ impl, Cold: ItemStore> HotColdDB blinded_block: SignedBeaconBlock>, ) -> Result, Error> { if blinded_block.message().execution_payload().is_ok() { - let execution_payload = self.get_execution_payload(block_root)?; + let execution_payload = self + .get_execution_payload(block_root)? + .ok_or(HotColdDBError::MissingExecutionPayload(*block_root))?; blinded_block.try_into_full_block(Some(execution_payload)) } else { blinded_block.try_into_full_block(None) @@ -433,9 +450,8 @@ impl, Cold: ItemStore> HotColdDB pub fn get_execution_payload( &self, block_root: &Hash256, - ) -> Result, Error> { - self.get_item(block_root)? - .ok_or_else(|| HotColdDBError::MissingExecutionPayload(*block_root).into()) + ) -> Result>, Error> { + self.get_item(block_root) } /// Check if the execution payload for a block exists on disk. @@ -1446,19 +1462,36 @@ impl, Cold: ItemStore> HotColdDB // The finalized block may or may not have its execution payload stored, depending on // whether it was at a skipped slot. However for a fully pruned database its parent - // should *always* have been pruned. - let split_parent_block_root = split_state.get_block_root(split.slot - 1)?; - if !self.execution_payload_exists(split_parent_block_root)? && !force { + // should *always* have been pruned. In case of a long split (no parent found) we + // continue as if the payloads are pruned, as the node probably has other things to worry + // about. + let split_block_root = split_state.get_latest_block_root(split.state_root); + + let already_pruned = + process_results(split_state.rev_iter_block_roots(&self.spec), |mut iter| { + iter.find(|(_, block_root)| *block_root != split_block_root) + .map_or(Ok(true), |(_, split_parent_root)| { + self.execution_payload_exists(&split_parent_root) + .map(|exists| !exists) + }) + })??; + + if already_pruned && !force { info!(self.log, "Execution payloads are pruned"); return Ok(()); } // Iterate block roots backwards to the Bellatrix fork or the anchor slot, whichever comes // first. - let split_block_root = split_state.get_latest_block_root(split.state_root); + warn!( + self.log, + "Pruning finalized payloads"; + "info" => "you may notice degraded I/O performance while this runs" + ); let anchor_slot = self.get_anchor_info().map(|info| info.anchor_slot); let mut ops = vec![]; + let mut last_pruned_block_root = None; for res in std::iter::once(Ok((split_block_root, split.slot))) .chain(BlockRootsIterator::new(self, &split_state)) @@ -1468,7 +1501,7 @@ impl, Cold: ItemStore> HotColdDB Err(e) => { warn!( self.log, - "Stopping backtrack early"; + "Stopping payload pruning early"; "error" => ?e, ); break; @@ -1478,25 +1511,28 @@ impl, Cold: ItemStore> HotColdDB if slot < bellatrix_fork_slot { info!( self.log, - "Finished backtrack to Bellatrix fork"; + "Payload pruning reached Bellatrix boundary"; ); break; } - if self.execution_payload_exists(&block_root)? { + if Some(block_root) != last_pruned_block_root + && self.execution_payload_exists(&block_root)? + { debug!( self.log, "Pruning execution payload"; "slot" => slot, "block_root" => ?block_root, ); + last_pruned_block_root = Some(block_root); ops.push(StoreOp::DeleteExecutionPayload(block_root)); } if Some(slot) == anchor_slot { info!( self.log, - "Finished backtrack to anchor state"; + "Payload pruning reached anchor state"; "slot" => slot ); break; @@ -1583,10 +1619,13 @@ pub fn migrate_database, Cold: ItemStore>( // Delete the old summary, and the full state if we lie on an epoch boundary. hot_db_ops.push(StoreOp::DeleteState(state_root, Some(slot))); - // Delete the execution payload. Even if this execution payload is the payload of the - // new finalized block it is OK to delete it, as `try_get_full_block` looks at the split - // slot when determining whether to reconstruct payloads. - hot_db_ops.push(StoreOp::DeleteExecutionPayload(block_root)); + // Delete the execution payload if payload pruning is enabled. At a skipped slot we may + // delete the payload for the finalized block itself, but that's OK as we only guarantee + // that payloads are present for slots >= the split slot. The payload fetching code is also + // forgiving of missing payloads. + if store.config.prune_payloads { + hot_db_ops.push(StoreOp::DeleteExecutionPayload(block_root)); + } } // Warning: Critical section. We have to take care not to put any of the two databases in an diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index aed8ebf394..661bbcdb0c 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -1227,17 +1227,17 @@ fn compact_db_flag() { .with_config(|config| assert!(config.store.compact_on_init)); } #[test] -fn prune_payloads_on_startup_default() { +fn prune_payloads_default() { CommandLineTest::new() .run_with_zero_port() - .with_config(|config| assert!(config.store.prune_payloads_on_init)); + .with_config(|config| assert!(config.store.prune_payloads)); } #[test] fn prune_payloads_on_startup_false() { CommandLineTest::new() - .flag("prune-payloads-on-startup", Some("false")) + .flag("prune-payloads", Some("false")) .run_with_zero_port() - .with_config(|config| assert!(!config.store.prune_payloads_on_init)); + .with_config(|config| assert!(!config.store.prune_payloads)); } #[test] fn reconstruct_historic_states_flag() {