diff --git a/Cargo.lock b/Cargo.lock index 90196ea5b3..f23777bc4c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -855,7 +855,7 @@ dependencies = [ [[package]] name = "beacon_node" -version = "5.2.0" +version = "5.2.1" dependencies = [ "beacon_chain", "clap", @@ -1061,7 +1061,7 @@ dependencies = [ [[package]] name = "boot_node" -version = "5.2.0" +version = "5.2.1" dependencies = [ "beacon_node", "clap", @@ -4322,7 +4322,7 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lcli" -version = "5.2.0" +version = "5.2.1" dependencies = [ "account_utils", "beacon_chain", @@ -4893,7 +4893,7 @@ dependencies = [ [[package]] name = "lighthouse" -version = "5.2.0" +version = "5.2.1" dependencies = [ "account_manager", "account_utils", @@ -6678,6 +6678,15 @@ dependencies = [ "yasna", ] +[[package]] +name = "redb" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed7508e692a49b6b2290b56540384ccae9b1fb4d77065640b165835b56ffe3bb" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -7635,6 +7644,7 @@ version = "0.1.0" dependencies = [ "bincode", "byteorder", + "derivative", "ethereum_ssz", "ethereum_ssz_derive", "filesystem", @@ -7650,6 +7660,7 @@ dependencies = [ "parking_lot 0.12.3", "rand", "rayon", + "redb", "safe_arith", "serde", "slog", diff --git a/Makefile b/Makefile index c3c8d13238..7d144e55fb 100644 --- a/Makefile +++ b/Makefile @@ -14,16 +14,8 @@ BUILD_PATH_AARCH64 = "target/$(AARCH64_TAG)/release" PINNED_NIGHTLY ?= nightly CLIPPY_PINNED_NIGHTLY=nightly-2022-05-19 -# List of features to use when building natively. Can be overridden via the environment. -# No jemalloc on Windows -ifeq ($(OS),Windows_NT) - FEATURES?= -else - FEATURES?=jemalloc -endif - # List of features to use when cross-compiling. Can be overridden via the environment. -CROSS_FEATURES ?= gnosis,slasher-lmdb,slasher-mdbx,jemalloc +CROSS_FEATURES ?= gnosis,slasher-lmdb,slasher-mdbx,slasher-redb,jemalloc # Cargo profile for Cross builds. Default is for local builds, CI uses an override. CROSS_PROFILE ?= release diff --git a/beacon_node/Cargo.toml b/beacon_node/Cargo.toml index b95720e807..a5fd29c971 100644 --- a/beacon_node/Cargo.toml +++ b/beacon_node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "beacon_node" -version = "5.2.0" +version = "5.2.1" authors = [ "Paul Hauner ", "Age Manning NetworkBehaviour for PeerManager { ) -> Result, ConnectionDenied> { trace!(self.log, "Inbound connection"; "peer_id" => %peer_id, "multiaddr" => %remote_addr); // We already checked if the peer was banned on `handle_pending_inbound_connection`. - if let Some(BanResult::BadScore) = self.ban_status(&peer_id) { + if self.ban_status(&peer_id).is_some() { return Err(ConnectionDenied::new( "Connection to peer rejected: peer has a bad score", )); diff --git a/beacon_node/lighthouse_network/src/rpc/handler.rs b/beacon_node/lighthouse_network/src/rpc/handler.rs index b7166efc37..6f338ebc8b 100644 --- a/beacon_node/lighthouse_network/src/rpc/handler.rs +++ b/beacon_node/lighthouse_network/src/rpc/handler.rs @@ -352,37 +352,6 @@ where !matches!(self.state, HandlerState::Deactivated) } - // NOTE: This function gets polled to completion upon a connection close. - fn poll_close(&mut self, _: &mut Context<'_>) -> Poll> { - // Inform the network behaviour of any failed requests - - while let Some(substream_id) = self.outbound_substreams.keys().next().cloned() { - let outbound_info = self - .outbound_substreams - .remove(&substream_id) - .expect("The value must exist for a key"); - // If the state of the connection is closing, we do not need to report this case to - // the behaviour, as the connection has just closed non-gracefully - if matches!(outbound_info.state, OutboundSubstreamState::Closing(_)) { - continue; - } - - // Register this request as an RPC Error - return Poll::Ready(Some(HandlerEvent::Err(HandlerErr::Outbound { - error: RPCError::Disconnected, - proto: outbound_info.proto, - id: outbound_info.req_id, - }))); - } - - // Also handle any events that are awaiting to be sent to the behaviour - if !self.events_out.is_empty() { - return Poll::Ready(Some(self.events_out.remove(0))); - } - - Poll::Ready(None) - } - fn poll( &mut self, cx: &mut Context<'_>, diff --git a/beacon_node/lighthouse_network/tests/rpc_tests.rs b/beacon_node/lighthouse_network/tests/rpc_tests.rs index 8d29f5158b..527b853dc3 100644 --- a/beacon_node/lighthouse_network/tests/rpc_tests.rs +++ b/beacon_node/lighthouse_network/tests/rpc_tests.rs @@ -3,7 +3,7 @@ mod common; use common::Protocol; -use lighthouse_network::rpc::{methods::*, RPCError}; +use lighthouse_network::rpc::methods::*; use lighthouse_network::{rpc::max_rpc_size, NetworkEvent, ReportSource, Request, Response}; use slog::{debug, warn, Level}; use ssz::Encode; @@ -1012,98 +1012,6 @@ fn test_tcp_blocks_by_root_chunked_rpc_terminates_correctly() { }) } -#[test] -fn test_disconnect_triggers_rpc_error() { - // set up the logging. The level and enabled logging or not - let log_level = Level::Debug; - let enable_logging = false; - - let log = common::build_log(log_level, enable_logging); - let spec = E::default_spec(); - - let rt = Arc::new(Runtime::new().unwrap()); - // get sender/receiver - rt.block_on(async { - let (mut sender, mut receiver) = common::build_node_pair( - Arc::downgrade(&rt), - &log, - ForkName::Base, - &spec, - Protocol::Tcp, - ) - .await; - - // BlocksByRoot Request - let rpc_request = Request::BlocksByRoot(BlocksByRootRequest::new( - // Must have at least one root for the request to create a stream - vec![Hash256::from_low_u64_be(0)], - &spec, - )); - - // build the sender future - let sender_future = async { - loop { - match sender.next_event().await { - NetworkEvent::PeerConnectedOutgoing(peer_id) => { - // Send a STATUS message - debug!(log, "Sending RPC"); - sender - .send_request(peer_id, 42, rpc_request.clone()) - .unwrap(); - } - NetworkEvent::RPCFailed { error, id: 42, .. } => match error { - RPCError::Disconnected => return, - other => panic!("received unexpected error {:?}", other), - }, - other => { - warn!(log, "Ignoring other event {:?}", other); - } - } - } - }; - - // determine messages to send (PeerId, RequestId). If some, indicates we still need to send - // messages - let mut sending_peer = None; - let receiver_future = async { - loop { - // this future either drives the sending/receiving or times out allowing messages to be - // sent in the timeout - match futures::future::select( - Box::pin(receiver.next_event()), - Box::pin(tokio::time::sleep(Duration::from_secs(1))), - ) - .await - { - futures::future::Either::Left((ev, _)) => match ev { - NetworkEvent::RequestReceived { peer_id, .. } => { - sending_peer = Some(peer_id); - } - other => { - warn!(log, "Ignoring other event {:?}", other); - } - }, - futures::future::Either::Right((_, _)) => {} // The timeout hit, send messages if required - } - - // if we need to send messages send them here. This will happen after a delay - if let Some(peer_id) = sending_peer.take() { - warn!(log, "Receiver got request, disconnecting peer"); - receiver.__hard_disconnect_testing_only(peer_id); - } - } - }; - - tokio::select! { - _ = sender_future => {} - _ = receiver_future => {} - _ = sleep(Duration::from_secs(30)) => { - panic!("Future timed out"); - } - } - }) -} - /// Establishes a pair of nodes and disconnects the pair based on the selected protocol via an RPC /// Goodbye message. fn goodbye_test(log_level: Level, enable_logging: bool, protocol: Protocol) { diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index ce7d04ac0a..fe133e8e1c 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -307,7 +307,11 @@ impl BackFillSync { /// A peer has disconnected. /// If the peer has active batches, those are considered failed and re-requested. #[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"] - pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), BackFillError> { + pub fn peer_disconnected( + &mut self, + peer_id: &PeerId, + network: &mut SyncNetworkContext, + ) -> Result<(), BackFillError> { if matches!( self.state(), BackFillState::Failed | BackFillState::NotRequired @@ -315,7 +319,37 @@ impl BackFillSync { return Ok(()); } - self.active_requests.remove(peer_id); + if let Some(batch_ids) = self.active_requests.remove(peer_id) { + // fail the batches. + for id in batch_ids { + if let Some(batch) = self.batches.get_mut(&id) { + match batch.download_failed(false) { + Ok(BatchOperationOutcome::Failed { blacklist: _ }) => { + self.fail_sync(BackFillError::BatchDownloadFailed(id))?; + } + Ok(BatchOperationOutcome::Continue) => {} + Err(e) => { + self.fail_sync(BackFillError::BatchInvalidState(id, e.0))?; + } + } + // If we have run out of peers in which to retry this batch, the backfill state + // transitions to a paused state. + // We still need to reset the state for all the affected batches, so we should not + // short circuit early. + if self.retry_batch_download(network, id).is_err() { + debug!( + self.log, + "Batch could not be retried"; + "batch_id" => id, + "error" => "no synced peers" + ); + } + } else { + debug!(self.log, "Batch not found while removing peer"; + "peer" => %peer_id, "batch" => id) + } + } + } // Remove the peer from the participation list self.participating_peers.remove(peer_id); @@ -369,7 +403,7 @@ impl BackFillSync { batch_id: BatchId, peer_id: &PeerId, request_id: Id, - beacon_block: Option>, + blocks: Vec>, ) -> Result { // check if we have this batch let batch = match self.batches.get_mut(&batch_id) { @@ -392,20 +426,14 @@ impl BackFillSync { } }; - if let Some(block) = beacon_block { - // This is not a stream termination, simply add the block to the request - if let Err(e) = batch.add_block(block) { - self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))?; - } - Ok(ProcessResult::Successful) - } else { + { // A stream termination has been sent. This batch has ended. Process a completed batch. // Remove the request from the peer's active batches self.active_requests .get_mut(peer_id) .map(|active_requests| active_requests.remove(&batch_id)); - match batch.download_completed() { + match batch.download_completed(blocks) { Ok(received) => { let awaiting_batches = self.processing_target.saturating_sub(batch_id) / BACKFILL_EPOCHS_PER_BATCH; diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index f685b7e59d..66b2d808d9 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,3 +1,25 @@ +//! Implements block lookup sync. +//! +//! Block lookup sync is triggered when a peer claims to have imported a block we don't know about. +//! For example, a peer attesting to a head block root that is not in our fork-choice. Lookup sync +//! is recursive in nature, as we may discover that this attested head block root has a parent that +//! is also unknown to us. +//! +//! Block lookup is implemented as an event-driven state machine. It sends events to the network and +//! beacon processor, and expects some set of events back. A discrepancy in the expected event API +//! will result in lookups getting "stuck". A lookup becomes stuck when there is no future event +//! that will trigger the lookup to make progress. There's a fallback mechanism that drops lookups +//! that live for too long, logging the line "Notify the devs a sync lookup is stuck". +//! +//! The expected event API is documented in the code paths that are making assumptions with the +//! comment prefix "Lookup sync event safety:" +//! +//! Block lookup sync attempts to not re-download or re-process data that we already have. Block +//! components are cached temporarily in multiple places before they are imported into fork-choice. +//! Therefore, block lookup sync must peek these caches correctly to decide when to skip a download +//! or consider a lookup complete. These caches are read from the `SyncNetworkContext` and its state +//! returned to this module as `LookupRequestResult` variants. + use self::parent_chain::{compute_parent_chains, NodeChain}; pub use self::single_block_lookup::DownloadResult; use self::single_block_lookup::{LookupRequestError, LookupResult, SingleBlockLookup}; @@ -194,7 +216,15 @@ impl BlockLookups { let parent_chains = self.active_parent_lookups(); for (chain_idx, parent_chain) in parent_chains.iter().enumerate() { - if parent_chain.ancestor() == child_block_root_trigger + // `block_root_to_search` will trigger a new lookup, and it will extend a parent_chain + // beyond its max length + let block_would_extend_chain = parent_chain.ancestor() == child_block_root_trigger; + // `block_root_to_search` already has a lookup, and with the block trigger it extends + // the parent_chain beyond its length. This can happen because when creating a lookup + // for a new root we don't do any parent chain length checks + let trigger_is_chain_tip = parent_chain.tip == child_block_root_trigger; + + if (block_would_extend_chain || trigger_is_chain_tip) && parent_chain.len() >= PARENT_DEPTH_TOLERANCE { debug!(self.log, "Parent lookup chain too long"; "block_root" => ?block_root_to_search); @@ -269,7 +299,7 @@ impl BlockLookups { } } - if let Err(e) = self.add_peers_to_lookup_and_ancestors(lookup_id, peers) { + if let Err(e) = self.add_peers_to_lookup_and_ancestors(lookup_id, peers, cx) { warn!(self.log, "Error adding peers to ancestor lookup"; "error" => ?e); } @@ -375,6 +405,14 @@ impl BlockLookups { "response_type" => ?response_type, ); + // Here we could check if response extends a parent chain beyond its max length. + // However we defer that check to the handling of a processing error ParentUnknown. + // + // Here we could check if there's already a lookup for parent_root of `response`. In + // that case we know that sending the response for processing will likely result in + // a `ParentUnknown` error. However, for simplicity we choose to not implement this + // optimization. + // Register the download peer here. Once we have received some data over the wire we // attribute it to this peer for scoring latter regardless of how the request was // done. @@ -410,21 +448,9 @@ impl BlockLookups { /* Error responses */ pub fn peer_disconnected(&mut self, peer_id: &PeerId) { - self.single_block_lookups.retain(|_, lookup| { + for (_, lookup) in self.single_block_lookups.iter_mut() { lookup.remove_peer(peer_id); - - // Note: this condition should be removed in the future. It's not strictly necessary to drop a - // lookup if there are no peers left. Lookup should only be dropped if it can not make progress - if lookup.has_no_peers() { - debug!(self.log, - "Dropping single lookup after peer disconnection"; - "block_root" => ?lookup.block_root() - ); - false - } else { - true - } - }); + } } /* Processing responses */ @@ -787,12 +813,12 @@ impl BlockLookups { }; if stuck_lookup.id == ancestor_stuck_lookup.id { - warn!(self.log, "Notify the devs, a sync lookup is stuck"; + warn!(self.log, "Notify the devs a sync lookup is stuck"; "block_root" => ?stuck_lookup.block_root(), "lookup" => ?stuck_lookup, ); } else { - warn!(self.log, "Notify the devs, a sync lookup is stuck"; + warn!(self.log, "Notify the devs a sync lookup is stuck"; "block_root" => ?stuck_lookup.block_root(), "lookup" => ?stuck_lookup, "ancestor_block_root" => ?ancestor_stuck_lookup.block_root(), @@ -834,14 +860,17 @@ impl BlockLookups { &mut self, lookup_id: SingleLookupId, peers: &[PeerId], + cx: &mut SyncNetworkContext, ) -> Result<(), String> { let lookup = self .single_block_lookups .get_mut(&lookup_id) .ok_or(format!("Unknown lookup for id {lookup_id}"))?; + let mut added_some_peer = false; for peer in peers { if lookup.add_peer(*peer) { + added_some_peer = true; debug!(self.log, "Adding peer to existing single block lookup"; "block_root" => ?lookup.block_root(), "peer" => ?peer @@ -849,22 +878,25 @@ impl BlockLookups { } } - // We may choose to attempt to continue a lookup here. It is possible that a lookup had zero - // peers and after adding this set of peers it can make progress again. Note that this - // recursive function iterates from child to parent, so continuing the child first is weird. - // However, we choose to not attempt to continue the lookup for simplicity. It's not - // strictly required and just and optimization for a rare corner case. - if let Some(parent_root) = lookup.awaiting_parent() { if let Some((&child_id, _)) = self .single_block_lookups .iter() .find(|(_, l)| l.block_root() == parent_root) { - self.add_peers_to_lookup_and_ancestors(child_id, peers) + self.add_peers_to_lookup_and_ancestors(child_id, peers, cx) } else { Err(format!("Lookup references unknown parent {parent_root:?}")) } + } else if added_some_peer { + // If this lookup is not awaiting a parent and we added at least one peer, attempt to + // make progress. It is possible that a lookup is created with zero peers, attempted to + // make progress, and then receives peers. After that time the lookup will never be + // pruned with `drop_lookups_without_peers` because it has peers. This is rare corner + // case, but it can result in stuck lookups. + let result = lookup.continue_requests(cx); + self.on_lookup_result(lookup_id, result, "add_peers", cx); + Ok(()) } else { Ok(()) } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 13efd36ab7..e17991286a 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -197,21 +197,36 @@ impl SingleBlockLookup { } let Some(peer_id) = self.use_rand_available_peer() else { - // Allow lookup to not have any peers. In that case do nothing. If the lookup does - // not have peers for some time, it will be dropped. + // Allow lookup to not have any peers and do nothing. This is an optimization to not + // lose progress of lookups created from a block with unknown parent before we receive + // attestations for said block. + // Lookup sync event safety: If a lookup requires peers to make progress, and does + // not receive any new peers for some time it will be dropped. If it receives a new + // peer it must attempt to make progress. + R::request_state_mut(self) + .get_state_mut() + .update_awaiting_download_status("no peers"); return Ok(()); }; let request = R::request_state_mut(self); match request.make_request(id, peer_id, downloaded_block_expected_blobs, cx)? { LookupRequestResult::RequestSent(req_id) => { + // Lookup sync event safety: If make_request returns `RequestSent`, we are + // guaranteed that `BlockLookups::on_download_response` will be called exactly + // with this `req_id`. request.get_state_mut().on_download_start(req_id)? } LookupRequestResult::NoRequestNeeded => { + // Lookup sync event safety: Advances this request to the terminal `Processed` + // state. If all requests reach this state, the request is marked as completed + // in `Self::continue_requests`. request.get_state_mut().on_completed_request()? } // Sync will receive a future event to make progress on the request, do nothing now LookupRequestResult::Pending(reason) => { + // Lookup sync event safety: Refer to the code paths constructing + // `LookupRequestResult::Pending` request .get_state_mut() .update_awaiting_download_status(reason); @@ -222,16 +237,28 @@ impl SingleBlockLookup { // Otherwise, attempt to progress awaiting processing // If this request is awaiting a parent lookup to be processed, do not send for processing. // The request will be rejected with unknown parent error. + // + // TODO: The condition `block_is_processed || Block` can be dropped after checking for + // unknown parent root when import RPC blobs } else if !awaiting_parent && (block_is_processed || matches!(R::response_type(), ResponseType::Block)) { // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is // useful to conditionally access the result data. if let Some(result) = request.get_state_mut().maybe_start_processing() { + // Lookup sync event safety: If `send_for_processing` returns Ok() we are guaranteed + // that `BlockLookups::on_processing_result` will be called exactly once with this + // lookup_id return R::send_for_processing(id, result, cx); } + // Lookup sync event safety: If the request is not in `AwaitingDownload` or + // `AwaitingProcessing` state it is guaranteed to receive some event to make progress. } + // Lookup sync event safety: If a lookup is awaiting a parent we are guaranteed to either: + // (1) attempt to make progress with `BlockLookups::continue_child_lookups` if the parent + // lookup completes, or (2) get dropped if the parent fails and is dropped. + Ok(()) } @@ -246,10 +273,9 @@ impl SingleBlockLookup { self.peers.insert(peer_id) } - /// Remove peer from available peers. Return true if there are no more available peers and all - /// requests are not expecting any future event (AwaitingDownload). - pub fn remove_peer(&mut self, peer_id: &PeerId) -> bool { - self.peers.remove(peer_id) + /// Remove peer from available peers. + pub fn remove_peer(&mut self, peer_id: &PeerId) { + self.peers.remove(peer_id); } /// Returns true if this lookup has zero peers diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index a607151bde..470e10c55c 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -278,8 +278,11 @@ impl TestRig { } } - fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.sync_manager.get_failed_chains().contains(chain_hash) + fn assert_failed_chain(&mut self, chain_hash: Hash256) { + let failed_chains = self.sync_manager.get_failed_chains(); + if !failed_chains.contains(&chain_hash) { + panic!("expected failed chains to contain {chain_hash:?}: {failed_chains:?}"); + } } fn find_single_lookup_for(&self, block_root: Hash256) -> Id { @@ -290,6 +293,7 @@ impl TestRig { .0 } + #[track_caller] fn expect_no_active_single_lookups(&self) { assert!( self.active_single_lookups().is_empty(), @@ -298,6 +302,7 @@ impl TestRig { ); } + #[track_caller] fn expect_no_active_lookups(&self) { self.expect_no_active_single_lookups(); } @@ -539,10 +544,6 @@ impl TestRig { }) } - fn peer_disconnected(&mut self, disconnected_peer_id: PeerId) { - self.send_sync_message(SyncMessage::Disconnect(disconnected_peer_id)); - } - /// Return RPCErrors for all active requests of peer fn rpc_error_all_active_requests(&mut self, disconnected_peer_id: PeerId) { self.drain_network_rx(); @@ -562,6 +563,10 @@ impl TestRig { } } + fn peer_disconnected(&mut self, peer_id: PeerId) { + self.send_sync_message(SyncMessage::Disconnect(peer_id)); + } + fn drain_network_rx(&mut self) { while let Ok(event) = self.network_rx.try_recv() { self.network_rx_queue.push(event); @@ -1026,6 +1031,28 @@ fn test_single_block_lookup_failure() { rig.expect_empty_network(); } +#[test] +fn test_single_block_lookup_peer_disconnected_then_rpc_error() { + let mut rig = TestRig::test_setup(); + + let block_hash = Hash256::random(); + let peer_id = rig.new_connected_peer(); + + // Trigger the request. + rig.trigger_unknown_block_from_attestation(block_hash, peer_id); + let id = rig.expect_block_lookup_request(block_hash); + + // The peer disconnect event reaches sync before the rpc error. + rig.peer_disconnected(peer_id); + // The lookup is not removed as it can still potentially make progress. + rig.assert_single_lookups_count(1); + // The request fails. + rig.single_lookup_failed(id, peer_id, RPCError::Disconnected); + rig.expect_block_lookup_request(block_hash); + // The request should be removed from the network context on disconnection. + rig.expect_empty_network(); +} + #[test] fn test_single_block_lookup_becomes_parent_request() { let mut rig = TestRig::test_setup(); @@ -1201,7 +1228,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); for i in 1..=PARENT_FAIL_TOLERANCE { - assert!(!rig.failed_chains_contains(&block_root)); + rig.assert_not_failed_chain(block_root); let id = rig.expect_block_parent_request(parent_root); if i % 2 != 0 { // The request fails. It should be tried again. @@ -1214,8 +1241,8 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { } } - assert!(!rig.failed_chains_contains(&block_root)); - assert!(!rig.failed_chains_contains(&parent.canonical_root())); + rig.assert_not_failed_chain(block_root); + rig.assert_not_failed_chain(parent.canonical_root()); rig.expect_no_active_lookups_empty_network(); } @@ -1253,7 +1280,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { } #[test] -fn test_parent_lookup_too_deep() { +fn test_parent_lookup_too_deep_grow_ancestor() { let mut rig = TestRig::test_setup(); let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE); @@ -1278,7 +1305,31 @@ fn test_parent_lookup_too_deep() { } rig.expect_penalty(peer_id, "chain_too_long"); - assert!(rig.failed_chains_contains(&chain_hash)); + rig.assert_failed_chain(chain_hash); +} + +#[test] +fn test_parent_lookup_too_deep_grow_tip() { + let mut rig = TestRig::test_setup(); + let blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE - 1); + let peer_id = rig.new_connected_peer(); + let tip = blocks.last().unwrap().clone(); + + for block in blocks.into_iter() { + let block_root = block.canonical_root(); + rig.trigger_unknown_block_from_attestation(block_root, peer_id); + let id = rig.expect_block_parent_request(block_root); + rig.single_lookup_block_response(id, peer_id, Some(block.clone())); + rig.single_lookup_block_response(id, peer_id, None); + rig.expect_block_process(ResponseType::Block); + rig.single_block_component_processed( + id.lookup_id, + BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), + ); + } + + rig.expect_penalty(peer_id, "chain_too_long"); + rig.assert_failed_chain(tip.canonical_root()); } #[test] @@ -1289,19 +1340,9 @@ fn test_lookup_peer_disconnected_no_peers_left_while_request() { rig.trigger_unknown_parent_block(peer_id, trigger_block.into()); rig.peer_disconnected(peer_id); rig.rpc_error_all_active_requests(peer_id); - rig.expect_no_active_lookups(); -} - -#[test] -fn test_lookup_peer_disconnected_no_peers_left_not_while_request() { - let mut rig = TestRig::test_setup(); - let peer_id = rig.new_connected_peer(); - let trigger_block = rig.rand_block(); - rig.trigger_unknown_parent_block(peer_id, trigger_block.into()); - rig.peer_disconnected(peer_id); - // Note: this test case may be removed in the future. It's not strictly necessary to drop a - // lookup if there are no peers left. Lookup should only be dropped if it can not make progress - rig.expect_no_active_lookups(); + // Erroring all rpc requests and disconnecting the peer shouldn't remove the requests + // from the lookups map as they can still progress. + rig.assert_single_lookups_count(2); } #[test] diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index d159733cbc..f31f2921ea 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -1,4 +1,5 @@ use beacon_chain::block_verification_types::RpcBlock; +use lighthouse_network::PeerId; use ssz_types::VariableList; use std::{collections::VecDeque, sync::Arc}; use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; @@ -17,16 +18,19 @@ pub struct BlocksAndBlobsRequestInfo { is_sidecars_stream_terminated: bool, /// Used to determine if this accumulator should wait for a sidecars stream termination request_type: ByRangeRequestType, + /// The peer the request was made to. + pub(crate) peer_id: PeerId, } impl BlocksAndBlobsRequestInfo { - pub fn new(request_type: ByRangeRequestType) -> Self { + pub fn new(request_type: ByRangeRequestType, peer_id: PeerId) -> Self { Self { accumulated_blocks: <_>::default(), accumulated_sidecars: <_>::default(), is_blocks_stream_terminated: <_>::default(), is_sidecars_stream_terminated: <_>::default(), request_type, + peer_id, } } @@ -109,12 +113,14 @@ mod tests { use super::BlocksAndBlobsRequestInfo; use crate::sync::range_sync::ByRangeRequestType; use beacon_chain::test_utils::{generate_rand_block_and_blobs, NumBlobs}; + use lighthouse_network::PeerId; use rand::SeedableRng; use types::{test_utils::XorShiftRng, ForkName, MinimalEthSpec as E}; #[test] fn no_blobs_into_responses() { - let mut info = BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::Blocks); + let peer_id = PeerId::random(); + let mut info = BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::Blocks, peer_id); let mut rng = XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng).0) @@ -133,7 +139,9 @@ mod tests { #[test] fn empty_blobs_into_responses() { - let mut info = BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::BlocksAndBlobs); + let peer_id = PeerId::random(); + let mut info = + BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::BlocksAndBlobs, peer_id); let mut rng = XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| { diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 4c1a1e6b67..9540709294 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -372,16 +372,39 @@ impl SyncManager { Err(_) => self.update_sync_state(), }, } + } else { + debug!( + self.log, + "RPC error for range request has no associated entry in network context, ungraceful disconnect"; + "peer_id" => %peer_id, + "request_id" => %id, + "error" => ?error, + ); } } } } + /// Handles a peer disconnect. + /// + /// It is important that a peer disconnect retries all the batches/lookups as + /// there is no way to guarantee that libp2p always emits a error along with + /// the disconnect. fn peer_disconnect(&mut self, peer_id: &PeerId) { + // Inject a Disconnected error on all requests associated with the disconnected peer + // to retry all batches/lookups + for request_id in self.network.peer_disconnected(peer_id) { + self.inject_error(*peer_id, request_id, RPCError::Disconnected); + } + + // Remove peer from all data structures self.range_sync.peer_disconnect(&mut self.network, peer_id); + let _ = self + .backfill_sync + .peer_disconnected(peer_id, &mut self.network); self.block_lookups.peer_disconnected(peer_id); + // Regardless of the outcome, we update the sync status. - let _ = self.backfill_sync.peer_disconnected(peer_id); self.update_sync_state(); } @@ -908,39 +931,32 @@ impl SyncManager { { match resp.responses { Ok(blocks) => { - for block in blocks - .into_iter() - .map(Some) - // chain the stream terminator - .chain(vec![None]) - { - match resp.sender_id { - RangeRequestId::RangeSync { chain_id, batch_id } => { - self.range_sync.blocks_by_range_response( - &mut self.network, - peer_id, - chain_id, - batch_id, - id, - block, - ); - self.update_sync_state(); - } - RangeRequestId::BackfillSync { batch_id } => { - match self.backfill_sync.on_block_response( - &mut self.network, - batch_id, - &peer_id, - id, - block, - ) { - Ok(ProcessResult::SyncCompleted) => self.update_sync_state(), - Ok(ProcessResult::Successful) => {} - Err(_error) => { - // The backfill sync has failed, errors are reported - // within. - self.update_sync_state(); - } + match resp.sender_id { + RangeRequestId::RangeSync { chain_id, batch_id } => { + self.range_sync.blocks_by_range_response( + &mut self.network, + peer_id, + chain_id, + batch_id, + id, + blocks, + ); + self.update_sync_state(); + } + RangeRequestId::BackfillSync { batch_id } => { + match self.backfill_sync.on_block_response( + &mut self.network, + batch_id, + &peer_id, + id, + blocks, + ) { + Ok(ProcessResult::SyncCompleted) => self.update_sync_state(), + Ok(ProcessResult::Successful) => {} + Err(_error) => { + // The backfill sync has failed, errors are reported + // within. + self.update_sync_state(); } } } @@ -951,7 +967,7 @@ impl SyncManager { self.network.insert_range_blocks_and_blobs_request( id, resp.sender_id, - BlocksAndBlobsRequestInfo::new(resp.request_type), + BlocksAndBlobsRequestInfo::new(resp.request_type, peer_id), ); // inform range that the request needs to be treated as failed // With time we will want to downgrade this log diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index f3f82ee011..6f89b954b3 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -177,6 +177,46 @@ impl SyncNetworkContext { } } + /// Returns the ids of all the requests made to the given peer_id. + pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec { + let failed_range_ids = + self.range_blocks_and_blobs_requests + .iter() + .filter_map(|(id, request)| { + if request.1.peer_id == *peer_id { + Some(SyncRequestId::RangeBlockAndBlobs { id: *id }) + } else { + None + } + }); + + let failed_block_ids = self + .blocks_by_root_requests + .iter() + .filter_map(|(id, request)| { + if request.peer_id == *peer_id { + Some(SyncRequestId::SingleBlock { id: *id }) + } else { + None + } + }); + let failed_blob_ids = self + .blobs_by_root_requests + .iter() + .filter_map(|(id, request)| { + if request.peer_id == *peer_id { + Some(SyncRequestId::SingleBlob { id: *id }) + } else { + None + } + }); + + failed_range_ids + .chain(failed_block_ids) + .chain(failed_blob_ids) + .collect() + } + pub fn network_globals(&self) -> &NetworkGlobals { &self.network_beacon_processor.network_globals } @@ -272,8 +312,13 @@ impl SyncNetworkContext { sender_id: RangeRequestId, ) -> Result { let id = self.blocks_by_range_request(peer_id, batch_type, request)?; - self.range_blocks_and_blobs_requests - .insert(id, (sender_id, BlocksAndBlobsRequestInfo::new(batch_type))); + self.range_blocks_and_blobs_requests.insert( + id, + ( + sender_id, + BlocksAndBlobsRequestInfo::new(batch_type, peer_id), + ), + ); Ok(id) } @@ -343,7 +388,10 @@ impl SyncNetworkContext { // Block is known are currently processing, expect a future event with the result of // processing. BlockProcessStatus::NotValidated { .. } => { - return Ok(LookupRequestResult::Pending("block in processing cache")) + // Lookup sync event safety: If the block is currently in the processing cache, we + // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will + // make progress on this lookup + return Ok(LookupRequestResult::Pending("block in processing cache")); } // Block is fully validated. If it's not yet imported it's waiting for missing block // components. Consider this request completed and do nothing. @@ -366,6 +414,12 @@ impl SyncNetworkContext { let request = BlocksByRootSingleRequest(block_root); + // Lookup sync event safety: If network_send.send() returns Ok(_) we are guaranteed that + // eventually at least one this 3 events will be received: + // - StreamTermination(request_id): handled by `Self::on_single_block_response` + // - RPCError(request_id): handled by `Self::on_single_block_response` + // - Disconnect(peer_id) handled by `Self::peer_disconnected``which converts it to a + // ` RPCError(request_id)`event handled by the above method self.network_send .send(NetworkMessage::SendRequest { peer_id, @@ -375,7 +429,7 @@ impl SyncNetworkContext { .map_err(|_| RpcRequestSendError::NetworkSendError)?; self.blocks_by_root_requests - .insert(id, ActiveBlocksByRootRequest::new(request)); + .insert(id, ActiveBlocksByRootRequest::new(request, peer_id)); Ok(LookupRequestResult::RequestSent(req_id)) } @@ -408,6 +462,13 @@ impl SyncNetworkContext { // latter handle the case where if the peer sent no blobs, penalize. // - if `downloaded_block_expected_blobs` is Some = block is downloading or processing. // - if `num_expected_blobs` returns Some = block is processed. + // + // Lookup sync event safety: Reaching this code means that a block is not in any pre-import + // cache nor in the request state of this lookup. Therefore, the block must either: (1) not + // be downloaded yet or (2) the block is already imported into the fork-choice. + // In case (1) the lookup must either successfully download the block or get dropped. + // In case (2) the block will be downloaded, processed, reach `BlockIsAlreadyKnown` and + // get dropped as completed. return Ok(LookupRequestResult::Pending("waiting for block download")); }; @@ -444,6 +505,7 @@ impl SyncNetworkContext { indices, }; + // Lookup sync event safety: Refer to `Self::block_lookup_request` `network_send.send` call self.network_send .send(NetworkMessage::SendRequest { peer_id, @@ -453,7 +515,7 @@ impl SyncNetworkContext { .map_err(|_| RpcRequestSendError::NetworkSendError)?; self.blobs_by_root_requests - .insert(id, ActiveBlobsByRootRequest::new(request)); + .insert(id, ActiveBlobsByRootRequest::new(request, peer_id)); Ok(LookupRequestResult::RequestSent(req_id)) } @@ -660,6 +722,8 @@ impl SyncNetworkContext { .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; debug!(self.log, "Sending block for processing"; "block" => ?block_root, "id" => id); + // Lookup sync event safety: If `beacon_processor.send_rpc_beacon_block` returns Ok() sync + // must receive a single `SyncMessage::BlockComponentProcessed` with this process type beacon_processor .send_rpc_beacon_block( block_root, @@ -689,6 +753,8 @@ impl SyncNetworkContext { .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; debug!(self.log, "Sending blobs for processing"; "block" => ?block_root, "id" => id); + // Lookup sync event safety: If `beacon_processor.send_rpc_blobs` returns Ok() sync + // must receive a single `SyncMessage::BlockComponentProcessed` event with this process type beacon_processor .send_rpc_blobs( block_root, diff --git a/beacon_node/network/src/sync/network_context/requests.rs b/beacon_node/network/src/sync/network_context/requests.rs index 6e4683701b..8387e9b0e1 100644 --- a/beacon_node/network/src/sync/network_context/requests.rs +++ b/beacon_node/network/src/sync/network_context/requests.rs @@ -1,5 +1,8 @@ use beacon_chain::get_block_root; -use lighthouse_network::rpc::{methods::BlobsByRootRequest, BlocksByRootRequest}; +use lighthouse_network::{ + rpc::{methods::BlobsByRootRequest, BlocksByRootRequest}, + PeerId, +}; use std::sync::Arc; use strum::IntoStaticStr; use types::{ @@ -20,13 +23,15 @@ pub enum LookupVerifyError { pub struct ActiveBlocksByRootRequest { request: BlocksByRootSingleRequest, resolved: bool, + pub(crate) peer_id: PeerId, } impl ActiveBlocksByRootRequest { - pub fn new(request: BlocksByRootSingleRequest) -> Self { + pub fn new(request: BlocksByRootSingleRequest, peer_id: PeerId) -> Self { Self { request, resolved: false, + peer_id, } } @@ -94,14 +99,16 @@ pub struct ActiveBlobsByRootRequest { request: BlobsByRootSingleBlockRequest, blobs: Vec>>, resolved: bool, + pub(crate) peer_id: PeerId, } impl ActiveBlobsByRootRequest { - pub fn new(request: BlobsByRootSingleBlockRequest) -> Self { + pub fn new(request: BlobsByRootSingleBlockRequest, peer_id: PeerId) -> Self { Self { request, blobs: vec![], resolved: false, + peer_id, } } diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 75cb49d176..baba8c9a62 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -116,7 +116,7 @@ pub enum BatchState { /// The batch has failed either downloading or processing, but can be requested again. AwaitingDownload, /// The batch is being downloaded. - Downloading(PeerId, Vec>, Id), + Downloading(PeerId, Id), /// The batch has been completely downloaded and is ready for processing. AwaitingProcessing(PeerId, Vec>), /// The batch is being processed. @@ -199,7 +199,7 @@ impl BatchInfo { /// Verifies if an incoming block belongs to this batch. pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &Id) -> bool { - if let BatchState::Downloading(expected_peer, _, expected_id) = &self.state { + if let BatchState::Downloading(expected_peer, expected_id) = &self.state { return peer_id == expected_peer && expected_id == request_id; } false @@ -209,7 +209,7 @@ impl BatchInfo { pub fn current_peer(&self) -> Option<&PeerId> { match &self.state { BatchState::AwaitingDownload | BatchState::Failed => None, - BatchState::Downloading(peer_id, _, _) + BatchState::Downloading(peer_id, _) | BatchState::AwaitingProcessing(peer_id, _) | BatchState::Processing(Attempt { peer_id, .. }) | BatchState::AwaitingValidation(Attempt { peer_id, .. }) => Some(peer_id), @@ -250,36 +250,18 @@ impl BatchInfo { &self.failed_processing_attempts } - /// Adds a block to a downloading batch. - pub fn add_block(&mut self, block: RpcBlock) -> Result<(), WrongState> { - match self.state.poison() { - BatchState::Downloading(peer, mut blocks, req_id) => { - blocks.push(block); - self.state = BatchState::Downloading(peer, blocks, req_id); - Ok(()) - } - BatchState::Poisoned => unreachable!("Poisoned batch"), - other => { - self.state = other; - Err(WrongState(format!( - "Add block for batch in wrong state {:?}", - self.state - ))) - } - } - } - /// Marks the batch as ready to be processed if the blocks are in the range. The number of /// received blocks is returned, or the wrong batch end on failure #[must_use = "Batch may have failed"] pub fn download_completed( &mut self, + blocks: Vec>, ) -> Result< usize, /* Received blocks */ Result<(Slot, Slot, BatchOperationOutcome), WrongState>, > { match self.state.poison() { - BatchState::Downloading(peer, blocks, _request_id) => { + BatchState::Downloading(peer, _request_id) => { // verify that blocks are in range if let Some(last_slot) = blocks.last().map(|b| b.slot()) { // the batch is non-empty @@ -336,7 +318,7 @@ impl BatchInfo { mark_failed: bool, ) -> Result { match self.state.poison() { - BatchState::Downloading(peer, _, _request_id) => { + BatchState::Downloading(peer, _request_id) => { // register the attempt and check if the batch can be tried again if mark_failed { self.failed_download_attempts.push(peer); @@ -369,7 +351,7 @@ impl BatchInfo { ) -> Result<(), WrongState> { match self.state.poison() { BatchState::AwaitingDownload => { - self.state = BatchState::Downloading(peer, Vec::new(), request_id); + self.state = BatchState::Downloading(peer, request_id); Ok(()) } BatchState::Poisoned => unreachable!("Poisoned batch"), @@ -536,13 +518,9 @@ impl std::fmt::Debug for BatchState { BatchState::AwaitingProcessing(ref peer, ref blocks) => { write!(f, "AwaitingProcessing({}, {} blocks)", peer, blocks.len()) } - BatchState::Downloading(peer, blocks, request_id) => write!( - f, - "Downloading({}, {} blocks, {})", - peer, - blocks.len(), - request_id - ), + BatchState::Downloading(peer, request_id) => { + write!(f, "Downloading({}, {})", peer, request_id) + } BatchState::Poisoned => f.write_str("Poisoned"), } } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 63cafa9aca..ea873bdca0 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -174,8 +174,30 @@ impl SyncingChain { /// Removes a peer from the chain. /// If the peer has active batches, those are considered failed and re-requested. - pub fn remove_peer(&mut self, peer_id: &PeerId) -> ProcessingResult { - self.peers.remove(peer_id); + pub fn remove_peer( + &mut self, + peer_id: &PeerId, + network: &mut SyncNetworkContext, + ) -> ProcessingResult { + if let Some(batch_ids) = self.peers.remove(peer_id) { + // fail the batches. + for id in batch_ids { + if let Some(batch) = self.batches.get_mut(&id) { + if let BatchOperationOutcome::Failed { blacklist } = + batch.download_failed(true)? + { + return Err(RemoveChain::ChainFailed { + blacklist, + failing_batch: id, + }); + } + self.retry_batch_download(network, id)?; + } else { + debug!(self.log, "Batch not found while removing peer"; + "peer" => %peer_id, "batch" => id) + } + } + } if self.peers.is_empty() { Err(RemoveChain::EmptyPeerPool) @@ -200,7 +222,7 @@ impl SyncingChain { batch_id: BatchId, peer_id: &PeerId, request_id: Id, - beacon_block: Option>, + blocks: Vec>, ) -> ProcessingResult { // check if we have this batch let batch = match self.batches.get_mut(&batch_id) { @@ -221,18 +243,14 @@ impl SyncingChain { } }; - if let Some(block) = beacon_block { - // This is not a stream termination, simply add the block to the request - batch.add_block(block)?; - Ok(KeepChain) - } else { + { // A stream termination has been sent. This batch has ended. Process a completed batch. // Remove the request from the peer's active batches self.peers .get_mut(peer_id) .map(|active_requests| active_requests.remove(&batch_id)); - match batch.download_completed() { + match batch.download_completed(blocks) { Ok(received) => { let awaiting_batches = batch_id .saturating_sub(self.optimistic_start.unwrap_or(self.processing_target)) diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index fe48db35b4..45d2918331 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -210,11 +210,11 @@ where chain_id: ChainId, batch_id: BatchId, request_id: Id, - beacon_block: Option>, + blocks: Vec>, ) { // check if this chunk removes the chain match self.chains.call_by_id(chain_id, |chain| { - chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block) + chain.on_block_response(network, batch_id, &peer_id, request_id, blocks) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { @@ -278,8 +278,9 @@ where /// for this peer. If so we mark the batch as failed. The batch may then hit it's maximum /// retries. In this case, we need to remove the chain. fn remove_peer(&mut self, network: &mut SyncNetworkContext, peer_id: &PeerId) { - for (removed_chain, sync_type, remove_reason) in - self.chains.call_all(|chain| chain.remove_peer(peer_id)) + for (removed_chain, sync_type, remove_reason) in self + .chains + .call_all(|chain| chain.remove_peer(peer_id, network)) { self.on_chain_removed( removed_chain, @@ -795,7 +796,7 @@ mod tests { rig.cx.update_execution_engine_state(EngineState::Offline); // send the response to the request - range.blocks_by_range_response(&mut rig.cx, peer1, chain1, batch1, id1, None); + range.blocks_by_range_response(&mut rig.cx, peer1, chain1, batch1, id1, vec![]); // the beacon processor shouldn't have received any work rig.expect_empty_processor(); @@ -809,7 +810,7 @@ mod tests { rig.complete_range_block_and_blobs_response(block_req, blob_req_opt); // send the response to the request - range.blocks_by_range_response(&mut rig.cx, peer2, chain2, batch2, id2, None); + range.blocks_by_range_response(&mut rig.cx, peer2, chain2, batch2, id2, vec![]); // the beacon processor shouldn't have received any work rig.expect_empty_processor(); diff --git a/boot_node/Cargo.toml b/boot_node/Cargo.toml index ee7cb96d21..4cde8ea270 100644 --- a/boot_node/Cargo.toml +++ b/boot_node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "boot_node" -version = "5.2.0" +version = "5.2.1" authors = ["Sigma Prime "] edition = { workspace = true } diff --git a/common/lighthouse_version/src/lib.rs b/common/lighthouse_version/src/lib.rs index 6fb06cc543..d32d799468 100644 --- a/common/lighthouse_version/src/lib.rs +++ b/common/lighthouse_version/src/lib.rs @@ -17,8 +17,8 @@ pub const VERSION: &str = git_version!( // NOTE: using --match instead of --exclude for compatibility with old Git "--match=thiswillnevermatchlol" ], - prefix = "Lighthouse/v5.2.0-", - fallback = "Lighthouse/v5.2.0" + prefix = "Lighthouse/v5.2.1-", + fallback = "Lighthouse/v5.2.1" ); /// Returns the first eight characters of the latest commit hash for this build. diff --git a/consensus/state_processing/src/per_block_processing/block_signature_verifier.rs b/consensus/state_processing/src/per_block_processing/block_signature_verifier.rs index a0c044219d..28ca8935e4 100644 --- a/consensus/state_processing/src/per_block_processing/block_signature_verifier.rs +++ b/consensus/state_processing/src/per_block_processing/block_signature_verifier.rs @@ -4,7 +4,6 @@ use super::signature_sets::{Error as SignatureSetError, *}; use crate::per_block_processing::errors::{AttestationInvalid, BlockOperationError}; use crate::{ConsensusContext, ContextError}; use bls::{verify_signature_sets, PublicKey, PublicKeyBytes, SignatureSet}; -use rayon::prelude::*; use std::borrow::Cow; use types::{ AbstractExecPayload, BeaconState, BeaconStateError, ChainSpec, EthSpec, Hash256, @@ -411,15 +410,10 @@ impl<'a> ParallelSignatureSets<'a> { /// It is not possible to know exactly _which_ signature is invalid here, just that /// _at least one_ was invalid. /// - /// Uses `rayon` to do a map-reduce of Vitalik's method across multiple cores. + /// Blst library spreads the signature verification work across multiple available cores, so + /// this function is already parallelized. #[must_use] pub fn verify(self) -> bool { - let num_sets = self.sets.len(); - let num_chunks = std::cmp::max(1, num_sets / rayon::current_num_threads()); - self.sets - .into_par_iter() - .chunks(num_chunks) - .map(|chunk| verify_signature_sets(chunk.iter())) - .reduce(|| true, |current, this| current && this) + verify_signature_sets(self.sets.iter()) } } diff --git a/consensus/state_processing/src/per_block_processing/process_operations.rs b/consensus/state_processing/src/per_block_processing/process_operations.rs index a2e576cf74..17607f7f33 100644 --- a/consensus/state_processing/src/per_block_processing/process_operations.rs +++ b/consensus/state_processing/src/per_block_processing/process_operations.rs @@ -39,7 +39,7 @@ pub fn process_operations>( process_bls_to_execution_changes(state, bls_to_execution_changes, verify_signatures, spec)?; } - if state.fork_name_unchecked() >= ForkName::Electra { + if state.fork_name_unchecked().electra_enabled() { let requests = block_body.execution_payload()?.withdrawal_requests()?; if let Some(requests) = requests { process_execution_layer_withdrawal_requests(state, &requests, spec)?; diff --git a/consensus/state_processing/src/per_epoch_processing/single_pass.rs b/consensus/state_processing/src/per_epoch_processing/single_pass.rs index f5d3f974a6..a1fe9efbef 100644 --- a/consensus/state_processing/src/per_epoch_processing/single_pass.rs +++ b/consensus/state_processing/src/per_epoch_processing/single_pass.rs @@ -161,7 +161,7 @@ pub fn process_epoch_single_pass( let mut next_epoch_cache = PreEpochCache::new_for_next_epoch(state)?; let pending_balance_deposits_ctxt = - if fork_name >= ForkName::Electra && conf.pending_balance_deposits { + if fork_name.electra_enabled() && conf.pending_balance_deposits { Some(PendingBalanceDepositsContext::new(state, spec)?) } else { None @@ -197,7 +197,7 @@ pub fn process_epoch_single_pass( // Compute shared values required for different parts of epoch processing. let rewards_ctxt = &RewardsAndPenaltiesContext::new(progressive_balances, state_ctxt, spec)?; - let mut activation_queues = if fork_name < ForkName::Electra { + let mut activation_queues = if !fork_name.electra_enabled() { let activation_queue = epoch_cache .activation_queue()? .get_validators_eligible_for_activation( @@ -325,7 +325,7 @@ pub fn process_epoch_single_pass( } } - if conf.registry_updates && fork_name >= ForkName::Electra { + if conf.registry_updates && fork_name.electra_enabled() { if let Ok(earliest_exit_epoch_state) = state.earliest_exit_epoch_mut() { *earliest_exit_epoch_state = earliest_exit_epoch.ok_or(Error::MissingEarliestExitEpoch)?; @@ -354,7 +354,7 @@ pub fn process_epoch_single_pass( // Process consolidations outside the single-pass loop, as they depend on balances for multiple // validators and cannot be computed accurately inside the loop. - if fork_name >= ForkName::Electra && conf.pending_consolidations { + if fork_name.electra_enabled() && conf.pending_consolidations { process_pending_consolidations( state, &mut next_epoch_cache, @@ -554,7 +554,7 @@ fn process_single_registry_update( exit_balance_to_consume: Option<&mut u64>, spec: &ChainSpec, ) -> Result<(), Error> { - if state_ctxt.fork_name < ForkName::Electra { + if !state_ctxt.fork_name.electra_enabled() { let (activation_queue, next_epoch_activation_queue) = activation_queues.ok_or(Error::SinglePassMissingActivationQueue)?; process_single_registry_update_pre_electra( @@ -664,7 +664,7 @@ fn initiate_validator_exit( return Ok(()); } - let exit_queue_epoch = if state_ctxt.fork_name >= ForkName::Electra { + let exit_queue_epoch = if state_ctxt.fork_name.electra_enabled() { compute_exit_epoch_and_update_churn( validator, state_ctxt, diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index a9d521d995..87e3fcdd0f 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -394,7 +394,7 @@ impl ChainSpec { state: &BeaconState, ) -> u64 { let fork_name = state.fork_name_unchecked(); - if fork_name >= ForkName::Electra { + if fork_name.electra_enabled() { self.whistleblower_reward_quotient_electra } else { self.whistleblower_reward_quotient @@ -402,7 +402,7 @@ impl ChainSpec { } pub fn max_effective_balance_for_fork(&self, fork_name: ForkName) -> u64 { - if fork_name >= ForkName::Electra { + if fork_name.electra_enabled() { self.max_effective_balance_electra } else { self.max_effective_balance diff --git a/lcli/Cargo.toml b/lcli/Cargo.toml index 73dd93dc3e..3cddd8ee60 100644 --- a/lcli/Cargo.toml +++ b/lcli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lcli" description = "Lighthouse CLI (modeled after zcli)" -version = "5.2.0" +version = "5.2.1" authors = ["Paul Hauner "] edition = { workspace = true } diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index 20466b5de7..912602776a 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lighthouse" -version = "5.2.0" +version = "5.2.1" authors = ["Sigma Prime "] edition = { workspace = true } autotests = false @@ -22,8 +22,16 @@ gnosis = [] slasher-mdbx = ["slasher/mdbx"] # Support slasher LMDB backend. slasher-lmdb = ["slasher/lmdb"] -# Use jemalloc. -jemalloc = ["malloc_utils/jemalloc"] +# Support slasher redb backend. +slasher-redb = ["slasher/redb"] +# Deprecated. This is now enabled by default on non windows targets. +jemalloc = [] + +[target.'cfg(not(target_os = "windows"))'.dependencies] +malloc_utils = { workspace = true, features = ["jemalloc"] } + +[target.'cfg(target_os = "windows")'.dependencies] +malloc_utils = { workspace = true } [dependencies] beacon_node = { workspace = true } diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index abee30737c..47b44d3828 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -51,10 +51,10 @@ fn bls_library_name() -> &'static str { } fn allocator_name() -> &'static str { - if cfg!(feature = "jemalloc") { - "jemalloc" - } else { + if cfg!(target_os = "windows") { "system" + } else { + "jemalloc" } } diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index caadf208e3..cd499f2ada 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -2242,6 +2242,8 @@ fn slasher_broadcast_flag_false() { assert!(!slasher_config.broadcast); }); } + +#[cfg(all(feature = "lmdb"))] #[test] fn slasher_backend_override_to_default() { // Hard to test this flag because all but one backend is disabled by default and the backend diff --git a/slasher/Cargo.toml b/slasher/Cargo.toml index 563c4599d8..01a8b9fb00 100644 --- a/slasher/Cargo.toml +++ b/slasher/Cargo.toml @@ -8,11 +8,13 @@ edition = { workspace = true } default = ["lmdb"] mdbx = ["dep:mdbx"] lmdb = ["lmdb-rkv", "lmdb-rkv-sys"] +redb = ["dep:redb"] portable = ["types/portable"] [dependencies] bincode = { workspace = true } byteorder = { workspace = true } +derivative = { workspace = true } ethereum_ssz = { workspace = true } ethereum_ssz_derive = { workspace = true } flate2 = { version = "1.0.14", features = ["zlib"], default-features = false } @@ -36,6 +38,8 @@ mdbx = { package = "libmdbx", git = "https://github.com/sigp/libmdbx-rs", tag = lmdb-rkv = { git = "https://github.com/sigp/lmdb-rs", rev = "f33845c6469b94265319aac0ed5085597862c27e", optional = true } lmdb-rkv-sys = { git = "https://github.com/sigp/lmdb-rs", rev = "f33845c6469b94265319aac0ed5085597862c27e", optional = true } +redb = { version = "2.1", optional = true } + [dev-dependencies] maplit = { workspace = true } rayon = { workspace = true } diff --git a/slasher/src/config.rs b/slasher/src/config.rs index 1851e2e441..33d68fa0e5 100644 --- a/slasher/src/config.rs +++ b/slasher/src/config.rs @@ -15,16 +15,19 @@ pub const DEFAULT_MAX_DB_SIZE: usize = 512 * 1024; // 512 GiB pub const DEFAULT_ATTESTATION_ROOT_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(100_000); pub const DEFAULT_BROADCAST: bool = false; -#[cfg(all(feature = "mdbx", not(feature = "lmdb")))] +#[cfg(all(feature = "mdbx", not(any(feature = "lmdb", feature = "redb"))))] pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::Mdbx; #[cfg(feature = "lmdb")] pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::Lmdb; -#[cfg(not(any(feature = "mdbx", feature = "lmdb")))] +#[cfg(all(feature = "redb", not(any(feature = "mdbx", feature = "lmdb"))))] +pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::Redb; +#[cfg(not(any(feature = "mdbx", feature = "lmdb", feature = "redb")))] pub const DEFAULT_BACKEND: DatabaseBackend = DatabaseBackend::Disabled; pub const MAX_HISTORY_LENGTH: usize = 1 << 16; pub const MEGABYTE: usize = 1 << 20; pub const MDBX_DATA_FILENAME: &str = "mdbx.dat"; +pub const REDB_DATA_FILENAME: &str = "slasher.redb"; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Config { @@ -64,6 +67,8 @@ pub enum DatabaseBackend { Mdbx, #[cfg(feature = "lmdb")] Lmdb, + #[cfg(feature = "redb")] + Redb, Disabled, } diff --git a/slasher/src/database.rs b/slasher/src/database.rs index 801abe9283..4f4729a123 100644 --- a/slasher/src/database.rs +++ b/slasher/src/database.rs @@ -1,6 +1,7 @@ pub mod interface; mod lmdb_impl; mod mdbx_impl; +mod redb_impl; use crate::{ metrics, AttesterRecord, AttesterSlashingStatus, CompactAttesterRecord, Config, Error, @@ -489,8 +490,7 @@ impl SlasherDB { } // Store the new indexed attestation at the end of the current table. - let db = &self.databases.indexed_attestation_db; - let mut cursor = txn.cursor(db)?; + let mut cursor = txn.cursor(&self.databases.indexed_attestation_db)?; let indexed_att_id = match cursor.last_key()? { // First ID is 1 so that 0 can be used to represent `null` in `CompactAttesterRecord`. @@ -504,7 +504,6 @@ impl SlasherDB { cursor.put(attestation_key.as_ref(), &data)?; drop(cursor); - // Update the (epoch, hash) to ID mapping. self.put_indexed_attestation_id(txn, &id_key, attestation_key)?; @@ -743,21 +742,17 @@ impl SlasherDB { return Ok(()); } - loop { - let (key_bytes, _) = cursor.get_current()?.ok_or(Error::MissingProposerKey)?; - - let (slot, _) = ProposerKey::parse(key_bytes)?; + let should_delete = |key: &[u8]| -> Result { + let mut should_delete = false; + let (slot, _) = ProposerKey::parse(Cow::from(key))?; if slot < min_slot { - cursor.delete_current()?; - - // End the loop if there is no next entry. - if cursor.next_key()?.is_none() { - break; - } - } else { - break; + should_delete = true; } - } + + Ok(should_delete) + }; + + cursor.delete_while(should_delete)?; Ok(()) } @@ -771,9 +766,6 @@ impl SlasherDB { .saturating_add(1u64) .saturating_sub(self.config.history_length as u64); - // Collect indexed attestation IDs to delete. - let mut indexed_attestation_ids = vec![]; - let mut cursor = txn.cursor(&self.databases.indexed_attestation_id_db)?; // Position cursor at first key, bailing out if the database is empty. @@ -781,27 +773,20 @@ impl SlasherDB { return Ok(()); } - loop { - let (key_bytes, value) = cursor - .get_current()? - .ok_or(Error::MissingIndexedAttestationIdKey)?; - - let (target_epoch, _) = IndexedAttestationIdKey::parse(key_bytes)?; - + let should_delete = |key: &[u8]| -> Result { + let (target_epoch, _) = IndexedAttestationIdKey::parse(Cow::from(key))?; if target_epoch < min_epoch { - indexed_attestation_ids.push(IndexedAttestationId::new( - IndexedAttestationId::parse(value)?, - )); - - cursor.delete_current()?; - - if cursor.next_key()?.is_none() { - break; - } - } else { - break; + return Ok(true); } - } + + Ok(false) + }; + + let indexed_attestation_ids = cursor + .delete_while(should_delete)? + .into_iter() + .map(|id| IndexedAttestationId::parse(id).map(IndexedAttestationId::new)) + .collect::, Error>>()?; drop(cursor); // Delete the indexed attestations. diff --git a/slasher/src/database/interface.rs b/slasher/src/database/interface.rs index 5bb920383c..46cf9a4a0c 100644 --- a/slasher/src/database/interface.rs +++ b/slasher/src/database/interface.rs @@ -7,6 +7,8 @@ use std::path::PathBuf; use crate::database::lmdb_impl; #[cfg(feature = "mdbx")] use crate::database::mdbx_impl; +#[cfg(feature = "redb")] +use crate::database::redb_impl; #[derive(Debug)] pub enum Environment { @@ -14,6 +16,8 @@ pub enum Environment { Mdbx(mdbx_impl::Environment), #[cfg(feature = "lmdb")] Lmdb(lmdb_impl::Environment), + #[cfg(feature = "redb")] + Redb(redb_impl::Environment), Disabled, } @@ -23,6 +27,8 @@ pub enum RwTransaction<'env> { Mdbx(mdbx_impl::RwTransaction<'env>), #[cfg(feature = "lmdb")] Lmdb(lmdb_impl::RwTransaction<'env>), + #[cfg(feature = "redb")] + Redb(redb_impl::RwTransaction<'env>), Disabled(PhantomData<&'env ()>), } @@ -32,6 +38,8 @@ pub enum Database<'env> { Mdbx(mdbx_impl::Database<'env>), #[cfg(feature = "lmdb")] Lmdb(lmdb_impl::Database<'env>), + #[cfg(feature = "redb")] + Redb(redb_impl::Database<'env>), Disabled(PhantomData<&'env ()>), } @@ -54,6 +62,8 @@ pub enum Cursor<'env> { Mdbx(mdbx_impl::Cursor<'env>), #[cfg(feature = "lmdb")] Lmdb(lmdb_impl::Cursor<'env>), + #[cfg(feature = "redb")] + Redb(redb_impl::Cursor<'env>), Disabled(PhantomData<&'env ()>), } @@ -67,6 +77,8 @@ impl Environment { DatabaseBackend::Mdbx => mdbx_impl::Environment::new(config).map(Environment::Mdbx), #[cfg(feature = "lmdb")] DatabaseBackend::Lmdb => lmdb_impl::Environment::new(config).map(Environment::Lmdb), + #[cfg(feature = "redb")] + DatabaseBackend::Redb => redb_impl::Environment::new(config).map(Environment::Redb), DatabaseBackend::Disabled => Err(Error::SlasherDatabaseBackendDisabled), } } @@ -77,6 +89,8 @@ impl Environment { Self::Mdbx(env) => env.create_databases(), #[cfg(feature = "lmdb")] Self::Lmdb(env) => env.create_databases(), + #[cfg(feature = "redb")] + Self::Redb(env) => env.create_databases(), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -87,6 +101,8 @@ impl Environment { Self::Mdbx(env) => env.begin_rw_txn().map(RwTransaction::Mdbx), #[cfg(feature = "lmdb")] Self::Lmdb(env) => env.begin_rw_txn().map(RwTransaction::Lmdb), + #[cfg(feature = "redb")] + Self::Redb(env) => env.begin_rw_txn().map(RwTransaction::Redb), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -98,6 +114,8 @@ impl Environment { Self::Mdbx(env) => env.filenames(config), #[cfg(feature = "lmdb")] Self::Lmdb(env) => env.filenames(config), + #[cfg(feature = "redb")] + Self::Redb(env) => env.filenames(config), _ => vec![], } } @@ -106,7 +124,7 @@ impl Environment { impl<'env> RwTransaction<'env> { pub fn get + ?Sized>( &'env self, - db: &Database<'env>, + db: &'env Database, key: &K, ) -> Result>, Error> { match (self, db) { @@ -114,6 +132,8 @@ impl<'env> RwTransaction<'env> { (Self::Mdbx(txn), Database::Mdbx(db)) => txn.get(db, key), #[cfg(feature = "lmdb")] (Self::Lmdb(txn), Database::Lmdb(db)) => txn.get(db, key), + #[cfg(feature = "redb")] + (Self::Redb(txn), Database::Redb(db)) => txn.get(db, key), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -129,6 +149,8 @@ impl<'env> RwTransaction<'env> { (Self::Mdbx(txn), Database::Mdbx(db)) => txn.put(db, key, value), #[cfg(feature = "lmdb")] (Self::Lmdb(txn), Database::Lmdb(db)) => txn.put(db, key, value), + #[cfg(feature = "redb")] + (Self::Redb(txn), Database::Redb(db)) => txn.put(db, key, value), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -139,16 +161,8 @@ impl<'env> RwTransaction<'env> { (Self::Mdbx(txn), Database::Mdbx(db)) => txn.del(db, key), #[cfg(feature = "lmdb")] (Self::Lmdb(txn), Database::Lmdb(db)) => txn.del(db, key), - _ => Err(Error::MismatchedDatabaseVariant), - } - } - - pub fn cursor<'a>(&'a mut self, db: &Database) -> Result, Error> { - match (self, db) { - #[cfg(feature = "mdbx")] - (Self::Mdbx(txn), Database::Mdbx(db)) => txn.cursor(db).map(Cursor::Mdbx), - #[cfg(feature = "lmdb")] - (Self::Lmdb(txn), Database::Lmdb(db)) => txn.cursor(db).map(Cursor::Lmdb), + #[cfg(feature = "redb")] + (Self::Redb(txn), Database::Redb(db)) => txn.del(db, key), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -159,6 +173,20 @@ impl<'env> RwTransaction<'env> { Self::Mdbx(txn) => txn.commit(), #[cfg(feature = "lmdb")] Self::Lmdb(txn) => txn.commit(), + #[cfg(feature = "redb")] + Self::Redb(txn) => txn.commit(), + _ => Err(Error::MismatchedDatabaseVariant), + } + } + + pub fn cursor<'a>(&'a mut self, db: &'a Database) -> Result, Error> { + match (self, db) { + #[cfg(feature = "mdbx")] + (Self::Mdbx(txn), Database::Mdbx(db)) => txn.cursor(db).map(Cursor::Mdbx), + #[cfg(feature = "lmdb")] + (Self::Lmdb(txn), Database::Lmdb(db)) => txn.cursor(db).map(Cursor::Lmdb), + #[cfg(feature = "redb")] + (Self::Redb(txn), Database::Redb(db)) => txn.cursor(db).map(Cursor::Redb), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -172,6 +200,8 @@ impl<'env> Cursor<'env> { Cursor::Mdbx(cursor) => cursor.first_key(), #[cfg(feature = "lmdb")] Cursor::Lmdb(cursor) => cursor.first_key(), + #[cfg(feature = "redb")] + Cursor::Redb(cursor) => cursor.first_key(), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -183,6 +213,8 @@ impl<'env> Cursor<'env> { Cursor::Mdbx(cursor) => cursor.last_key(), #[cfg(feature = "lmdb")] Cursor::Lmdb(cursor) => cursor.last_key(), + #[cfg(feature = "redb")] + Cursor::Redb(cursor) => cursor.last_key(), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -193,17 +225,8 @@ impl<'env> Cursor<'env> { Cursor::Mdbx(cursor) => cursor.next_key(), #[cfg(feature = "lmdb")] Cursor::Lmdb(cursor) => cursor.next_key(), - _ => Err(Error::MismatchedDatabaseVariant), - } - } - - /// Get the key value pair at the current position. - pub fn get_current(&mut self) -> Result, Error> { - match self { - #[cfg(feature = "mdbx")] - Cursor::Mdbx(cursor) => cursor.get_current(), - #[cfg(feature = "lmdb")] - Cursor::Lmdb(cursor) => cursor.get_current(), + #[cfg(feature = "redb")] + Cursor::Redb(cursor) => cursor.next_key(), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -214,6 +237,8 @@ impl<'env> Cursor<'env> { Cursor::Mdbx(cursor) => cursor.delete_current(), #[cfg(feature = "lmdb")] Cursor::Lmdb(cursor) => cursor.delete_current(), + #[cfg(feature = "redb")] + Cursor::Redb(cursor) => cursor.delete_current(), _ => Err(Error::MismatchedDatabaseVariant), } } @@ -224,6 +249,23 @@ impl<'env> Cursor<'env> { Self::Mdbx(cursor) => cursor.put(key, value), #[cfg(feature = "lmdb")] Self::Lmdb(cursor) => cursor.put(key, value), + #[cfg(feature = "redb")] + Self::Redb(cursor) => cursor.put(key, value), + _ => Err(Error::MismatchedDatabaseVariant), + } + } + + pub fn delete_while( + &mut self, + f: impl Fn(&[u8]) -> Result, + ) -> Result>, Error> { + match self { + #[cfg(feature = "mdbx")] + Self::Mdbx(txn) => txn.delete_while(f), + #[cfg(feature = "lmdb")] + Self::Lmdb(txn) => txn.delete_while(f), + #[cfg(feature = "redb")] + Self::Redb(txn) => txn.delete_while(f), _ => Err(Error::MismatchedDatabaseVariant), } } diff --git a/slasher/src/database/lmdb_impl.rs b/slasher/src/database/lmdb_impl.rs index 78deaf1767..20d89a36fb 100644 --- a/slasher/src/database/lmdb_impl.rs +++ b/slasher/src/database/lmdb_impl.rs @@ -100,7 +100,7 @@ impl Environment { impl<'env> RwTransaction<'env> { pub fn get + ?Sized>( &'env self, - db: &Database<'env>, + db: &'env Database, key: &K, ) -> Result>, Error> { Ok(self.txn.get(db.db, key).optional()?.map(Cow::Borrowed)) @@ -182,6 +182,29 @@ impl<'env> Cursor<'env> { .put(&key, &value, RwTransaction::write_flags())?; Ok(()) } + + pub fn delete_while( + &mut self, + f: impl Fn(&[u8]) -> Result, + ) -> Result>, Error> { + let mut result = vec![]; + + loop { + let (key_bytes, value) = self.get_current()?.ok_or(Error::MissingKey)?; + + if f(&key_bytes)? { + result.push(value); + self.delete_current()?; + if self.next_key()?.is_none() { + break; + } + } else { + break; + } + } + + Ok(result) + } } /// Mix-in trait for loading values from LMDB that may or may not exist. diff --git a/slasher/src/database/mdbx_impl.rs b/slasher/src/database/mdbx_impl.rs index d25f17e7ac..e249de963f 100644 --- a/slasher/src/database/mdbx_impl.rs +++ b/slasher/src/database/mdbx_impl.rs @@ -113,7 +113,7 @@ impl<'env> RwTransaction<'env> { pub fn get + ?Sized>( &'env self, - db: &Database<'env>, + db: &'env Database, key: &K, ) -> Result>, Error> { Ok(self.txn.get(&db.db, key.as_ref())?) @@ -183,4 +183,27 @@ impl<'env> Cursor<'env> { .put(key.as_ref(), value.as_ref(), RwTransaction::write_flags())?; Ok(()) } + + pub fn delete_while( + &mut self, + f: impl Fn(&[u8]) -> Result, + ) -> Result>, Error> { + let mut result = vec![]; + + loop { + let (key_bytes, value) = self.get_current()?.ok_or(Error::MissingKey)?; + + if f(&key_bytes)? { + result.push(value); + self.delete_current()?; + if self.next_key()?.is_none() { + break; + } + } else { + break; + } + } + + Ok(result) + } } diff --git a/slasher/src/database/redb_impl.rs b/slasher/src/database/redb_impl.rs new file mode 100644 index 0000000000..da7b4e38ed --- /dev/null +++ b/slasher/src/database/redb_impl.rs @@ -0,0 +1,276 @@ +#![cfg(feature = "redb")] +use crate::{ + config::REDB_DATA_FILENAME, + database::{ + interface::{Key, OpenDatabases, Value}, + *, + }, + Config, Error, +}; +use derivative::Derivative; +use redb::{ReadableTable, TableDefinition}; +use std::{borrow::Cow, path::PathBuf}; + +#[derive(Debug)] +pub struct Environment { + _db_count: usize, + db: redb::Database, +} + +#[derive(Debug)] +pub struct Database<'env> { + table_name: String, + _phantom: PhantomData<&'env ()>, +} + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct RwTransaction<'env> { + #[derivative(Debug = "ignore")] + txn: redb::WriteTransaction, + _phantom: PhantomData<&'env ()>, +} + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct Cursor<'env> { + #[derivative(Debug = "ignore")] + txn: &'env redb::WriteTransaction, + db: &'env Database<'env>, + current_key: Option>, +} + +impl Environment { + pub fn new(config: &Config) -> Result { + let db_path = config.database_path.join(REDB_DATA_FILENAME); + let database = redb::Database::create(db_path)?; + + Ok(Environment { + _db_count: MAX_NUM_DBS, + db: database, + }) + } + + pub fn create_databases(&self) -> Result { + let indexed_attestation_db = self.create_table(INDEXED_ATTESTATION_DB)?; + let indexed_attestation_id_db = self.create_table(INDEXED_ATTESTATION_ID_DB)?; + let attesters_db = self.create_table(ATTESTERS_DB)?; + let attesters_max_targets_db = self.create_table(ATTESTERS_MAX_TARGETS_DB)?; + let min_targets_db = self.create_table(MIN_TARGETS_DB)?; + let max_targets_db = self.create_table(MAX_TARGETS_DB)?; + let current_epochs_db = self.create_table(CURRENT_EPOCHS_DB)?; + let proposers_db = self.create_table(PROPOSERS_DB)?; + let metadata_db = self.create_table(METADATA_DB)?; + + Ok(OpenDatabases { + indexed_attestation_db, + indexed_attestation_id_db, + attesters_db, + attesters_max_targets_db, + min_targets_db, + max_targets_db, + current_epochs_db, + proposers_db, + metadata_db, + }) + } + + pub fn create_table<'env>( + &'env self, + table_name: &'env str, + ) -> Result, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = TableDefinition::new(table_name); + let tx = self.db.begin_write()?; + tx.open_table(table_definition)?; + tx.commit()?; + + Ok(crate::Database::Redb(Database { + table_name: table_name.to_string(), + _phantom: PhantomData, + })) + } + + pub fn filenames(&self, config: &Config) -> Vec { + vec![config.database_path.join(BASE_DB)] + } + + pub fn begin_rw_txn(&self) -> Result { + let mut txn = self.db.begin_write()?; + txn.set_durability(redb::Durability::Eventual); + Ok(RwTransaction { + txn, + _phantom: PhantomData, + }) + } +} + +impl<'env> RwTransaction<'env> { + pub fn get + ?Sized>( + &'env self, + db: &'env Database, + key: &K, + ) -> Result>, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&db.table_name); + let table = self.txn.open_table(table_definition)?; + let result = table.get(key.as_ref())?; + if let Some(access_guard) = result { + let value = access_guard.value().to_vec(); + Ok(Some(Cow::from(value))) + } else { + Ok(None) + } + } + + pub fn put, V: AsRef<[u8]>>( + &mut self, + db: &Database, + key: K, + value: V, + ) -> Result<(), Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&db.table_name); + let mut table = self.txn.open_table(table_definition)?; + table.insert(key.as_ref(), value.as_ref())?; + + Ok(()) + } + + pub fn del>(&mut self, db: &Database, key: K) -> Result<(), Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&db.table_name); + let mut table = self.txn.open_table(table_definition)?; + table.remove(key.as_ref())?; + + Ok(()) + } + + pub fn commit(self) -> Result<(), Error> { + self.txn.commit()?; + Ok(()) + } + + pub fn cursor<'a>(&'a mut self, db: &'a Database) -> Result, Error> { + Ok(Cursor { + txn: &self.txn, + db, + current_key: None, + }) + } +} + +impl<'env> Cursor<'env> { + pub fn first_key(&mut self) -> Result, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let table = self.txn.open_table(table_definition)?; + let first = table + .iter()? + .next() + .map(|x| x.map(|(key, _)| key.value().to_vec())); + + if let Some(owned_key) = first { + let owned_key = owned_key?; + self.current_key = Some(Cow::from(owned_key)); + Ok(self.current_key.clone()) + } else { + Ok(None) + } + } + + pub fn last_key(&mut self) -> Result>, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let table = self.txn.open_table(table_definition)?; + let last = table + .iter()? + .next_back() + .map(|x| x.map(|(key, _)| key.value().to_vec())); + + if let Some(owned_key) = last { + let owned_key = owned_key?; + self.current_key = Some(Cow::from(owned_key)); + return Ok(self.current_key.clone()); + } + Ok(None) + } + + pub fn get_current(&self) -> Result, Value<'env>)>, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let table = self.txn.open_table(table_definition)?; + if let Some(key) = &self.current_key { + let result = table.get(key.as_ref())?; + + if let Some(access_guard) = result { + let value = access_guard.value().to_vec(); + return Ok(Some((key.clone(), Cow::from(value)))); + } + } + Ok(None) + } + + pub fn next_key(&mut self) -> Result>, Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let table = self.txn.open_table(table_definition)?; + if let Some(current_key) = &self.current_key { + let range: std::ops::RangeFrom<&[u8]> = current_key..; + + let next = table + .range(range)? + .next() + .map(|x| x.map(|(key, _)| key.value().to_vec())); + + if let Some(owned_key) = next { + let owned_key = owned_key?; + self.current_key = Some(Cow::from(owned_key)); + return Ok(self.current_key.clone()); + } + } + Ok(None) + } + + pub fn delete_current(&self) -> Result<(), Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let mut table = self.txn.open_table(table_definition)?; + if let Some(key) = &self.current_key { + table.remove(key.as_ref())?; + } + Ok(()) + } + + pub fn delete_while( + &self, + f: impl Fn(&[u8]) -> Result, + ) -> Result>, Error> { + let mut deleted_values = vec![]; + if let Some(current_key) = &self.current_key { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + + let mut table = self.txn.open_table(table_definition)?; + + let deleted = + table.extract_from_if(current_key.as_ref().., |key, _| f(key).unwrap_or(false))?; + + deleted.for_each(|result| { + if let Ok(item) = result { + let value = item.1.value().to_vec(); + deleted_values.push(Cow::from(value)); + } + }) + }; + Ok(deleted_values) + } + + pub fn put, V: AsRef<[u8]>>(&mut self, key: K, value: V) -> Result<(), Error> { + let table_definition: TableDefinition<'_, &[u8], &[u8]> = + TableDefinition::new(&self.db.table_name); + let mut table = self.txn.open_table(table_definition)?; + table.insert(key.as_ref(), value.as_ref())?; + + Ok(()) + } +} diff --git a/slasher/src/error.rs b/slasher/src/error.rs index 8d3295b22a..b2e32f3dcd 100644 --- a/slasher/src/error.rs +++ b/slasher/src/error.rs @@ -8,6 +8,8 @@ pub enum Error { DatabaseMdbxError(mdbx::Error), #[cfg(feature = "lmdb")] DatabaseLmdbError(lmdb::Error), + #[cfg(feature = "redb")] + DatabaseRedbError(redb::Error), SlasherDatabaseBackendDisabled, MismatchedDatabaseVariant, DatabaseIOError(io::Error), @@ -67,6 +69,7 @@ pub enum Error { MissingIndexedAttestationId, MissingIndexedAttestationIdKey, InconsistentAttestationDataRoot, + MissingKey, } #[cfg(feature = "mdbx")] @@ -89,6 +92,41 @@ impl From for Error { } } +#[cfg(feature = "redb")] +impl From for Error { + fn from(e: redb::TableError) -> Self { + Error::DatabaseRedbError(e.into()) + } +} + +#[cfg(feature = "redb")] +impl From for Error { + fn from(e: redb::TransactionError) -> Self { + Error::DatabaseRedbError(e.into()) + } +} + +#[cfg(feature = "redb")] +impl From for Error { + fn from(e: redb::DatabaseError) -> Self { + Error::DatabaseRedbError(e.into()) + } +} + +#[cfg(feature = "redb")] +impl From for Error { + fn from(e: redb::StorageError) -> Self { + Error::DatabaseRedbError(e.into()) + } +} + +#[cfg(feature = "redb")] +impl From for Error { + fn from(e: redb::CommitError) -> Self { + Error::DatabaseRedbError(e.into()) + } +} + impl From for Error { fn from(e: io::Error) -> Self { Error::DatabaseIOError(e) diff --git a/slasher/src/lib.rs b/slasher/src/lib.rs index 4d58fa7702..d3a26337d6 100644 --- a/slasher/src/lib.rs +++ b/slasher/src/lib.rs @@ -1,6 +1,6 @@ #![deny(missing_debug_implementations)] #![cfg_attr( - not(any(feature = "mdbx", feature = "lmdb")), + not(any(feature = "mdbx", feature = "lmdb", feature = "redb")), allow(unused, clippy::drop_non_drop) )] diff --git a/slasher/tests/attester_slashings.rs b/slasher/tests/attester_slashings.rs index 902141d971..cc6e57d95d 100644 --- a/slasher/tests/attester_slashings.rs +++ b/slasher/tests/attester_slashings.rs @@ -1,4 +1,4 @@ -#![cfg(any(feature = "mdbx", feature = "lmdb"))] +#![cfg(any(feature = "mdbx", feature = "lmdb", feature = "redb"))] use logging::test_logger; use maplit::hashset; diff --git a/slasher/tests/backend.rs b/slasher/tests/backend.rs index fd1a6ae14f..c24b861b18 100644 --- a/slasher/tests/backend.rs +++ b/slasher/tests/backend.rs @@ -1,4 +1,4 @@ -#![cfg(feature = "lmdb")] +#![cfg(any(feature = "lmdb", feature = "redb"))] use slasher::{config::MDBX_DATA_FILENAME, Config, DatabaseBackend, DatabaseBackendOverride}; use std::fs::File; @@ -41,7 +41,7 @@ fn no_override_with_existing_mdbx_db() { } #[test] -#[cfg(all(not(feature = "mdbx"), feature = "lmdb"))] +#[cfg(all(not(feature = "mdbx"), feature = "lmdb", not(feature = "redb")))] fn failed_override_with_existing_mdbx_db() { let tempdir = tempdir().unwrap(); let mut config = Config::new(tempdir.path().into()); @@ -55,3 +55,19 @@ fn failed_override_with_existing_mdbx_db() { ); assert_eq!(config.backend, DatabaseBackend::Lmdb); } + +#[test] +#[cfg(feature = "redb")] +fn failed_override_with_existing_mdbx_db() { + let tempdir = tempdir().unwrap(); + let mut config = Config::new(tempdir.path().into()); + + let filename = config.database_path.join(MDBX_DATA_FILENAME); + File::create(&filename).unwrap(); + + assert_eq!( + config.override_backend(), + DatabaseBackendOverride::Failure(filename) + ); + assert_eq!(config.backend, DatabaseBackend::Redb); +} diff --git a/slasher/tests/proposer_slashings.rs b/slasher/tests/proposer_slashings.rs index 2d2738087d..6d2a1f5176 100644 --- a/slasher/tests/proposer_slashings.rs +++ b/slasher/tests/proposer_slashings.rs @@ -1,4 +1,4 @@ -#![cfg(any(feature = "mdbx", feature = "lmdb"))] +#![cfg(any(feature = "mdbx", feature = "lmdb", feature = "redb"))] use logging::test_logger; use slasher::{ diff --git a/slasher/tests/random.rs b/slasher/tests/random.rs index ebfe0ef4e9..0aaaa63f65 100644 --- a/slasher/tests/random.rs +++ b/slasher/tests/random.rs @@ -1,4 +1,4 @@ -#![cfg(any(feature = "mdbx", feature = "lmdb"))] +#![cfg(any(feature = "mdbx", feature = "lmdb", feature = "redb"))] use logging::test_logger; use rand::prelude::*; diff --git a/slasher/tests/wrap_around.rs b/slasher/tests/wrap_around.rs index 9a42aeb60b..2ec56bc7d5 100644 --- a/slasher/tests/wrap_around.rs +++ b/slasher/tests/wrap_around.rs @@ -1,4 +1,4 @@ -#![cfg(any(feature = "mdbx", feature = "lmdb"))] +#![cfg(any(feature = "mdbx", feature = "lmdb", feature = "redb"))] use logging::test_logger; use slasher::{