diff --git a/.github/workflows/local-testnet.yml b/.github/workflows/local-testnet.yml index f719360c6a..d496cc6348 100644 --- a/.github/workflows/local-testnet.yml +++ b/.github/workflows/local-testnet.yml @@ -36,12 +36,11 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install dependencies + - name: Install Kurtosis run: | - sudo add-apt-repository ppa:rmescandon/yq echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list sudo apt update - sudo apt install -y kurtosis-cli=1.3.1 yq + sudo apt install -y kurtosis-cli=1.3.1 kurtosis analytics disable - name: Download Docker image artifact @@ -83,12 +82,11 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install dependencies + - name: Install Kurtosis run: | - sudo add-apt-repository ppa:rmescandon/yq echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list sudo apt update - sudo apt install -y kurtosis-cli=1.3.1 yq + sudo apt install -y kurtosis-cli=1.3.1 kurtosis analytics disable - name: Download Docker image artifact @@ -119,12 +117,11 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install dependencies + - name: Install Kurtosis run: | - sudo add-apt-repository ppa:rmescandon/yq echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list sudo apt update - sudo apt install -y kurtosis-cli=1.3.1 yq + sudo apt install -y kurtosis-cli=1.3.1 kurtosis analytics disable - name: Download Docker image artifact diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index a80470cf16..d6ef180934 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -63,8 +63,8 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Install dependencies - run: apt update && apt install -y cmake - - name: Generate code coverage + run: apt update && apt install -y cmake libclang-dev + - name: 
Check for deadlocks run: | cargo lockbud -k deadlock -b -l tokio_util diff --git a/Cargo.lock b/Cargo.lock index 0d9da0c7fe..b7ba237ac7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "alloy-rlp" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26154390b1d205a4a7ac7352aa2eb4f81f391399d4e2f546fb81a2f8bb383f62" +checksum = "da0822426598f95e45dd1ea32a738dac057529a709ee645fcc516ffa4cbde08f" dependencies = [ "alloy-rlp-derive", "arrayvec", @@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "alloy-rlp-derive" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d0f2d905ebd295e7effec65e5f6868d153936130ae718352771de3e7d03c75c" +checksum = "2b09cae092c27b6f1bde952653a22708691802e57bfef4a2973b80bea21efd3f" dependencies = [ "proc-macro2", "quote", @@ -857,6 +857,23 @@ dependencies = [ "unused_port", ] +[[package]] +name = "beacon_node_fallback" +version = "0.1.0" +dependencies = [ + "environment", + "eth2", + "futures", + "itertools 0.10.5", + "serde", + "slog", + "slot_clock", + "strum", + "tokio", + "types", + "validator_metrics", +] + [[package]] name = "beacon_processor" version = "0.1.0" @@ -900,12 +917,15 @@ dependencies = [ "itertools 0.12.1", "lazy_static", "lazycell", + "log", + "prettyplease", "proc-macro2", "quote", "regex", "rustc-hash 1.1.0", "shlex", "syn 2.0.77", + "which", ] [[package]] @@ -1932,6 +1952,17 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "delay_map" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df941644b671f05f59433e481ba0d31ac10e3667de725236a4c0d587c496fba1" +dependencies = [ + "futures", + "tokio", + "tokio-util", +] + [[package]] name = "deposit_contract" version = "0.2.0" @@ -2166,20 +2197,20 @@ dependencies = [ [[package]] name = "discv5" -version = "0.7.0" +version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f569b8c367554666c8652305621e8bae3634a2ff5c6378081d5bd8c399c99f23" +checksum = "898d136ecb64116ec68aecf14d889bd30f8b1fe0c19e262953f7388dbe77052e" dependencies = [ "aes 0.8.4", "aes-gcm", "alloy-rlp", "arrayvec", "ctr 0.9.2", - "delay_map", + "delay_map 0.4.0", "enr", "fnv", "futures", - "hashlink 0.8.4", + "hashlink 0.9.1", "hex", "hkdf", "lazy_static", @@ -2187,13 +2218,13 @@ dependencies = [ "lru", "more-asserts", "multiaddr", - "parking_lot 0.11.2", + "parking_lot 0.12.3", "rand", "smallvec", - "socket2 0.4.10", + "socket2", "tokio", "tracing", - "uint", + "uint 0.10.0", "zeroize", ] @@ -2208,6 +2239,23 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "doppelganger_service" +version = "0.1.0" +dependencies = [ + "beacon_node_fallback", + "environment", + "eth2", + "futures", + "logging", + "parking_lot 0.12.3", + "slog", + "slot_clock", + "task_executor", + "tokio", + "types", +] + [[package]] name = "dsl_auto_type" version = "0.1.2" @@ -2373,12 +2421,12 @@ dependencies = [ [[package]] name = "enr" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "972070166c68827e64bd1ebc8159dd8e32d9bc2da7ebe8f20b61308f7974ad30" +checksum = "851bd664a3d3a3c175cff92b2f0df02df3c541b4895d0ae307611827aae46152" dependencies = [ "alloy-rlp", - "base64 0.21.7", + "base64 0.22.1", "bytes", "ed25519-dalek", "hex", @@ -2671,7 +2719,7 @@ dependencies = [ "serde_json", "sha3 0.9.1", "thiserror", - "uint", + "uint 0.9.5", ] [[package]] @@ -2688,7 +2736,7 @@ dependencies = [ "serde_json", "sha3 0.10.8", "thiserror", - "uint", + "uint 0.9.5", ] [[package]] @@ -2730,7 +2778,7 @@ dependencies = [ "impl-rlp", "impl-serde 0.3.2", "primitive-types 0.10.1", - "uint", + "uint 0.9.5", ] [[package]] @@ -2746,7 +2794,7 @@ dependencies = [ "impl-serde 0.4.0", "primitive-types 0.12.2", "scale-info", - "uint", + "uint 0.9.5", ] [[package]] @@ -3260,9 +3308,9 @@ 
dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -3270,9 +3318,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" @@ -3288,9 +3336,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" @@ -3304,9 +3352,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -3326,15 +3374,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-ticker" @@ -3355,9 +3403,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -3498,6 +3546,18 @@ dependencies = [ "web-time", ] +[[package]] +name = "graffiti_file" +version = "0.1.0" +dependencies = [ + "bls", + "hex", + "serde", + "slog", + "tempfile", + "types", +] + [[package]] name = "group" version = "0.12.1" @@ -3704,7 +3764,7 @@ dependencies = [ "ipnet", "once_cell", "rand", - "socket2 0.5.7", + "socket2", "thiserror", "tinyvec", "tokio", @@ -3782,6 +3842,15 @@ dependencies = [ "hmac 0.8.1", ] +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "hostname" version = "0.3.1" @@ -3951,7 +4020,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2", "tokio", "tower-service", "tracing", @@ -4200,6 +4269,31 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "initialized_validators" +version = "0.1.0" +dependencies = [ + "account_utils", + "bincode", + "bls", + "eth2_keystore", + "filesystem", + "lockfile", + "metrics", + "parking_lot 0.12.3", + "rand", + "reqwest", + "serde", + "serde_json", + "signing_method", + "slog", + "tokio", + "types", + "url", + "validator_dir", + "validator_metrics", +] + [[package]] name = "inout" version = "0.1.3" @@ -4256,7 +4350,7 @@ 
version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" dependencies = [ - "socket2 0.5.7", + "socket2", "widestring 1.1.0", "windows-sys 0.48.0", "winreg", @@ -4722,7 +4816,7 @@ dependencies = [ "libp2p-swarm", "rand", "smallvec", - "socket2 0.5.7", + "socket2", "tokio", "tracing", "void", @@ -4823,7 +4917,7 @@ dependencies = [ "rand", "ring 0.17.8", "rustls 0.23.13", - "socket2 0.5.7", + "socket2", "thiserror", "tokio", "tracing", @@ -4877,7 +4971,7 @@ dependencies = [ "libc", "libp2p-core", "libp2p-identity", - "socket2 0.5.7", + "socket2", "tokio", "tracing", ] @@ -5019,6 +5113,7 @@ dependencies = [ "account_manager", "account_utils", "beacon_node", + "beacon_node_fallback", "beacon_processor", "bls", "boot_node", @@ -5032,6 +5127,7 @@ dependencies = [ "eth2_network_config", "ethereum_hashing", "futures", + "initialized_validators", "lighthouse_network", "lighthouse_version", "logging", @@ -5061,7 +5157,7 @@ dependencies = [ "alloy-rlp", "async-channel", "bytes", - "delay_map", + "delay_map 0.3.0", "directory", "dirs", "discv5", @@ -5630,7 +5726,7 @@ dependencies = [ "beacon_chain", "beacon_processor", "bls", - "delay_map", + "delay_map 0.3.0", "derivative", "error-chain", "eth2", @@ -5697,6 +5793,7 @@ name = "node_test_rig" version = "0.2.0" dependencies = [ "beacon_node", + "beacon_node_fallback", "environment", "eth2", "execution_layer", @@ -6387,6 +6484,16 @@ dependencies = [ "sensitive_url", ] +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn 2.0.77", +] + [[package]] name = "primeorder" version = "0.13.6" @@ -6406,7 +6513,7 @@ dependencies = [ "impl-codec 0.5.1", "impl-rlp", "impl-serde 0.3.2", - "uint", + "uint 0.9.5", ] [[package]] @@ -6420,7 +6527,7 @@ 
dependencies = [ "impl-rlp", "impl-serde 0.4.0", "scale-info", - "uint", + "uint 0.9.5", ] [[package]] @@ -6635,7 +6742,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.0.0", "rustls 0.23.13", - "socket2 0.5.7", + "socket2", "thiserror", "tokio", "tracing", @@ -6666,7 +6773,7 @@ checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" dependencies = [ "libc", "once_cell", - "socket2 0.5.7", + "socket2", "tracing", "windows-sys 0.59.0", ] @@ -7712,6 +7819,22 @@ dependencies = [ "rand_core", ] +[[package]] +name = "signing_method" +version = "0.1.0" +dependencies = [ + "eth2_keystore", + "ethereum_serde_utils", + "lockfile", + "parking_lot 0.12.3", + "reqwest", + "serde", + "task_executor", + "types", + "url", + "validator_metrics", +] + [[package]] name = "simple_asn1" version = "0.6.2" @@ -7968,16 +8091,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "socket2" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "socket2" version = "0.5.7" @@ -8094,23 +8207,31 @@ name = "store" version = "0.2.0" dependencies = [ "beacon_chain", + "bls", + "criterion", "db-key", "directory", "ethereum_ssz", "ethereum_ssz_derive", "itertools 0.10.5", "leveldb", + "logging", "lru", "metrics", "parking_lot 0.12.3", + "rand", "safe_arith", "serde", "slog", "sloggers", + "smallvec", "state_processing", "strum", + "superstruct", "tempfile", "types", + "xdelta3", + "zstd 0.13.1", ] [[package]] @@ -8572,7 +8693,7 @@ dependencies = [ "mio", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.7", + "socket2", "tokio-macros", "windows-sys 0.52.0", ] @@ -8628,7 +8749,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand", - "socket2 0.5.7", + "socket2", "tokio", "tokio-util", "whoami", @@ -9008,6 +9129,18 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "uint" 
+version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "909988d098b2f738727b161a106cfc7cab00c539c2687a8836f8e565976fb53e" +dependencies = [ + "byteorder", + "crunchy", + "hex", + "static_assertions", +] + [[package]] name = "unarray" version = "0.1.4" @@ -9147,54 +9280,34 @@ name = "validator_client" version = "0.3.5" dependencies = [ "account_utils", - "bincode", - "bls", + "beacon_node_fallback", "clap", "clap_utils", - "deposit_contract", "directory", "dirs", + "doppelganger_service", "environment", "eth2", - "eth2_keystore", - "ethereum_serde_utils", "fdlimit", - "filesystem", - "futures", - "hex", + "graffiti_file", "hyper 1.4.1", - "itertools 0.10.5", - "libsecp256k1", - "lighthouse_version", - "lockfile", - "logging", - "malloc_utils", + "initialized_validators", "metrics", "monitoring_api", "parking_lot 0.12.3", - "rand", "reqwest", - "ring 0.16.20", - "safe_arith", "sensitive_url", "serde", - "serde_json", "slashing_protection", "slog", "slot_clock", - "strum", - "sysinfo", - "system_health", - "task_executor", - "tempfile", "tokio", - "tokio-stream", - "tree_hash", "types", - "url", - "validator_dir", - "warp", - "warp_utils", + "validator_http_api", + "validator_http_metrics", + "validator_metrics", + "validator_services", + "validator_store", ] [[package]] @@ -9215,6 +9328,67 @@ dependencies = [ "types", ] +[[package]] +name = "validator_http_api" +version = "0.1.0" +dependencies = [ + "account_utils", + "beacon_node_fallback", + "bls", + "deposit_contract", + "doppelganger_service", + "eth2", + "eth2_keystore", + "ethereum_serde_utils", + "filesystem", + "futures", + "graffiti_file", + "initialized_validators", + "itertools 0.10.5", + "lighthouse_version", + "logging", + "parking_lot 0.12.3", + "rand", + "sensitive_url", + "serde", + "signing_method", + "slashing_protection", + "slog", + "slot_clock", + "sysinfo", + "system_health", + "task_executor", + "tempfile", + "tokio", + "tokio-stream", + "types", + 
"url", + "validator_dir", + "validator_services", + "validator_store", + "warp", + "warp_utils", +] + +[[package]] +name = "validator_http_metrics" +version = "0.1.0" +dependencies = [ + "lighthouse_version", + "malloc_utils", + "metrics", + "parking_lot 0.12.3", + "serde", + "slog", + "slot_clock", + "types", + "validator_metrics", + "validator_services", + "validator_store", + "warp", + "warp_utils", +] + [[package]] name = "validator_manager" version = "0.1.0" @@ -9236,7 +9410,54 @@ dependencies = [ "tokio", "tree_hash", "types", - "validator_client", + "validator_http_api", +] + +[[package]] +name = "validator_metrics" +version = "0.1.0" +dependencies = [ + "metrics", +] + +[[package]] +name = "validator_services" +version = "0.1.0" +dependencies = [ + "beacon_node_fallback", + "bls", + "doppelganger_service", + "environment", + "eth2", + "futures", + "graffiti_file", + "parking_lot 0.12.3", + "safe_arith", + "slog", + "slot_clock", + "tokio", + "tree_hash", + "types", + "validator_metrics", + "validator_store", +] + +[[package]] +name = "validator_store" +version = "0.1.0" +dependencies = [ + "account_utils", + "doppelganger_service", + "initialized_validators", + "parking_lot 0.12.3", + "serde", + "signing_method", + "slashing_protection", + "slog", + "slot_clock", + "task_executor", + "types", + "validator_metrics", ] [[package]] @@ -9516,19 +9737,21 @@ dependencies = [ "eth2_keystore", "eth2_network_config", "futures", + "initialized_validators", "logging", "parking_lot 0.12.3", "reqwest", "serde", "serde_json", "serde_yaml", + "slashing_protection", "slot_clock", "task_executor", "tempfile", "tokio", "types", "url", - "validator_client", + "validator_store", "zip", ] @@ -9538,6 +9761,18 @@ version = "0.25.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +[[package]] +name = "which" +version = "4.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.37", +] + [[package]] name = "whoami" version = "1.5.2" @@ -9937,6 +10172,20 @@ dependencies = [ "time", ] +[[package]] +name = "xdelta3" +version = "0.1.5" +source = "git+http://github.com/sigp/xdelta3-rs?rev=50d63cdf1878e5cf3538e9aae5eed34a22c64e4a#50d63cdf1878e5cf3538e9aae5eed34a22c64e4a" +dependencies = [ + "bindgen", + "cc", + "futures-io", + "futures-util", + "libc", + "log", + "rand", +] + [[package]] name = "xml-rs" version = "0.8.22" @@ -10061,7 +10310,7 @@ dependencies = [ "pbkdf2 0.11.0", "sha1", "time", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -10070,7 +10319,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +dependencies = [ + "zstd-safe 7.1.0", ] [[package]] @@ -10083,6 +10341,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.13+zstd.1.5.6" diff --git a/Cargo.toml b/Cargo.toml index 7094ff6077..8cf4abb33e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,7 +83,17 @@ members = [ "testing/web3signer_tests", "validator_client", + "validator_client/beacon_node_fallback", + "validator_client/doppelganger_service", + "validator_client/graffiti_file", + "validator_client/http_api", + 
"validator_client/http_metrics", + "validator_client/initialized_validators", + "validator_client/signing_method", "validator_client/slashing_protection", + "validator_client/validator_metrics", + "validator_client/validator_services", + "validator_client/validator_store", "validator_manager", @@ -101,6 +111,7 @@ alloy-consensus = "0.3.0" anyhow = "1" arbitrary = { version = "1", features = ["derive"] } async-channel = "1.9.0" +axum = "0.7.7" bincode = "1" bitvec = "1" byteorder = "1" @@ -116,7 +127,7 @@ derivative = "2" dirs = "3" either = "1.9" rust_eth_kzg = "0.5.1" -discv5 = { version = "0.7", features = ["libp2p"] } +discv5 = { version = "0.9", features = ["libp2p"] } env_logger = "0.9" error-chain = "0.12" ethereum_hashing = "0.7.0" @@ -129,6 +140,7 @@ exit-future = "0.2" fnv = "1" fs2 = "0.4" futures = "0.3" +graffiti_file = { path = "validator_client/graffiti_file" } hex = "0.4" hashlink = "0.9.0" hyper = "1" @@ -170,7 +182,7 @@ superstruct = "0.8" syn = "1" sysinfo = "0.26" tempfile = "3" -tokio = { version = "1", features = ["rt-multi-thread", "sync", "signal"] } +tokio = { version = "1", features = ["rt-multi-thread", "sync", "signal", "macros"] } tokio-stream = { version = "0.1", features = ["sync"] } tokio-util = { version = "0.7", features = ["codec", "compat", "time"] } tracing = "0.1.40" @@ -190,12 +202,15 @@ zip = "0.6" account_utils = { path = "common/account_utils" } beacon_chain = { path = "beacon_node/beacon_chain" } beacon_node = { path = "beacon_node" } +beacon_node_fallback = { path = "validator_client/beacon_node_fallback" } beacon_processor = { path = "beacon_node/beacon_processor" } bls = { path = "crypto/bls" } clap_utils = { path = "common/clap_utils" } compare_fields = { path = "common/compare_fields" } deposit_contract = { path = "common/deposit_contract" } directory = { path = "common/directory" } +doppelganger_service = { path = "validator_client/doppelganger_service" } +validator_services = { path = 
"validator_client/validator_services" } environment = { path = "lighthouse/environment" } eth1 = { path = "beacon_node/eth1" } eth1_test_rig = { path = "testing/eth1_test_rig" } @@ -212,6 +227,7 @@ fork_choice = { path = "consensus/fork_choice" } genesis = { path = "beacon_node/genesis" } gossipsub = { path = "beacon_node/lighthouse_network/gossipsub/" } http_api = { path = "beacon_node/http_api" } +initialized_validators = { path = "validator_client/initialized_validators" } int_to_bytes = { path = "consensus/int_to_bytes" } kzg = { path = "crypto/kzg" } metrics = { path = "common/metrics" } @@ -229,18 +245,26 @@ pretty_reqwest_error = { path = "common/pretty_reqwest_error" } proto_array = { path = "consensus/proto_array" } safe_arith = { path = "consensus/safe_arith" } sensitive_url = { path = "common/sensitive_url" } +signing_method = { path = "validator_client/signing_method" } slasher = { path = "slasher", default-features = false } slashing_protection = { path = "validator_client/slashing_protection" } slot_clock = { path = "common/slot_clock" } state_processing = { path = "consensus/state_processing" } store = { path = "beacon_node/store" } swap_or_not_shuffle = { path = "consensus/swap_or_not_shuffle" } +system_health = { path = "common/system_health" } task_executor = { path = "common/task_executor" } types = { path = "consensus/types" } unused_port = { path = "common/unused_port" } validator_client = { path = "validator_client" } validator_dir = { path = "common/validator_dir" } +validator_http_api = { path = "validator_client/http_api" } +validator_http_metrics = { path = "validator_client/http_metrics" } +validator_metrics = { path = "validator_client/validator_metrics" } +validator_store= { path = "validator_client/validator_store" } warp_utils = { path = "common/warp_utils" } +xdelta3 = { git = "http://github.com/sigp/xdelta3-rs", rev = "50d63cdf1878e5cf3538e9aae5eed34a22c64e4a" } +zstd = "0.13" [profile.maxperf] inherits = "release" diff --git 
a/beacon_node/beacon_chain/benches/benches.rs b/beacon_node/beacon_chain/benches/benches.rs index b2f17062dc..c09af00be6 100644 --- a/beacon_node/beacon_chain/benches/benches.rs +++ b/beacon_node/beacon_chain/benches/benches.rs @@ -37,12 +37,15 @@ fn all_benches(c: &mut Criterion) { let kzg = get_kzg(&spec); for blob_count in [1, 2, 3, 6] { - let kzg = kzg.clone(); - let (signed_block, blob_sidecars) = create_test_block_and_blobs::(blob_count, &spec); + let (signed_block, blobs) = create_test_block_and_blobs::(blob_count, &spec); - let column_sidecars = - blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, &kzg.clone(), &spec) - .unwrap(); + let column_sidecars = blobs_to_data_column_sidecars( + &blobs.iter().collect::>(), + &signed_block, + &kzg, + &spec, + ) + .unwrap(); let spec = spec.clone(); diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index f8dfbc5515..a78ae266e5 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -34,7 +34,6 @@ use crate::execution_payload::{get_execution_payload, NotifyExecutionLayer, Prep use crate::fork_choice_signal::{ForkChoiceSignalRx, ForkChoiceSignalTx, ForkChoiceWaitResult}; use crate::graffiti_calculator::GraffitiCalculator; use crate::head_tracker::{HeadTracker, HeadTrackerReader, SszHeadTracker}; -use crate::historical_blocks::HistoricalBlockError; use crate::light_client_finality_update_verification::{ Error as LightClientFinalityUpdateError, VerifiedLightClientFinalityUpdate, }; @@ -89,7 +88,7 @@ use kzg::Kzg; use operation_pool::{ CompactAttestationRef, OperationPool, PersistedOperationPool, ReceivedPreCapella, }; -use parking_lot::{Mutex, RwLock}; +use parking_lot::{Mutex, RwLock, RwLockWriteGuard}; use proto_array::{DoNotReOrg, ProposerHeadError}; use safe_arith::SafeArith; use slasher::Slasher; @@ -121,6 +120,7 @@ use store::{ DatabaseBlock, Error as DBError, HotColdDB, KeyValueStore, 
KeyValueStoreOp, StoreItem, StoreOp, }; use task_executor::{ShutdownReason, TaskExecutor}; +use tokio::sync::mpsc::Receiver; use tokio_stream::Stream; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; @@ -755,12 +755,10 @@ impl BeaconChain { ) -> Result> + '_, Error> { let oldest_block_slot = self.store.get_oldest_block_slot(); if start_slot < oldest_block_slot { - return Err(Error::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { - slot: start_slot, - oldest_block_slot, - }, - )); + return Err(Error::HistoricalBlockOutOfRange { + slot: start_slot, + oldest_block_slot, + }); } let local_head = self.head_snapshot(); @@ -769,7 +767,6 @@ impl BeaconChain { start_slot, local_head.beacon_state.clone(), local_head.beacon_block_root, - &self.spec, )?; Ok(iter.map(|result| result.map_err(Into::into))) @@ -785,21 +782,18 @@ impl BeaconChain { ) -> Result> + '_, Error> { let oldest_block_slot = self.store.get_oldest_block_slot(); if start_slot < oldest_block_slot { - return Err(Error::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { - slot: start_slot, - oldest_block_slot, - }, - )); + return Err(Error::HistoricalBlockOutOfRange { + slot: start_slot, + oldest_block_slot, + }); } self.with_head(move |head| { - let iter = self.store.forwards_block_roots_iterator_until( - start_slot, - end_slot, - || Ok((head.beacon_state.clone(), head.beacon_block_root)), - &self.spec, - )?; + let iter = + self.store + .forwards_block_roots_iterator_until(start_slot, end_slot, || { + Ok((head.beacon_state.clone(), head.beacon_block_root)) + })?; Ok(iter .map(|result| result.map_err(Into::into)) .take_while(move |result| { @@ -869,7 +863,6 @@ impl BeaconChain { start_slot, local_head.beacon_state_root(), local_head.beacon_state.clone(), - &self.spec, )?; Ok(iter.map(|result| result.map_err(Into::into))) @@ -886,12 +879,11 @@ impl BeaconChain { end_slot: Slot, ) -> Result> + '_, Error> { self.with_head(move |head| { - let iter = 
self.store.forwards_state_roots_iterator_until( - start_slot, - end_slot, - || Ok((head.beacon_state.clone(), head.beacon_state_root())), - &self.spec, - )?; + let iter = + self.store + .forwards_state_roots_iterator_until(start_slot, end_slot, || { + Ok((head.beacon_state.clone(), head.beacon_state_root())) + })?; Ok(iter .map(|result| result.map_err(Into::into)) .take_while(move |result| { @@ -991,7 +983,7 @@ impl BeaconChain { WhenSlotSkipped::Prev => self.block_root_at_slot_skips_prev(request_slot), } .or_else(|e| match e { - Error::HistoricalBlockError(_) => Ok(None), + Error::HistoricalBlockOutOfRange { .. } => Ok(None), e => Err(e), }) } @@ -2976,7 +2968,6 @@ impl BeaconChain { pub async fn process_gossip_blob( self: &Arc, blob: GossipVerifiedBlob, - publish_fn: impl FnOnce() -> Result<(), BlockError>, ) -> Result { let block_root = blob.block_root(); @@ -2995,17 +2986,9 @@ impl BeaconChain { return Err(BlockError::BlobNotRequired(blob.slot())); } - if let Some(event_handler) = self.event_handler.as_ref() { - if event_handler.has_blob_sidecar_subscribers() { - event_handler.register(EventKind::BlobSidecar(SseBlobSidecar::from_blob_sidecar( - blob.as_blob(), - ))); - } - } + self.emit_sse_blob_sidecar_events(&block_root, std::iter::once(blob.as_blob())); - let r = self - .check_gossip_blob_availability_and_import(blob, publish_fn) - .await; + let r = self.check_gossip_blob_availability_and_import(blob).await; self.remove_notified(&block_root, r) } @@ -3083,20 +3066,63 @@ impl BeaconChain { } } + self.emit_sse_blob_sidecar_events(&block_root, blobs.iter().flatten().map(Arc::as_ref)); + + let r = self + .check_rpc_blob_availability_and_import(slot, block_root, blobs) + .await; + self.remove_notified(&block_root, r) + } + + /// Process blobs retrieved from the EL and returns the `AvailabilityProcessingStatus`. + /// + /// `data_column_recv`: An optional receiver for `DataColumnSidecarList`. 
+ /// If PeerDAS is enabled, this receiver will be provided and used to send + /// the `DataColumnSidecar`s once they have been successfully computed. + pub async fn process_engine_blobs( + self: &Arc, + slot: Slot, + block_root: Hash256, + blobs: FixedBlobSidecarList, + data_column_recv: Option>>, + ) -> Result { + // If this block has already been imported to forkchoice it must have been available, so + // we don't need to process its blobs again. + if self + .canonical_head + .fork_choice_read_lock() + .contains_block(&block_root) + { + return Err(BlockError::DuplicateFullyImported(block_root)); + } + + self.emit_sse_blob_sidecar_events(&block_root, blobs.iter().flatten().map(Arc::as_ref)); + + let r = self + .check_engine_blob_availability_and_import(slot, block_root, blobs, data_column_recv) + .await; + self.remove_notified(&block_root, r) + } + + fn emit_sse_blob_sidecar_events<'a, I>(self: &Arc, block_root: &Hash256, blobs_iter: I) + where + I: Iterator>, + { if let Some(event_handler) = self.event_handler.as_ref() { if event_handler.has_blob_sidecar_subscribers() { - for blob in blobs.iter().filter_map(|maybe_blob| maybe_blob.as_ref()) { + let imported_blobs = self + .data_availability_checker + .cached_blob_indexes(block_root) + .unwrap_or_default(); + let new_blobs = blobs_iter.filter(|b| !imported_blobs.contains(&b.index)); + + for blob in new_blobs { event_handler.register(EventKind::BlobSidecar( SseBlobSidecar::from_blob_sidecar(blob), )); } } } - - let r = self - .check_rpc_blob_availability_and_import(slot, block_root, blobs) - .await; - self.remove_notified(&block_root, r) } /// Cache the columns in the processing cache, process it, then evict it from the cache if it was @@ -3186,7 +3212,7 @@ impl BeaconChain { }; let r = self - .process_availability(slot, availability, || Ok(())) + .process_availability(slot, availability, None, || Ok(())) .await; self.remove_notified(&block_root, r) .map(|availability_processing_status| { @@ -3314,7 +3340,7 @@ 
impl BeaconChain { match executed_block { ExecutedBlock::Available(block) => { - self.import_available_block(Box::new(block)).await + self.import_available_block(Box::new(block), None).await } ExecutedBlock::AvailabilityPending(block) => { self.check_block_availability_and_import(block).await @@ -3446,7 +3472,7 @@ impl BeaconChain { let availability = self .data_availability_checker .put_pending_executed_block(block)?; - self.process_availability(slot, availability, || Ok(())) + self.process_availability(slot, availability, None, || Ok(())) .await } @@ -3455,7 +3481,6 @@ impl BeaconChain { async fn check_gossip_blob_availability_and_import( self: &Arc, blob: GossipVerifiedBlob, - publish_fn: impl FnOnce() -> Result<(), BlockError>, ) -> Result { let slot = blob.slot(); if let Some(slasher) = self.slasher.as_ref() { @@ -3463,7 +3488,7 @@ impl BeaconChain { } let availability = self.data_availability_checker.put_gossip_blob(blob)?; - self.process_availability(slot, availability, publish_fn) + self.process_availability(slot, availability, None, || Ok(())) .await } @@ -3482,16 +3507,41 @@ impl BeaconChain { } } - let availability = self.data_availability_checker.put_gossip_data_columns( - slot, - block_root, - data_columns, - )?; + let availability = self + .data_availability_checker + .put_gossip_data_columns(block_root, data_columns)?; - self.process_availability(slot, availability, publish_fn) + self.process_availability(slot, availability, None, publish_fn) .await } + fn check_blobs_for_slashability( + self: &Arc, + block_root: Hash256, + blobs: &FixedBlobSidecarList, + ) -> Result<(), BlockError> { + let mut slashable_cache = self.observed_slashable.write(); + for header in blobs + .iter() + .filter_map(|b| b.as_ref().map(|b| b.signed_block_header.clone())) + .unique() + { + if verify_header_signature::(self, &header).is_ok() { + slashable_cache + .observe_slashable( + header.message.slot, + header.message.proposer_index, + block_root, + ) + .map_err(|e| 
BlockError::BeaconChainError(e.into()))?; + if let Some(slasher) = self.slasher.as_ref() { + slasher.accept_block_header(header); + } + } + } + Ok(()) + } + /// Checks if the provided blobs can make any cached blocks available, and imports immediately /// if so, otherwise caches the blob in the data availability checker. async fn check_rpc_blob_availability_and_import( @@ -3500,35 +3550,28 @@ impl BeaconChain { block_root: Hash256, blobs: FixedBlobSidecarList, ) -> Result { - // Need to scope this to ensure the lock is dropped before calling `process_availability` - // Even an explicit drop is not enough to convince the borrow checker. - { - let mut slashable_cache = self.observed_slashable.write(); - for header in blobs - .iter() - .filter_map(|b| b.as_ref().map(|b| b.signed_block_header.clone())) - .unique() - { - if verify_header_signature::(self, &header).is_ok() { - slashable_cache - .observe_slashable( - header.message.slot, - header.message.proposer_index, - block_root, - ) - .map_err(|e| BlockError::BeaconChainError(e.into()))?; - if let Some(slasher) = self.slasher.as_ref() { - slasher.accept_block_header(header); - } - } - } - } - let epoch = slot.epoch(T::EthSpec::slots_per_epoch()); + self.check_blobs_for_slashability(block_root, &blobs)?; let availability = self .data_availability_checker - .put_rpc_blobs(block_root, epoch, blobs)?; + .put_rpc_blobs(block_root, blobs)?; - self.process_availability(slot, availability, || Ok(())) + self.process_availability(slot, availability, None, || Ok(())) + .await + } + + async fn check_engine_blob_availability_and_import( + self: &Arc, + slot: Slot, + block_root: Hash256, + blobs: FixedBlobSidecarList, + data_column_recv: Option>>, + ) -> Result { + self.check_blobs_for_slashability(block_root, &blobs)?; + let availability = self + .data_availability_checker + .put_engine_blobs(block_root, blobs)?; + + self.process_availability(slot, availability, data_column_recv, || Ok(())) .await } @@ -3564,13 +3607,11 @@ impl 
BeaconChain { // This slot value is purely informative for the consumers of // `AvailabilityProcessingStatus::MissingComponents` to log an error with a slot. - let availability = self.data_availability_checker.put_rpc_custody_columns( - block_root, - slot.epoch(T::EthSpec::slots_per_epoch()), - custody_columns, - )?; + let availability = self + .data_availability_checker + .put_rpc_custody_columns(block_root, custody_columns)?; - self.process_availability(slot, availability, || Ok(())) + self.process_availability(slot, availability, None, || Ok(())) .await } @@ -3582,13 +3623,14 @@ impl BeaconChain { self: &Arc, slot: Slot, availability: Availability, + recv: Option>>, publish_fn: impl FnOnce() -> Result<(), BlockError>, ) -> Result { match availability { Availability::Available(block) => { publish_fn()?; // Block is fully available, import into fork choice - self.import_available_block(block).await + self.import_available_block(block, recv).await } Availability::MissingComponents(block_root) => Ok( AvailabilityProcessingStatus::MissingComponents(slot, block_root), @@ -3599,6 +3641,7 @@ impl BeaconChain { pub async fn import_available_block( self: &Arc, block: Box>, + data_column_recv: Option>>, ) -> Result { let AvailableExecutedBlock { block, @@ -3640,6 +3683,7 @@ impl BeaconChain { parent_block, parent_eth1_finalization_data, consensus_context, + data_column_recv, ) }, "payload_verification_handle", @@ -3678,6 +3722,7 @@ impl BeaconChain { parent_block: SignedBlindedBeaconBlock, parent_eth1_finalization_data: Eth1FinalizationData, mut consensus_context: ConsensusContext, + data_column_recv: Option>>, ) -> Result { // ----------------------------- BLOCK NOT YET ATTESTABLE ---------------------------------- // Everything in this initial section is on the hot path between processing the block and @@ -3823,7 +3868,6 @@ impl BeaconChain { // state if we returned early without committing. In other words, an error here would // corrupt the node's database permanently. 
// ----------------------------------------------------------------------------------------- - self.import_block_update_shuffling_cache(block_root, &mut state); self.import_block_observe_attestations( block, @@ -3840,15 +3884,53 @@ impl BeaconChain { ); self.import_block_update_slasher(block, &state, &mut consensus_context); - let db_write_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_WRITE); - // Store the block and its state, and execute the confirmation batch for the intermediate // states, which will delete their temporary flags. // If the write fails, revert fork choice to the version from disk, else we can // end up with blocks in fork choice that are missing from disk. // See https://github.com/sigp/lighthouse/issues/2028 let (_, signed_block, blobs, data_columns) = signed_block.deconstruct(); + // TODO(das) we currently store all subnet sampled columns. Tracking issue to exclude non + // custody columns: https://github.com/sigp/lighthouse/issues/6465 + let custody_columns_count = self.data_availability_checker.get_sampling_column_count(); + // if block is made available via blobs, dropped the data columns. + let data_columns = data_columns.filter(|columns| columns.len() == custody_columns_count); + + let data_columns = match (data_columns, data_column_recv) { + // If the block was made available via custody columns received from gossip / rpc, use them + // since we already have them. + (Some(columns), _) => Some(columns), + // Otherwise, it means blobs were likely available via fetching from EL, in this case we + // wait for the data columns to be computed (blocking). + (None, Some(mut data_column_recv)) => { + let _column_recv_timer = + metrics::start_timer(&metrics::BLOCK_PROCESSING_DATA_COLUMNS_WAIT); + // Unable to receive data columns from sender, sender is either dropped or + // failed to compute data columns from blobs. We restore fork choice here and + // return to avoid inconsistency in database. 
+ if let Some(columns) = data_column_recv.blocking_recv() { + Some(columns) + } else { + let err_msg = "Did not receive data columns from sender"; + error!( + self.log, + "Failed to store data columns into the database"; + "msg" => "Restoring fork choice from disk", + "error" => err_msg, + ); + return Err(self + .handle_import_block_db_write_error(fork_choice) + .err() + .unwrap_or(BlockError::InternalError(err_msg.to_string()))); + } + } + // No data columns present and compute data columns task was not spawned. + // Could either be no blobs in the block or before PeerDAS activation. + (None, None) => None, + }; + let block = signed_block.message(); + let db_write_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_WRITE); ops.extend( confirmed_state_roots .into_iter() @@ -3890,33 +3972,10 @@ impl BeaconChain { "msg" => "Restoring fork choice from disk", "error" => ?e, ); - - // Clear the early attester cache to prevent attestations which we would later be unable - // to verify due to the failure. - self.early_attester_cache.clear(); - - // Since the write failed, try to revert the canonical head back to what was stored - // in the database. This attempts to prevent inconsistency between the database and - // fork choice. 
- if let Err(e) = self.canonical_head.restore_from_store( - fork_choice, - ResetPayloadStatuses::always_reset_conditionally( - self.config.always_reset_payload_statuses, - ), - &self.store, - &self.spec, - &self.log, - ) { - crit!( - self.log, - "No stored fork choice found to restore from"; - "error" => ?e, - "warning" => "The database is likely corrupt now, consider --purge-db" - ); - return Err(BlockError::BeaconChainError(e)); - } - - return Err(e.into()); + return Err(self + .handle_import_block_db_write_error(fork_choice) + .err() + .unwrap_or(e.into())); } drop(txn_lock); @@ -3984,6 +4043,41 @@ impl BeaconChain { Ok(block_root) } + fn handle_import_block_db_write_error( + &self, + // We don't actually need this value, however it's always present when we call this function + // and it needs to be dropped to prevent a dead-lock. Requiring it to be passed here is + // defensive programming. + fork_choice_write_lock: RwLockWriteGuard>, + ) -> Result<(), BlockError> { + // Clear the early attester cache to prevent attestations which we would later be unable + // to verify due to the failure. + self.early_attester_cache.clear(); + + // Since the write failed, try to revert the canonical head back to what was stored + // in the database. This attempts to prevent inconsistency between the database and + // fork choice. + if let Err(e) = self.canonical_head.restore_from_store( + fork_choice_write_lock, + ResetPayloadStatuses::always_reset_conditionally( + self.config.always_reset_payload_statuses, + ), + &self.store, + &self.spec, + &self.log, + ) { + crit!( + self.log, + "No stored fork choice found to restore from"; + "error" => ?e, + "warning" => "The database is likely corrupt now, consider --purge-db" + ); + Err(BlockError::BeaconChainError(e)) + } else { + Ok(()) + } + } + /// Check block's consistentency with any configured weak subjectivity checkpoint. 
fn check_block_against_weak_subjectivity_checkpoint( &self, diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 743748a76d..6c87deb826 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -1,5 +1,6 @@ use derivative::Derivative; use slot_clock::SlotClock; +use std::marker::PhantomData; use std::sync::Arc; use crate::beacon_chain::{BeaconChain, BeaconChainTypes}; @@ -8,11 +9,11 @@ use crate::block_verification::{ BlockSlashInfo, }; use crate::kzg_utils::{validate_blob, validate_blobs}; +use crate::observed_data_sidecars::{DoNotObserve, ObservationStrategy, Observe}; use crate::{metrics, BeaconChainError}; use kzg::{Error as KzgError, Kzg, KzgCommitment}; use slog::debug; use ssz_derive::{Decode, Encode}; -use ssz_types::VariableList; use std::time::Duration; use tree_hash::TreeHash; use types::blob_sidecar::BlobIdentifier; @@ -156,20 +157,16 @@ impl From for GossipBlobError { } } -pub type GossipVerifiedBlobList = VariableList< - GossipVerifiedBlob, - <::EthSpec as EthSpec>::MaxBlobsPerBlock, ->; - /// A wrapper around a `BlobSidecar` that indicates it has been approved for re-gossiping on /// the p2p network. #[derive(Debug)] -pub struct GossipVerifiedBlob { +pub struct GossipVerifiedBlob { block_root: Hash256, blob: KzgVerifiedBlob, + _phantom: PhantomData, } -impl GossipVerifiedBlob { +impl GossipVerifiedBlob { pub fn new( blob: Arc>, subnet_id: u64, @@ -178,7 +175,7 @@ impl GossipVerifiedBlob { let header = blob.signed_block_header.clone(); // We only process slashing info if the gossip verification failed // since we do not process the blob any further in that case. 
- validate_blob_sidecar_for_gossip(blob, subnet_id, chain).map_err(|e| { + validate_blob_sidecar_for_gossip::(blob, subnet_id, chain).map_err(|e| { process_block_slash_info::<_, GossipBlobError>( chain, BlockSlashInfo::from_early_error_blob(header, e), @@ -195,6 +192,7 @@ impl GossipVerifiedBlob { blob, seen_timestamp: Duration::from_secs(0), }, + _phantom: PhantomData, } } pub fn id(&self) -> BlobIdentifier { @@ -335,6 +333,25 @@ impl KzgVerifiedBlobList { verified_blobs: blobs, }) } + + /// Create a `KzgVerifiedBlobList` from `blobs` that are already KZG verified. + /// + /// This should be used with caution, as used incorrectly it could result in KZG verification + /// being skipped and invalid blobs being deemed valid. + pub fn from_verified>>>( + blobs: I, + seen_timestamp: Duration, + ) -> Self { + Self { + verified_blobs: blobs + .into_iter() + .map(|blob| KzgVerifiedBlob { + blob, + seen_timestamp, + }) + .collect(), + } + } } impl IntoIterator for KzgVerifiedBlobList { @@ -364,11 +381,11 @@ where validate_blobs::(kzg, commitments.as_slice(), blobs, proofs.as_slice()) } -pub fn validate_blob_sidecar_for_gossip( +pub fn validate_blob_sidecar_for_gossip( blob_sidecar: Arc>, subnet: u64, chain: &BeaconChain, -) -> Result, GossipBlobError> { +) -> Result, GossipBlobError> { let blob_slot = blob_sidecar.slot(); let blob_index = blob_sidecar.index; let block_parent_root = blob_sidecar.block_parent_root(); @@ -568,16 +585,45 @@ pub fn validate_blob_sidecar_for_gossip( ) .map_err(|e| GossipBlobError::BeaconChainError(e.into()))?; + if O::observe() { + observe_gossip_blob(&kzg_verified_blob.blob, chain)?; + } + + Ok(GossipVerifiedBlob { + block_root, + blob: kzg_verified_blob, + _phantom: PhantomData, + }) +} + +impl GossipVerifiedBlob { + pub fn observe( + self, + chain: &BeaconChain, + ) -> Result, GossipBlobError> { + observe_gossip_blob(&self.blob.blob, chain)?; + Ok(GossipVerifiedBlob { + block_root: self.block_root, + blob: self.blob, + _phantom: PhantomData, 
+ }) + } +} + +fn observe_gossip_blob( + blob_sidecar: &BlobSidecar, + chain: &BeaconChain, +) -> Result<(), GossipBlobError> { // Now the signature is valid, store the proposal so we don't accept another blob sidecar - // with the same `BlobIdentifier`. - // It's important to double-check that the proposer still hasn't been observed so we don't - // have a race-condition when verifying two blocks simultaneously. + // with the same `BlobIdentifier`. It's important to double-check that the proposer still + // hasn't been observed so we don't have a race-condition when verifying two blocks + // simultaneously. // - // Note: If this BlobSidecar goes on to fail full verification, we do not evict it from the seen_cache - // as alternate blob_sidecars for the same identifier can still be retrieved - // over rpc. Evicting them from this cache would allow faster propagation over gossip. So we allow - // retrieval of potentially valid blocks over rpc, but try to punish the proposer for signing - // invalid messages. Issue for more background + // Note: If this BlobSidecar goes on to fail full verification, we do not evict it from the + // seen_cache as alternate blob_sidecars for the same identifier can still be retrieved over + // rpc. Evicting them from this cache would allow faster propagation over gossip. So we + // allow retrieval of potentially valid blocks over rpc, but try to punish the proposer for + // signing invalid messages. Issue for more background // https://github.com/ethereum/consensus-specs/issues/3261 if chain .observed_blob_sidecars @@ -586,16 +632,12 @@ pub fn validate_blob_sidecar_for_gossip( .map_err(|e| GossipBlobError::BeaconChainError(e.into()))? 
{ return Err(GossipBlobError::RepeatBlob { - proposer: proposer_index as u64, - slot: blob_slot, - index: blob_index, + proposer: blob_sidecar.block_proposer_index(), + slot: blob_sidecar.slot(), + index: blob_sidecar.index, }); } - - Ok(GossipVerifiedBlob { - block_root, - blob: kzg_verified_blob, - }) + Ok(()) } /// Returns the canonical root of the given `blob`. diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 527462ab64..3ae19430aa 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -683,7 +683,7 @@ pub struct SignatureVerifiedBlock { consensus_context: ConsensusContext, } -/// Used to await the result of executing payload with a remote EE. +/// Used to await the result of executing payload with an EE. type PayloadVerificationHandle = JoinHandle>>; /// A wrapper around a `SignedBeaconBlock` that indicates that this block is fully verified and @@ -750,7 +750,8 @@ pub fn build_blob_data_column_sidecars( &metrics::DATA_COLUMN_SIDECAR_COMPUTATION, &[&blobs.len().to_string()], ); - let sidecars = blobs_to_data_column_sidecars(&blobs, block, &chain.kzg, &chain.spec) + let blob_refs = blobs.iter().collect::>(); + let sidecars = blobs_to_data_column_sidecars(&blob_refs, block, &chain.kzg, &chain.spec) .discard_timer_on_break(&mut timer)?; drop(timer); Ok(sidecars) @@ -838,9 +839,6 @@ impl GossipVerifiedBlock { let block_root = get_block_header_root(block_header); - // Disallow blocks that conflict with the anchor (weak subjectivity checkpoint), if any. - check_block_against_anchor_slot(block.message(), chain)?; - // Do not gossip a block from a finalized slot. check_block_against_finalized_slot(block.message(), block_root, chain)?; @@ -1073,9 +1071,6 @@ impl SignatureVerifiedBlock { .fork_name(&chain.spec) .map_err(BlockError::InconsistentFork)?; - // Check the anchor slot before loading the parent, to avoid spurious lookups. 
- check_block_against_anchor_slot(block.message(), chain)?; - let (mut parent, block) = load_parent(block, chain)?; let state = cheap_state_advance_to_obtain_committees::<_, BlockError>( @@ -1343,7 +1338,6 @@ impl ExecutionPendingBlock { /* * Perform cursory checks to see if the block is even worth processing. */ - check_block_relevancy(block.as_block(), block_root, chain)?; // Define a future that will verify the execution payload with an execution engine. @@ -1688,19 +1682,6 @@ impl ExecutionPendingBlock { } } -/// Returns `Ok(())` if the block's slot is greater than the anchor block's slot (if any). -fn check_block_against_anchor_slot( - block: BeaconBlockRef<'_, T::EthSpec>, - chain: &BeaconChain, -) -> Result<(), BlockError> { - if let Some(anchor_slot) = chain.store.get_anchor_slot() { - if block.slot() <= anchor_slot { - return Err(BlockError::WeakSubjectivityConflict); - } - } - Ok(()) -} - /// Returns `Ok(())` if the block is later than the finalized slot on `chain`. /// /// Returns an error if the block is earlier or equal to the finalized slot, or there was an error diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 5f1e94fc8c..589db0af50 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -363,6 +363,10 @@ where store .put_block(&beacon_block_root, beacon_block.clone()) .map_err(|e| format!("Failed to store genesis block: {:?}", e))?; + store + .store_frozen_block_root_at_skip_slots(Slot::new(0), Slot::new(1), beacon_block_root) + .and_then(|ops| store.cold_db.do_atomically(ops)) + .map_err(|e| format!("Failed to store genesis block root: {e:?}"))?; // Store the genesis block under the `ZERO_HASH` key. 
store diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index 20edfbf31a..b8a607c886 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -88,6 +88,12 @@ pub struct ChainConfig { pub malicious_withhold_count: usize, /// Enable peer sampling on blocks. pub enable_sampling: bool, + /// Number of batches that the node splits blobs or data columns into during publication. + /// This doesn't apply if the node is the block proposer. For PeerDAS only. + pub blob_publication_batches: usize, + /// The delay in milliseconds applied by the node between sending each blob or data column batch. + /// This doesn't apply if the node is the block proposer. + pub blob_publication_batch_interval: Duration, } impl Default for ChainConfig { @@ -121,6 +127,8 @@ impl Default for ChainConfig { enable_light_client_server: false, malicious_withhold_count: 0, enable_sampling: false, + blob_publication_batches: 4, + blob_publication_batch_interval: Duration::from_millis(300), } } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 047764d705..72806a74d2 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -18,7 +18,7 @@ use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnIdentifier, DataColumnSidecar, DataColumnSidecarList, - Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock, Slot, + Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock, }; mod error; @@ -146,6 +146,10 @@ impl DataAvailabilityChecker { self.availability_cache.sampling_column_count() } + pub(crate) fn is_supernode(&self) -> bool { + self.get_sampling_column_count() == self.spec.number_of_columns + } + /// Checks if 
the block root is currenlty in the availability cache awaiting import because /// of missing components. pub fn get_execution_valid_block( @@ -201,7 +205,6 @@ impl DataAvailabilityChecker { pub fn put_rpc_blobs( &self, block_root: Hash256, - epoch: Epoch, blobs: FixedBlobSidecarList, ) -> Result, AvailabilityCheckError> { let seen_timestamp = self @@ -212,15 +215,12 @@ impl DataAvailabilityChecker { // Note: currently not reporting which specific blob is invalid because we fetch all blobs // from the same peer for both lookup and range sync. - let verified_blobs = KzgVerifiedBlobList::new( - Vec::from(blobs).into_iter().flatten(), - &self.kzg, - seen_timestamp, - ) - .map_err(AvailabilityCheckError::InvalidBlobs)?; + let verified_blobs = + KzgVerifiedBlobList::new(blobs.iter().flatten().cloned(), &self.kzg, seen_timestamp) + .map_err(AvailabilityCheckError::InvalidBlobs)?; self.availability_cache - .put_kzg_verified_blobs(block_root, epoch, verified_blobs, &self.log) + .put_kzg_verified_blobs(block_root, verified_blobs, &self.log) } /// Put a list of custody columns received via RPC into the availability cache. This performs KZG @@ -229,7 +229,6 @@ impl DataAvailabilityChecker { pub fn put_rpc_custody_columns( &self, block_root: Hash256, - epoch: Epoch, custody_columns: DataColumnSidecarList, ) -> Result, AvailabilityCheckError> { // TODO(das): report which column is invalid for proper peer scoring @@ -248,12 +247,32 @@ impl DataAvailabilityChecker { self.availability_cache.put_kzg_verified_data_columns( block_root, - epoch, verified_custody_columns, &self.log, ) } + /// Put a list of blobs received from the EL pool into the availability cache. + /// + /// This DOES NOT perform KZG verification because the KZG proofs should have been constructed + /// immediately prior to calling this function so they are assumed to be valid. 
+ pub fn put_engine_blobs( + &self, + block_root: Hash256, + blobs: FixedBlobSidecarList, + ) -> Result, AvailabilityCheckError> { + let seen_timestamp = self + .slot_clock + .now_duration() + .ok_or(AvailabilityCheckError::SlotClockError)?; + + let verified_blobs = + KzgVerifiedBlobList::from_verified(blobs.iter().flatten().cloned(), seen_timestamp); + + self.availability_cache + .put_kzg_verified_blobs(block_root, verified_blobs, &self.log) + } + /// Check if we've cached other blobs for this block. If it completes a set and we also /// have a block cached, return the `Availability` variant triggering block import. /// Otherwise cache the blob sidecar. @@ -265,7 +284,6 @@ impl DataAvailabilityChecker { ) -> Result, AvailabilityCheckError> { self.availability_cache.put_kzg_verified_blobs( gossip_blob.block_root(), - gossip_blob.epoch(), vec![gossip_blob.into_inner()], &self.log, ) @@ -279,12 +297,9 @@ impl DataAvailabilityChecker { #[allow(clippy::type_complexity)] pub fn put_gossip_data_columns( &self, - slot: Slot, block_root: Hash256, gossip_data_columns: Vec>, ) -> Result, AvailabilityCheckError> { - let epoch = slot.epoch(T::EthSpec::slots_per_epoch()); - let custody_columns = gossip_data_columns .into_iter() .map(|c| KzgVerifiedCustodyDataColumn::from_asserted_custody(c.into_inner())) @@ -292,7 +307,6 @@ impl DataAvailabilityChecker { self.availability_cache.put_kzg_verified_data_columns( block_root, - epoch, custody_columns, &self.log, ) @@ -595,12 +609,7 @@ impl DataAvailabilityChecker { ); self.availability_cache - .put_kzg_verified_data_columns( - *block_root, - slot.epoch(T::EthSpec::slots_per_epoch()), - data_columns_to_publish.clone(), - &self.log, - ) + .put_kzg_verified_data_columns(*block_root, data_columns_to_publish.clone(), &self.log) .map(|availability| { DataColumnReconstructionResult::Success(( availability, diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs 
b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index dbfa00e6e2..cfdb3cfe91 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -10,7 +10,6 @@ pub enum Error { blob_commitment: KzgCommitment, block_commitment: KzgCommitment, }, - UnableToDetermineImportRequirement, Unexpected, SszTypes(ssz_types::Error), MissingBlobs, @@ -44,7 +43,6 @@ impl Error { | Error::Unexpected | Error::ParentStateMissing(_) | Error::BlockReplayError(_) - | Error::UnableToDetermineImportRequirement | Error::RebuildingStateCaches(_) | Error::SlotClockError => ErrorCategory::Internal, Error::InvalidBlobs { .. } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 6d4636e8ed..40361574af 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -10,7 +10,7 @@ use crate::BeaconChainTypes; use lru::LruCache; use parking_lot::RwLock; use slog::{debug, Logger}; -use ssz_types::{FixedVector, VariableList}; +use ssz_types::FixedVector; use std::num::NonZeroUsize; use std::sync::Arc; use types::blob_sidecar::BlobIdentifier; @@ -34,11 +34,6 @@ pub struct PendingComponents { pub reconstruction_started: bool, } -pub enum BlockImportRequirement { - AllBlobs, - ColumnSampling(usize), -} - impl PendingComponents { /// Returns an immutable reference to the cached block. pub fn get_cached_block(&self) -> &Option> { @@ -199,63 +194,49 @@ impl PendingComponents { /// /// Returns `true` if both the block exists and the number of received blobs / custody columns /// matches the number of expected blobs / custody columns. 
- pub fn is_available( - &self, - block_import_requirement: &BlockImportRequirement, - log: &Logger, - ) -> bool { + pub fn is_available(&self, custody_column_count: usize, log: &Logger) -> bool { let block_kzg_commitments_count_opt = self.block_kzg_commitments_count(); + let expected_blobs_msg = block_kzg_commitments_count_opt + .as_ref() + .map(|num| num.to_string()) + .unwrap_or("unknown".to_string()); - match block_import_requirement { - BlockImportRequirement::AllBlobs => { - let received_blobs = self.num_received_blobs(); - let expected_blobs_msg = block_kzg_commitments_count_opt - .as_ref() - .map(|num| num.to_string()) - .unwrap_or("unknown".to_string()); - - debug!(log, - "Component(s) added to data availability checker"; - "block_root" => ?self.block_root, - "received_block" => block_kzg_commitments_count_opt.is_some(), - "received_blobs" => received_blobs, - "expected_blobs" => expected_blobs_msg, - ); - - block_kzg_commitments_count_opt.map_or(false, |num_expected_blobs| { - num_expected_blobs == received_blobs - }) + // No data columns when there are 0 blobs + let expected_columns_opt = block_kzg_commitments_count_opt.map(|blob_count| { + if blob_count > 0 { + custody_column_count + } else { + 0 } - BlockImportRequirement::ColumnSampling(num_expected_columns) => { - // No data columns when there are 0 blobs - let expected_columns_opt = block_kzg_commitments_count_opt.map(|blob_count| { - if blob_count > 0 { - *num_expected_columns - } else { - 0 - } - }); + }); + let expected_columns_msg = expected_columns_opt + .as_ref() + .map(|num| num.to_string()) + .unwrap_or("unknown".to_string()); - let expected_columns_msg = expected_columns_opt - .as_ref() - .map(|num| num.to_string()) - .unwrap_or("unknown".to_string()); + let num_received_blobs = self.num_received_blobs(); + let num_received_columns = self.num_received_data_columns(); - let num_received_columns = self.num_received_data_columns(); + debug!( + log, + "Component(s) added to data availability 
checker"; + "block_root" => ?self.block_root, + "received_blobs" => num_received_blobs, + "expected_blobs" => expected_blobs_msg, + "received_columns" => num_received_columns, + "expected_columns" => expected_columns_msg, + ); - debug!(log, - "Component(s) added to data availability checker"; - "block_root" => ?self.block_root, - "received_block" => block_kzg_commitments_count_opt.is_some(), - "received_columns" => num_received_columns, - "expected_columns" => expected_columns_msg, - ); + let all_blobs_received = block_kzg_commitments_count_opt + .map_or(false, |num_expected_blobs| { + num_expected_blobs == num_received_blobs + }); - expected_columns_opt.map_or(false, |num_expected_columns| { - num_expected_columns == num_received_columns - }) - } - } + let all_columns_received = expected_columns_opt.map_or(false, |num_expected_columns| { + num_expected_columns == num_received_columns + }); + + all_blobs_received || all_columns_received } /// Returns an empty `PendingComponents` object with the given block root. @@ -277,7 +258,6 @@ impl PendingComponents { /// reconstructed from disk. Ensure you are not holding any write locks while calling this. 
pub fn make_available( self, - block_import_requirement: BlockImportRequirement, spec: &Arc, recover: R, ) -> Result, AvailabilityCheckError> @@ -304,26 +284,25 @@ impl PendingComponents { return Err(AvailabilityCheckError::Unexpected); }; - let (blobs, data_columns) = match block_import_requirement { - BlockImportRequirement::AllBlobs => { - let num_blobs_expected = diet_executed_block.num_blobs_expected(); - let Some(verified_blobs) = verified_blobs - .into_iter() - .map(|b| b.map(|b| b.to_blob())) - .take(num_blobs_expected) - .collect::>>() - else { - return Err(AvailabilityCheckError::Unexpected); - }; - (Some(VariableList::new(verified_blobs)?), None) - } - BlockImportRequirement::ColumnSampling(_) => { - let verified_data_columns = verified_data_columns - .into_iter() - .map(|d| d.into_inner()) - .collect(); - (None, Some(verified_data_columns)) - } + let is_peer_das_enabled = spec.is_peer_das_enabled_for_epoch(diet_executed_block.epoch()); + let (blobs, data_columns) = if is_peer_das_enabled { + let data_columns = verified_data_columns + .into_iter() + .map(|d| d.into_inner()) + .collect::>(); + (None, Some(data_columns)) + } else { + let num_blobs_expected = diet_executed_block.num_blobs_expected(); + let Some(verified_blobs) = verified_blobs + .into_iter() + .map(|b| b.map(|b| b.to_blob())) + .take(num_blobs_expected) + .collect::>>() + .map(Into::into) + else { + return Err(AvailabilityCheckError::Unexpected); + }; + (Some(verified_blobs), None) }; let executed_block = recover(diet_executed_block)?; @@ -475,24 +454,9 @@ impl DataAvailabilityCheckerInner { f(self.critical.read().peek(block_root)) } - fn block_import_requirement( - &self, - epoch: Epoch, - ) -> Result { - let peer_das_enabled = self.spec.is_peer_das_enabled_for_epoch(epoch); - if peer_das_enabled { - Ok(BlockImportRequirement::ColumnSampling( - self.sampling_column_count, - )) - } else { - Ok(BlockImportRequirement::AllBlobs) - } - } - pub fn put_kzg_verified_blobs>>( &self, block_root: 
Hash256, - epoch: Epoch, kzg_verified_blobs: I, log: &Logger, ) -> Result, AvailabilityCheckError> { @@ -515,12 +479,11 @@ impl DataAvailabilityCheckerInner { // Merge in the blobs. pending_components.merge_blobs(fixed_blobs); - let block_import_requirement = self.block_import_requirement(epoch)?; - if pending_components.is_available(&block_import_requirement, log) { + if pending_components.is_available(self.sampling_column_count, log) { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { + pending_components.make_available(&self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -535,7 +498,6 @@ impl DataAvailabilityCheckerInner { >( &self, block_root: Hash256, - epoch: Epoch, kzg_verified_data_columns: I, log: &Logger, ) -> Result, AvailabilityCheckError> { @@ -550,13 +512,11 @@ impl DataAvailabilityCheckerInner { // Merge in the data columns. 
pending_components.merge_data_columns(kzg_verified_data_columns)?; - let block_import_requirement = self.block_import_requirement(epoch)?; - - if pending_components.is_available(&block_import_requirement, log) { + if pending_components.is_available(self.sampling_column_count, log) { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { + pending_components.make_available(&self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -625,7 +585,6 @@ impl DataAvailabilityCheckerInner { ) -> Result, AvailabilityCheckError> { let mut write_lock = self.critical.write(); let block_root = executed_block.import_data.block_root; - let epoch = executed_block.block.epoch(); // register the block to get the diet block let diet_executed_block = self @@ -642,12 +601,11 @@ impl DataAvailabilityCheckerInner { pending_components.merge_block(diet_executed_block); // Check if we have all components and entire set is consistent. 
- let block_import_requirement = self.block_import_requirement(epoch)?; - if pending_components.is_available(&block_import_requirement, log) { + if pending_components.is_available(self.sampling_column_count, log) { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { + pending_components.make_available(&self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -703,6 +661,7 @@ impl DataAvailabilityCheckerInner { #[cfg(test)] mod test { use super::*; + use crate::{ blob_verification::GossipVerifiedBlob, block_verification::PayloadVerificationOutcome, @@ -712,6 +671,7 @@ mod test { test_utils::{BaseHarnessType, BeaconChainHarness, DiskHarnessType}, }; use fork_choice::PayloadVerificationStatus; + use logging::test_logger; use slog::{info, Logger}; use state_processing::ConsensusContext; @@ -931,7 +891,6 @@ mod test { let (pending_block, blobs) = availability_pending_block(&harness).await; let root = pending_block.import_data.block_root; - let epoch = pending_block.block.epoch(); let blobs_expected = pending_block.num_blobs_expected(); assert_eq!( @@ -980,7 +939,7 @@ mod test { for (blob_index, gossip_blob) in blobs.into_iter().enumerate() { kzg_verified_blobs.push(gossip_blob.into_inner()); let availability = cache - .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger()) + .put_kzg_verified_blobs(root, kzg_verified_blobs.clone(), harness.logger()) .expect("should put blob"); if blob_index == blobs_expected - 1 { assert!(matches!(availability, Availability::Available(_))); @@ -1002,12 +961,11 @@ mod test { "should have expected number of blobs" ); let root = pending_block.import_data.block_root; - let epoch = pending_block.block.epoch(); let mut kzg_verified_blobs = vec![]; for gossip_blob in blobs { 
kzg_verified_blobs.push(gossip_blob.into_inner()); let availability = cache - .put_kzg_verified_blobs(root, epoch, kzg_verified_blobs.clone(), harness.logger()) + .put_kzg_verified_blobs(root, kzg_verified_blobs.clone(), harness.logger()) .expect("should put blob"); assert_eq!( availability, diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs index 03e3289118..5b9b7c7023 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs @@ -57,6 +57,11 @@ impl DietAvailabilityPendingExecutedBlock { .cloned() .unwrap_or_default() } + + /// Returns the epoch corresponding to `self.slot()`. + pub fn epoch(&self) -> Epoch { + self.block.slot().epoch(E::slots_per_epoch()) + } } /// This LRU cache holds BeaconStates used for block import. If the cache overflows, diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index a4e83b2751..6cfd26786a 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -3,6 +3,7 @@ use crate::block_verification::{ BlockSlashInfo, }; use crate::kzg_utils::{reconstruct_data_columns, validate_data_columns}; +use crate::observed_data_sidecars::{ObservationStrategy, Observe}; use crate::{metrics, BeaconChain, BeaconChainError, BeaconChainTypes}; use derivative::Derivative; use fork_choice::ProtoBlock; @@ -13,6 +14,7 @@ use slog::debug; use slot_clock::SlotClock; use ssz_derive::{Decode, Encode}; use std::iter; +use std::marker::PhantomData; use std::sync::Arc; use types::data_column_sidecar::{ColumnIndex, DataColumnIdentifier}; use types::{ @@ -160,17 +162,16 @@ impl From for GossipDataColumnError { } } -pub type GossipVerifiedDataColumnList = RuntimeVariableList>; - /// A wrapper around 
a `DataColumnSidecar` that indicates it has been approved for re-gossiping on /// the p2p network. #[derive(Debug)] -pub struct GossipVerifiedDataColumn { +pub struct GossipVerifiedDataColumn { block_root: Hash256, data_column: KzgVerifiedDataColumn, + _phantom: PhantomData, } -impl GossipVerifiedDataColumn { +impl GossipVerifiedDataColumn { pub fn new( column_sidecar: Arc>, subnet_id: u64, @@ -179,12 +180,14 @@ impl GossipVerifiedDataColumn { let header = column_sidecar.signed_block_header.clone(); // We only process slashing info if the gossip verification failed // since we do not process the data column any further in that case. - validate_data_column_sidecar_for_gossip(column_sidecar, subnet_id, chain).map_err(|e| { - process_block_slash_info::<_, GossipDataColumnError>( - chain, - BlockSlashInfo::from_early_error_data_column(header, e), - ) - }) + validate_data_column_sidecar_for_gossip::(column_sidecar, subnet_id, chain).map_err( + |e| { + process_block_slash_info::<_, GossipDataColumnError>( + chain, + BlockSlashInfo::from_early_error_data_column(header, e), + ) + }, + ) } pub fn id(&self) -> DataColumnIdentifier { @@ -375,11 +378,11 @@ where Ok(()) } -pub fn validate_data_column_sidecar_for_gossip( +pub fn validate_data_column_sidecar_for_gossip( data_column: Arc>, subnet: u64, chain: &BeaconChain, -) -> Result, GossipDataColumnError> { +) -> Result, GossipDataColumnError> { let column_slot = data_column.slot(); verify_data_column_sidecar(&data_column, &chain.spec)?; verify_index_matches_subnet(&data_column, subnet, &chain.spec)?; @@ -404,9 +407,14 @@ pub fn validate_data_column_sidecar_for_gossip( ) .map_err(|e| GossipDataColumnError::BeaconChainError(e.into()))?; + if O::observe() { + observe_gossip_data_column(&kzg_verified_data_column.data, chain)?; + } + Ok(GossipVerifiedDataColumn { block_root: data_column.block_root(), data_column: kzg_verified_data_column, + _phantom: PhantomData, }) } @@ -648,11 +656,42 @@ fn verify_sidecar_not_from_future_slot( 
Ok(()) } +pub fn observe_gossip_data_column( + data_column_sidecar: &DataColumnSidecar, + chain: &BeaconChain, +) -> Result<(), GossipDataColumnError> { + // Now the signature is valid, store the proposal so we don't accept another data column sidecar + // with the same `DataColumnIdentifier`. It's important to double-check that the proposer still + // hasn't been observed so we don't have a race-condition when verifying two blocks + // simultaneously. + // + // Note: If this DataColumnSidecar goes on to fail full verification, we do not evict it from the + // seen_cache as alternate data_column_sidecars for the same identifier can still be retrieved over + // rpc. Evicting them from this cache would allow faster propagation over gossip. So we + // allow retrieval of potentially valid blocks over rpc, but try to punish the proposer for + // signing invalid messages. Issue for more background + // https://github.com/ethereum/consensus-specs/issues/3261 + if chain + .observed_column_sidecars + .write() + .observe_sidecar(data_column_sidecar) + .map_err(|e| GossipDataColumnError::BeaconChainError(e.into()))? 
+ { + return Err(GossipDataColumnError::PriorKnown { + proposer: data_column_sidecar.block_proposer_index(), + slot: data_column_sidecar.slot(), + index: data_column_sidecar.index, + }); + } + Ok(()) +} + #[cfg(test)] mod test { use crate::data_column_verification::{ validate_data_column_sidecar_for_gossip, GossipDataColumnError, }; + use crate::observed_data_sidecars::Observe; use crate::test_utils::BeaconChainHarness; use types::{DataColumnSidecar, EthSpec, ForkName, MainnetEthSpec}; @@ -691,8 +730,11 @@ mod test { .unwrap(), }; - let result = - validate_data_column_sidecar_for_gossip(column_sidecar.into(), index, &harness.chain); + let result = validate_data_column_sidecar_for_gossip::<_, Observe>( + column_sidecar.into(), + index, + &harness.chain, + ); assert!(matches!( result.err(), Some(GossipDataColumnError::UnexpectedDataColumn) diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index a26d755316..2a8fd4cd01 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -4,7 +4,6 @@ use crate::beacon_chain::ForkChoiceError; use crate::beacon_fork_choice_store::Error as ForkChoiceStoreError; use crate::data_availability_checker::AvailabilityCheckError; use crate::eth1_chain::Error as Eth1ChainError; -use crate::historical_blocks::HistoricalBlockError; use crate::migrate::PruningError; use crate::naive_aggregation_pool::Error as NaiveAggregationError; use crate::observed_aggregates::Error as ObservedAttestationsError; @@ -123,7 +122,11 @@ pub enum BeaconChainError { block_slot: Slot, state_slot: Slot, }, - HistoricalBlockError(HistoricalBlockError), + /// Block is not available (only returned when fetching historic blocks). 
+ HistoricalBlockOutOfRange { + slot: Slot, + oldest_block_slot: Slot, + }, InvalidStateForShuffling { state_epoch: Epoch, shuffling_epoch: Epoch, @@ -245,7 +248,6 @@ easy_from_to!(BlockSignatureVerifierError, BeaconChainError); easy_from_to!(PruningError, BeaconChainError); easy_from_to!(ArithError, BeaconChainError); easy_from_to!(ForkChoiceStoreError, BeaconChainError); -easy_from_to!(HistoricalBlockError, BeaconChainError); easy_from_to!(StateAdvanceError, BeaconChainError); easy_from_to!(BlockReplayError, BeaconChainError); easy_from_to!(InconsistentFork, BeaconChainError); diff --git a/beacon_node/beacon_chain/src/fetch_blobs.rs b/beacon_node/beacon_chain/src/fetch_blobs.rs new file mode 100644 index 0000000000..f740b693fb --- /dev/null +++ b/beacon_node/beacon_chain/src/fetch_blobs.rs @@ -0,0 +1,308 @@ +//! This module implements an optimisation to fetch blobs via JSON-RPC from the EL. +//! If a blob has already been seen in the public mempool, then it is often unnecessary to wait for +//! it to arrive on P2P gossip. This PR uses a new JSON-RPC method (`engine_getBlobsV1`) which +//! allows the CL to load the blobs quickly from the EL's blob pool. +//! +//! Once the node fetches the blobs from EL, it then publishes the remaining blobs that it hasn't seen +//! on P2P gossip to the network. From PeerDAS onwards, together with the increase in blob count, +//! broadcasting blobs requires a much higher bandwidth, and is only done by high capacity +//! supernodes. 
+use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob}; +use crate::kzg_utils::blobs_to_data_column_sidecars; +use crate::observed_data_sidecars::DoNotObserve; +use crate::{metrics, AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError}; +use execution_layer::json_structures::BlobAndProofV1; +use execution_layer::Error as ExecutionLayerError; +use metrics::{inc_counter, inc_counter_by, TryExt}; +use slog::{debug, error, o, Logger}; +use ssz_types::FixedVector; +use state_processing::per_block_processing::deneb::kzg_commitment_to_versioned_hash; +use std::sync::Arc; +use tokio::sync::mpsc::Receiver; +use types::blob_sidecar::{BlobSidecarError, FixedBlobSidecarList}; +use types::{ + BeaconStateError, BlobSidecar, DataColumnSidecar, DataColumnSidecarList, EthSpec, FullPayload, + Hash256, SignedBeaconBlock, SignedBeaconBlockHeader, +}; + +pub enum BlobsOrDataColumns { + Blobs(Vec>), + DataColumns(DataColumnSidecarList), +} + +#[derive(Debug)] +pub enum FetchEngineBlobError { + BeaconStateError(BeaconStateError), + BlobProcessingError(BlockError), + BlobSidecarError(BlobSidecarError), + ExecutionLayerMissing, + InternalError(String), + GossipBlob(GossipBlobError), + RequestFailed(ExecutionLayerError), + RuntimeShutdown, +} + +/// Fetches blobs from the EL mempool and processes them. It also broadcasts unseen blobs or +/// data columns (PeerDAS onwards) to the network, using the supplied `publish_fn`. 
+pub async fn fetch_and_process_engine_blobs( + chain: Arc>, + block_root: Hash256, + block: Arc>>, + publish_fn: impl Fn(BlobsOrDataColumns) + Send + 'static, +) -> Result, FetchEngineBlobError> { + let block_root_str = format!("{:?}", block_root); + let log = chain + .log + .new(o!("service" => "fetch_engine_blobs", "block_root" => block_root_str)); + + let versioned_hashes = if let Some(kzg_commitments) = block + .message() + .body() + .blob_kzg_commitments() + .ok() + .filter(|blobs| !blobs.is_empty()) + { + kzg_commitments + .iter() + .map(kzg_commitment_to_versioned_hash) + .collect::>() + } else { + debug!( + log, + "Fetch blobs not triggered - none required"; + ); + return Ok(None); + }; + + let num_expected_blobs = versioned_hashes.len(); + + let execution_layer = chain + .execution_layer + .as_ref() + .ok_or(FetchEngineBlobError::ExecutionLayerMissing)?; + + debug!( + log, + "Fetching blobs from the EL"; + "num_expected_blobs" => num_expected_blobs, + ); + let response = execution_layer + .get_blobs(versioned_hashes) + .await + .map_err(FetchEngineBlobError::RequestFailed)?; + + if response.is_empty() { + debug!( + log, + "No blobs fetched from the EL"; + "num_expected_blobs" => num_expected_blobs, + ); + inc_counter(&metrics::BLOBS_FROM_EL_MISS_TOTAL); + return Ok(None); + } else { + inc_counter(&metrics::BLOBS_FROM_EL_HIT_TOTAL); + } + + let (signed_block_header, kzg_commitments_proof) = block + .signed_block_header_and_kzg_commitments_proof() + .map_err(FetchEngineBlobError::BeaconStateError)?; + + let fixed_blob_sidecar_list = build_blob_sidecars( + &block, + response, + signed_block_header, + &kzg_commitments_proof, + )?; + + let num_fetched_blobs = fixed_blob_sidecar_list + .iter() + .filter(|b| b.is_some()) + .count(); + + inc_counter_by( + &metrics::BLOBS_FROM_EL_EXPECTED_TOTAL, + num_expected_blobs as u64, + ); + inc_counter_by( + &metrics::BLOBS_FROM_EL_RECEIVED_TOTAL, + num_fetched_blobs as u64, + ); + + // Gossip verify blobs before 
publishing. This prevents blobs with invalid KZG proofs from + // the EL making it into the data availability checker. We do not immediately add these + // blobs to the observed blobs/columns cache because we want to allow blobs/columns to arrive on gossip + // and be accepted (and propagated) while we are waiting to publish. Just before publishing + // we will observe the blobs/columns and only proceed with publishing if they are not yet seen. + let blobs_to_import_and_publish = fixed_blob_sidecar_list + .iter() + .filter_map(|opt_blob| { + let blob = opt_blob.as_ref()?; + match GossipVerifiedBlob::::new(blob.clone(), blob.index, &chain) { + Ok(verified) => Some(Ok(verified)), + // Ignore already seen blobs. + Err(GossipBlobError::RepeatBlob { .. }) => None, + Err(e) => Some(Err(e)), + } + }) + .collect::, _>>() + .map_err(FetchEngineBlobError::GossipBlob)?; + + let peer_das_enabled = chain.spec.is_peer_das_enabled_for_epoch(block.epoch()); + + let data_columns_receiver_opt = if peer_das_enabled { + // Partial blobs response isn't useful for PeerDAS, so we don't bother building and publishing data columns. 
+ if num_fetched_blobs != num_expected_blobs { + debug!( + log, + "Not all blobs fetched from the EL"; + "info" => "Unable to compute data columns", + "num_fetched_blobs" => num_fetched_blobs, + "num_expected_blobs" => num_expected_blobs, + ); + return Ok(None); + } + + let data_columns_receiver = spawn_compute_and_publish_data_columns_task( + &chain, + block.clone(), + fixed_blob_sidecar_list.clone(), + publish_fn, + log.clone(), + ); + + Some(data_columns_receiver) + } else { + if !blobs_to_import_and_publish.is_empty() { + publish_fn(BlobsOrDataColumns::Blobs(blobs_to_import_and_publish)); + } + + None + }; + + debug!( + log, + "Processing engine blobs"; + "num_fetched_blobs" => num_fetched_blobs, + ); + + let availability_processing_status = chain + .process_engine_blobs( + block.slot(), + block_root, + fixed_blob_sidecar_list.clone(), + data_columns_receiver_opt, + ) + .await + .map_err(FetchEngineBlobError::BlobProcessingError)?; + + Ok(Some(availability_processing_status)) +} + +/// Spawn a blocking task here for long computation tasks, so it doesn't block processing, and it +/// allows blobs / data columns to propagate without waiting for processing. +/// +/// An `mpsc::Sender` is then used to send the produced data columns to the `beacon_chain` for it +/// to be persisted, **after** the block is made attestable. +/// +/// The reason for doing this is to make the block available and attestable as soon as possible, +/// while maintaining the invariant that block and data columns are persisted atomically. 
+fn spawn_compute_and_publish_data_columns_task( + chain: &Arc>, + block: Arc>>, + blobs: FixedBlobSidecarList, + publish_fn: impl Fn(BlobsOrDataColumns) + Send + 'static, + log: Logger, +) -> Receiver>>> { + let chain_cloned = chain.clone(); + let (data_columns_sender, data_columns_receiver) = tokio::sync::mpsc::channel(1); + + chain.task_executor.spawn_blocking( + move || { + let mut timer = metrics::start_timer_vec( + &metrics::DATA_COLUMN_SIDECAR_COMPUTATION, + &[&blobs.len().to_string()], + ); + let blob_refs = blobs + .iter() + .filter_map(|b| b.as_ref().map(|b| &b.blob)) + .collect::>(); + let data_columns_result = blobs_to_data_column_sidecars( + &blob_refs, + &block, + &chain_cloned.kzg, + &chain_cloned.spec, + ) + .discard_timer_on_break(&mut timer); + drop(timer); + + let all_data_columns = match data_columns_result { + Ok(d) => d, + Err(e) => { + error!( + log, + "Failed to build data column sidecars from blobs"; + "error" => ?e + ); + return; + } + }; + + if let Err(e) = data_columns_sender.try_send(all_data_columns.clone()) { + error!(log, "Failed to send computed data columns"; "error" => ?e); + }; + + // Check indices from cache before sending the columns, to make sure we don't + // publish components already seen on gossip. + let is_supernode = chain_cloned.data_availability_checker.is_supernode(); + + // At the moment non supernodes are not required to publish any columns. + // TODO(das): we could experiment with having full nodes publish their custodied + // columns here. 
+ if !is_supernode { + return; + } + + publish_fn(BlobsOrDataColumns::DataColumns(all_data_columns)); + }, + "compute_and_publish_data_columns", + ); + + data_columns_receiver +} + +fn build_blob_sidecars( + block: &Arc>>, + response: Vec>>, + signed_block_header: SignedBeaconBlockHeader, + kzg_commitments_inclusion_proof: &FixedVector, +) -> Result, FetchEngineBlobError> { + let mut fixed_blob_sidecar_list = FixedBlobSidecarList::default(); + for (index, blob_and_proof) in response + .into_iter() + .enumerate() + .filter_map(|(i, opt_blob)| Some((i, opt_blob?))) + { + match BlobSidecar::new_with_existing_proof( + index, + blob_and_proof.blob, + block, + signed_block_header.clone(), + kzg_commitments_inclusion_proof, + blob_and_proof.proof, + ) { + Ok(blob) => { + if let Some(blob_mut) = fixed_blob_sidecar_list.get_mut(index) { + *blob_mut = Some(Arc::new(blob)); + } else { + return Err(FetchEngineBlobError::InternalError(format!( + "Blobs from EL contains blob with invalid index {index}" + ))); + } + } + Err(e) => { + return Err(FetchEngineBlobError::BlobSidecarError(e)); + } + } + } + Ok(fixed_blob_sidecar_list) +} diff --git a/beacon_node/beacon_chain/src/historical_blocks.rs b/beacon_node/beacon_chain/src/historical_blocks.rs index a23b6ddc1e..ddae54f464 100644 --- a/beacon_node/beacon_chain/src/historical_blocks.rs +++ b/beacon_node/beacon_chain/src/historical_blocks.rs @@ -1,5 +1,5 @@ use crate::data_availability_checker::AvailableBlock; -use crate::{errors::BeaconChainError as Error, metrics, BeaconChain, BeaconChainTypes}; +use crate::{metrics, BeaconChain, BeaconChainTypes}; use itertools::Itertools; use slog::debug; use state_processing::{ @@ -10,7 +10,11 @@ use std::borrow::Cow; use std::iter; use std::time::Duration; use store::metadata::DataColumnInfo; -use store::{chunked_vector::BlockRoots, AnchorInfo, BlobInfo, ChunkWriter, KeyValueStore}; +use store::{ + get_key_for_col, AnchorInfo, BlobInfo, DBColumn, Error as StoreError, KeyValueStore, + 
KeyValueStoreOp, +}; +use strum::IntoStaticStr; use types::{FixedBytesExtended, Hash256, Slot}; /// Use a longer timeout on the pubkey cache. @@ -18,10 +22,8 @@ use types::{FixedBytesExtended, Hash256, Slot}; /// It's ok if historical sync is stalled due to writes from forwards block processing. const PUBKEY_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(30); -#[derive(Debug)] +#[derive(Debug, IntoStaticStr)] pub enum HistoricalBlockError { - /// Block is not available (only returned when fetching historic blocks). - BlockOutOfRange { slot: Slot, oldest_block_slot: Slot }, /// Block root mismatch, caller should retry with different blocks. MismatchedBlockRoot { block_root: Hash256, @@ -33,10 +35,16 @@ pub enum HistoricalBlockError { InvalidSignature, /// Transitory error, caller should retry with the same blocks. ValidatorPubkeyCacheTimeout, - /// No historical sync needed. - NoAnchorInfo, /// Logic error: should never occur. IndexOutOfBounds, + /// Internal store error + StoreError(StoreError), +} + +impl From for HistoricalBlockError { + fn from(e: StoreError) -> Self { + Self::StoreError(e) + } } impl BeaconChain { @@ -61,11 +69,8 @@ impl BeaconChain { pub fn import_historical_block_batch( &self, mut blocks: Vec>, - ) -> Result { - let anchor_info = self - .store - .get_anchor_info() - .ok_or(HistoricalBlockError::NoAnchorInfo)?; + ) -> Result { + let anchor_info = self.store.get_anchor_info(); let blob_info = self.store.get_blob_info(); let data_column_info = self.store.get_data_column_info(); @@ -109,8 +114,6 @@ impl BeaconChain { let mut expected_block_root = anchor_info.oldest_block_parent; let mut prev_block_slot = anchor_info.oldest_block_slot; - let mut chunk_writer = - ChunkWriter::::new(&self.store.cold_db, prev_block_slot.as_usize())?; let mut new_oldest_blob_slot = blob_info.oldest_blob_slot; let mut new_oldest_data_column_slot = data_column_info.oldest_data_column_slot; @@ -127,8 +130,7 @@ impl BeaconChain { return 
Err(HistoricalBlockError::MismatchedBlockRoot { block_root, expected_block_root, - } - .into()); + }); } let blinded_block = block.clone_as_blinded(); @@ -149,8 +151,11 @@ impl BeaconChain { } // Store block roots, including at all skip slots in the freezer DB. - for slot in (block.slot().as_usize()..prev_block_slot.as_usize()).rev() { - chunk_writer.set(slot, block_root, &mut cold_batch)?; + for slot in (block.slot().as_u64()..prev_block_slot.as_u64()).rev() { + cold_batch.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col(DBColumn::BeaconBlockRoots.into(), &slot.to_be_bytes()), + block_root.as_slice().to_vec(), + )); } prev_block_slot = block.slot(); @@ -162,15 +167,17 @@ impl BeaconChain { // completion. if expected_block_root == self.genesis_block_root { let genesis_slot = self.spec.genesis_slot; - for slot in genesis_slot.as_usize()..prev_block_slot.as_usize() { - chunk_writer.set(slot, self.genesis_block_root, &mut cold_batch)?; + for slot in genesis_slot.as_u64()..prev_block_slot.as_u64() { + cold_batch.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col(DBColumn::BeaconBlockRoots.into(), &slot.to_be_bytes()), + self.genesis_block_root.as_slice().to_vec(), + )); } prev_block_slot = genesis_slot; expected_block_root = Hash256::zero(); break; } } - chunk_writer.write(&mut cold_batch)?; // these were pushed in reverse order so we reverse again signed_blocks.reverse(); @@ -212,7 +219,7 @@ impl BeaconChain { let verify_timer = metrics::start_timer(&metrics::BACKFILL_SIGNATURE_VERIFY_TIMES); if !signature_set.verify() { - return Err(HistoricalBlockError::InvalidSignature.into()); + return Err(HistoricalBlockError::InvalidSignature); } drop(verify_timer); drop(sig_timer); @@ -262,7 +269,7 @@ impl BeaconChain { let backfill_complete = new_anchor.block_backfill_complete(self.genesis_backfill_slot); anchor_and_blob_batch.push( self.store - .compare_and_set_anchor_info(Some(anchor_info), Some(new_anchor))?, + .compare_and_set_anchor_info(anchor_info, new_anchor)?, 
); self.store.hot_db.do_atomically(anchor_and_blob_batch)?; diff --git a/beacon_node/beacon_chain/src/kzg_utils.rs b/beacon_node/beacon_chain/src/kzg_utils.rs index 91c1098f81..1680c0298d 100644 --- a/beacon_node/beacon_chain/src/kzg_utils.rs +++ b/beacon_node/beacon_chain/src/kzg_utils.rs @@ -7,8 +7,8 @@ use std::sync::Arc; use types::beacon_block_body::KzgCommitments; use types::data_column_sidecar::{Cell, DataColumn, DataColumnSidecarError}; use types::{ - Blob, BlobsList, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, - Hash256, KzgCommitment, KzgProof, KzgProofs, SignedBeaconBlock, SignedBeaconBlockHeader, + Blob, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, Hash256, + KzgCommitment, KzgProof, KzgProofs, SignedBeaconBlock, SignedBeaconBlockHeader, }; /// Converts a blob ssz List object to an array to be used with the kzg @@ -146,7 +146,7 @@ pub fn verify_kzg_proof( /// Build data column sidecars from a signed beacon block and its blobs. 
pub fn blobs_to_data_column_sidecars( - blobs: &BlobsList, + blobs: &[&Blob], block: &SignedBeaconBlock, kzg: &Kzg, spec: &ChainSpec, @@ -154,6 +154,7 @@ pub fn blobs_to_data_column_sidecars( if blobs.is_empty() { return Ok(vec![]); } + let kzg_commitments = block .message() .body() @@ -312,19 +313,21 @@ mod test { #[track_caller] fn test_build_data_columns_empty(kzg: &Kzg, spec: &ChainSpec) { let num_of_blobs = 0; - let (signed_block, blob_sidecars) = create_test_block_and_blobs::(num_of_blobs, spec); + let (signed_block, blobs) = create_test_block_and_blobs::(num_of_blobs, spec); + let blob_refs = blobs.iter().collect::>(); let column_sidecars = - blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, kzg, spec).unwrap(); + blobs_to_data_column_sidecars(&blob_refs, &signed_block, kzg, spec).unwrap(); assert!(column_sidecars.is_empty()); } #[track_caller] fn test_build_data_columns(kzg: &Kzg, spec: &ChainSpec) { let num_of_blobs = 6; - let (signed_block, blob_sidecars) = create_test_block_and_blobs::(num_of_blobs, spec); + let (signed_block, blobs) = create_test_block_and_blobs::(num_of_blobs, spec); + let blob_refs = blobs.iter().collect::>(); let column_sidecars = - blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, kzg, spec).unwrap(); + blobs_to_data_column_sidecars(&blob_refs, &signed_block, kzg, spec).unwrap(); let block_kzg_commitments = signed_block .message() @@ -358,9 +361,10 @@ mod test { #[track_caller] fn test_reconstruct_data_columns(kzg: &Kzg, spec: &ChainSpec) { let num_of_blobs = 6; - let (signed_block, blob_sidecars) = create_test_block_and_blobs::(num_of_blobs, spec); + let (signed_block, blobs) = create_test_block_and_blobs::(num_of_blobs, spec); + let blob_refs = blobs.iter().collect::>(); let column_sidecars = - blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, kzg, spec).unwrap(); + blobs_to_data_column_sidecars(&blob_refs, &signed_block, kzg, spec).unwrap(); // Now reconstruct let reconstructed_columns = 
reconstruct_data_columns( diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index b89c00e0af..2953516fb1 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -28,6 +28,7 @@ pub mod eth1_chain; mod eth1_finalization_cache; pub mod events; pub mod execution_payload; +pub mod fetch_blobs; pub mod fork_choice_signal; pub mod fork_revert; pub mod graffiti_calculator; @@ -43,7 +44,7 @@ mod naive_aggregation_pool; pub mod observed_aggregates; mod observed_attesters; pub mod observed_block_producers; -mod observed_data_sidecars; +pub mod observed_data_sidecars; pub mod observed_operations; mod observed_slashable; pub mod otb_verification_service; diff --git a/beacon_node/beacon_chain/src/light_client_server_cache.rs b/beacon_node/beacon_chain/src/light_client_server_cache.rs index e0ddd8c882..78442d8df0 100644 --- a/beacon_node/beacon_chain/src/light_client_server_cache.rs +++ b/beacon_node/beacon_chain/src/light_client_server_cache.rs @@ -85,6 +85,7 @@ impl LightClientServerCache { log: &Logger, chain_spec: &ChainSpec, ) -> Result<(), BeaconChainError> { + metrics::inc_counter(&metrics::LIGHT_CLIENT_SERVER_CACHE_PROCESSING_REQUESTS); let _timer = metrics::start_timer(&metrics::LIGHT_CLIENT_SERVER_CACHE_RECOMPUTE_UPDATES_TIMES); @@ -205,6 +206,7 @@ impl LightClientServerCache { *self.latest_light_client_update.write() = Some(new_light_client_update); } + metrics::inc_counter(&metrics::LIGHT_CLIENT_SERVER_CACHE_PROCESSING_SUCCESSES); Ok(()) } @@ -280,6 +282,11 @@ impl LightClientServerCache { let (sync_committee_bytes, light_client_update_bytes) = res?; let sync_committee_period = u64::from_ssz_bytes(&sync_committee_bytes) .map_err(store::errors::Error::SszDecodeError)?; + + if sync_committee_period >= start_period + count { + break; + } + let epoch = sync_committee_period .safe_mul(chain_spec.epochs_per_sync_committee_period.into())?; @@ -290,10 +297,6 @@ impl LightClientServerCache { 
.map_err(store::errors::Error::SszDecodeError)?; light_client_updates.push(light_client_update); - - if sync_committee_period >= start_period + count { - break; - } } Ok(light_client_updates) } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index f73775d678..c6aa9fbcac 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -111,6 +111,13 @@ pub static BLOCK_PROCESSING_POST_EXEC_PROCESSING: LazyLock> = linear_buckets(5e-3, 5e-3, 10), ) }); +pub static BLOCK_PROCESSING_DATA_COLUMNS_WAIT: LazyLock> = LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_block_processing_data_columns_wait_seconds", + "Time spent waiting for data columns to be computed before starting database write", + exponential_buckets(0.01, 2.0, 10), + ) +}); pub static BLOCK_PROCESSING_DB_WRITE: LazyLock> = LazyLock::new(|| { try_create_histogram( "beacon_block_processing_db_write_seconds", @@ -1691,6 +1698,34 @@ pub static DATA_COLUMNS_SIDECAR_PROCESSING_SUCCESSES: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_from_el_hit_total", + "Number of blob batches fetched from the execution layer", + ) +}); + +pub static BLOBS_FROM_EL_MISS_TOTAL: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_from_el_miss_total", + "Number of blob batches failed to fetch from the execution layer", + ) +}); + +pub static BLOBS_FROM_EL_EXPECTED_TOTAL: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_from_el_expected_total", + "Number of blobs expected from the execution layer", + ) +}); + +pub static BLOBS_FROM_EL_RECEIVED_TOTAL: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_from_el_received_total", + "Number of blobs fetched from the execution layer", + ) +}); + /* * Light server message verification */ @@ -1937,6 +1972,22 @@ pub static LIGHT_CLIENT_SERVER_CACHE_PREV_BLOCK_CACHE_MISS: LazyLock> = + LazyLock::new(|| 
{ + try_create_int_counter( + "beacon_light_client_server_cache_processing_requests", + "Count of all requests to recompute and cache updates", + ) + }); + +pub static LIGHT_CLIENT_SERVER_CACHE_PROCESSING_SUCCESSES: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "beacon_light_client_server_cache_processing_successes", + "Count of all successful requests to recompute and cache updates", + ) + }); + /// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, /// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`. pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { @@ -1953,6 +2004,7 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { let attestation_stats = beacon_chain.op_pool.attestation_stats(); let chain_metrics = beacon_chain.metrics(); + // Kept duplicated for backwards compatibility set_gauge_by_usize( &BLOCK_PROCESSING_SNAPSHOT_CACHE_SIZE, beacon_chain.store.state_cache_len(), @@ -2016,6 +2068,8 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { .canonical_head .fork_choice_read_lock() .scrape_for_metrics(); + + beacon_chain.store.register_metrics(); } /// Scrape the given `state` assuming it's the head state, updating the `DEFAULT_REGISTRY`. diff --git a/beacon_node/beacon_chain/src/migrate.rs b/beacon_node/beacon_chain/src/migrate.rs index f83df7b446..37a2e8917b 100644 --- a/beacon_node/beacon_chain/src/migrate.rs +++ b/beacon_node/beacon_chain/src/migrate.rs @@ -24,6 +24,10 @@ const MAX_COMPACTION_PERIOD_SECONDS: u64 = 604800; const MIN_COMPACTION_PERIOD_SECONDS: u64 = 7200; /// Compact after a large finality gap, if we respect `MIN_COMPACTION_PERIOD_SECONDS`. const COMPACTION_FINALITY_DISTANCE: u64 = 1024; +/// Maximum number of blocks applied in each reconstruction burst. +/// +/// This limits the amount of time that the finalization migration is paused for. 
+const BLOCKS_PER_RECONSTRUCTION: usize = 8192 * 4; /// Default number of epochs to wait between finalization migrations. pub const DEFAULT_EPOCHS_PER_MIGRATION: u64 = 1; @@ -188,7 +192,9 @@ impl, Cold: ItemStore> BackgroundMigrator, Cold: ItemStore> BackgroundMigrator>, log: &Logger) { - if let Err(e) = db.reconstruct_historic_states() { - error!( - log, - "State reconstruction failed"; - "error" => ?e, - ); + pub fn run_reconstruction( + db: Arc>, + opt_tx: Option>, + log: &Logger, + ) { + match db.reconstruct_historic_states(Some(BLOCKS_PER_RECONSTRUCTION)) { + Ok(()) => { + // Schedule another reconstruction batch if required and we have access to the + // channel for requeueing. + if let Some(tx) = opt_tx { + if !db.get_anchor_info().all_historic_states_stored() { + if let Err(e) = tx.send(Notification::Reconstruction) { + error!( + log, + "Unable to requeue reconstruction notification"; + "error" => ?e + ); + } + } + } + } + Err(e) => { + error!( + log, + "State reconstruction failed"; + "error" => ?e, + ); + } } } @@ -388,6 +415,7 @@ impl, Cold: ItemStore> BackgroundMigrator (mpsc::Sender, thread::JoinHandle<()>) { let (tx, rx) = mpsc::channel(); + let inner_tx = tx.clone(); let thread = thread::spawn(move || { while let Ok(notif) = rx.recv() { let mut reconstruction_notif = None; @@ -418,16 +446,17 @@ impl, Cold: ItemStore> BackgroundMigrator ObservedDataSidecars { } } +/// Abstraction to control "observation" of gossip messages (currently just blobs and data columns). +/// +/// If a type returns `false` for `observe` then the message will not be immediately added to its +/// respective gossip observation cache. Unobserved messages should usually be observed later. +pub trait ObservationStrategy { + fn observe() -> bool; +} + +/// Type for messages that are observed immediately. +pub struct Observe; +/// Type for messages that have not been observed. 
+pub struct DoNotObserve; + +impl ObservationStrategy for Observe { + fn observe() -> bool { + true + } +} + +impl ObservationStrategy for DoNotObserve { + fn observe() -> bool { + false + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/beacon_node/beacon_chain/src/schema_change.rs b/beacon_node/beacon_chain/src/schema_change.rs index 4f7770e22c..9504901229 100644 --- a/beacon_node/beacon_chain/src/schema_change.rs +++ b/beacon_node/beacon_chain/src/schema_change.rs @@ -1,24 +1,23 @@ //! Utilities for managing database schema changes. mod migration_schema_v20; mod migration_schema_v21; +mod migration_schema_v22; use crate::beacon_chain::BeaconChainTypes; -use crate::types::ChainSpec; use slog::Logger; use std::sync::Arc; use store::hot_cold_store::{HotColdDB, HotColdDBError}; use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION}; use store::Error as StoreError; +use types::Hash256; /// Migrate the database from one schema version to another, applying all requisite mutations. -#[allow(clippy::only_used_in_recursion)] // spec is not used but likely to be used in future pub fn migrate_schema( db: Arc>, - deposit_contract_deploy_block: u64, + genesis_state_root: Option, from: SchemaVersion, to: SchemaVersion, log: Logger, - spec: &ChainSpec, ) -> Result<(), StoreError> { match (from, to) { // Migrating from the current schema version to itself is always OK, a no-op. @@ -26,28 +25,14 @@ pub fn migrate_schema( // Upgrade across multiple versions by recursively migrating one step at a time. 
(_, _) if from.as_u64() + 1 < to.as_u64() => { let next = SchemaVersion(from.as_u64() + 1); - migrate_schema::( - db.clone(), - deposit_contract_deploy_block, - from, - next, - log.clone(), - spec, - )?; - migrate_schema::(db, deposit_contract_deploy_block, next, to, log, spec) + migrate_schema::(db.clone(), genesis_state_root, from, next, log.clone())?; + migrate_schema::(db, genesis_state_root, next, to, log) } // Downgrade across multiple versions by recursively migrating one step at a time. (_, _) if to.as_u64() + 1 < from.as_u64() => { let next = SchemaVersion(from.as_u64() - 1); - migrate_schema::( - db.clone(), - deposit_contract_deploy_block, - from, - next, - log.clone(), - spec, - )?; - migrate_schema::(db, deposit_contract_deploy_block, next, to, log, spec) + migrate_schema::(db.clone(), genesis_state_root, from, next, log.clone())?; + migrate_schema::(db, genesis_state_root, next, to, log) } // @@ -69,6 +54,11 @@ pub fn migrate_schema( let ops = migration_schema_v21::downgrade_from_v21::(db.clone(), log)?; db.store_schema_version_atomically(to, ops) } + (SchemaVersion(21), SchemaVersion(22)) => { + // This migration needs to sync data between hot and cold DBs. The schema version is + // bumped inside the upgrade_to_v22 fn + migration_schema_v22::upgrade_to_v22::(db.clone(), genesis_state_root, log) + } // Anything else is an error. 
(_, _) => Err(HotColdDBError::UnsupportedSchemaVersion { target_version: to, diff --git a/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs b/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs new file mode 100644 index 0000000000..f532c0e672 --- /dev/null +++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v22.rs @@ -0,0 +1,212 @@ +use crate::beacon_chain::BeaconChainTypes; +use slog::{info, Logger}; +use std::sync::Arc; +use store::chunked_iter::ChunkedVectorIter; +use store::{ + chunked_vector::BlockRootsChunked, + get_key_for_col, + metadata::{ + SchemaVersion, ANCHOR_FOR_ARCHIVE_NODE, ANCHOR_UNINITIALIZED, STATE_UPPER_LIMIT_NO_RETAIN, + }, + partial_beacon_state::PartialBeaconState, + AnchorInfo, DBColumn, Error, HotColdDB, KeyValueStore, KeyValueStoreOp, +}; +use types::{BeaconState, Hash256, Slot}; + +const LOG_EVERY: usize = 200_000; + +fn load_old_schema_frozen_state( + db: &HotColdDB, + state_root: Hash256, +) -> Result>, Error> { + let Some(partial_state_bytes) = db + .cold_db + .get_bytes(DBColumn::BeaconState.into(), state_root.as_slice())? + else { + return Ok(None); + }; + let mut partial_state: PartialBeaconState = + PartialBeaconState::from_ssz_bytes(&partial_state_bytes, db.get_chain_spec())?; + + // Fill in the fields of the partial state. 
+ partial_state.load_block_roots(&db.cold_db, db.get_chain_spec())?; + partial_state.load_state_roots(&db.cold_db, db.get_chain_spec())?; + partial_state.load_historical_roots(&db.cold_db, db.get_chain_spec())?; + partial_state.load_randao_mixes(&db.cold_db, db.get_chain_spec())?; + partial_state.load_historical_summaries(&db.cold_db, db.get_chain_spec())?; + + partial_state.try_into().map(Some) +} + +pub fn upgrade_to_v22( + db: Arc>, + genesis_state_root: Option, + log: Logger, +) -> Result<(), Error> { + info!(log, "Upgrading from v21 to v22"); + + let old_anchor = db.get_anchor_info(); + + // If the anchor was uninitialized in the old schema (`None`), this represents a full archive + // node. + let effective_anchor = if old_anchor == ANCHOR_UNINITIALIZED { + ANCHOR_FOR_ARCHIVE_NODE + } else { + old_anchor.clone() + }; + + let split_slot = db.get_split_slot(); + let genesis_state_root = genesis_state_root.ok_or(Error::GenesisStateUnknown)?; + + let mut cold_ops = vec![]; + + // Load the genesis state in the previous chunked format, BEFORE we go deleting or rewriting + // anything. + let mut genesis_state = load_old_schema_frozen_state::(&db, genesis_state_root)? + .ok_or(Error::MissingGenesisState)?; + let genesis_state_root = genesis_state.update_tree_hash_cache()?; + let genesis_block_root = genesis_state.get_latest_block_root(genesis_state_root); + + // Store the genesis state in the new format, prior to updating the schema version on disk. + // In case of a crash no data is lost because we will re-load it in the old format and re-do + // this write. + if split_slot > 0 { + info!( + log, + "Re-storing genesis state"; + "state_root" => ?genesis_state_root, + ); + db.store_cold_state(&genesis_state_root, &genesis_state, &mut cold_ops)?; + } + + // Write the block roots in the new format in a new column. Similar to above, we do this + // separately from deleting the old format block roots so that this is crash safe. 
+ let oldest_block_slot = effective_anchor.oldest_block_slot; + write_new_schema_block_roots::( + &db, + genesis_block_root, + oldest_block_slot, + split_slot, + &mut cold_ops, + &log, + )?; + + // Commit this first batch of non-destructive cold database ops. + db.cold_db.do_atomically(cold_ops)?; + + // Now we update the anchor and the schema version atomically in the hot database. + // + // If we crash after commiting this change, then there will be some leftover cruft left in the + // freezer database, but no corruption because all the new-format data has already been written + // above. + let new_anchor = AnchorInfo { + state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, + state_lower_limit: Slot::new(0), + ..effective_anchor.clone() + }; + let hot_ops = vec![db.compare_and_set_anchor_info(old_anchor, new_anchor)?]; + db.store_schema_version_atomically(SchemaVersion(22), hot_ops)?; + + // Finally, clean up the old-format data from the freezer database. + delete_old_schema_freezer_data::(&db, &log)?; + + Ok(()) +} + +pub fn delete_old_schema_freezer_data( + db: &Arc>, + log: &Logger, +) -> Result<(), Error> { + let mut cold_ops = vec![]; + + let columns = [ + DBColumn::BeaconState, + // Cold state summaries indexed by state root were stored in this column. + DBColumn::BeaconStateSummary, + // Mapping from restore point number to state root was stored in this column. + DBColumn::BeaconRestorePoint, + // Chunked vector values were stored in these columns. 
+ DBColumn::BeaconHistoricalRoots, + DBColumn::BeaconRandaoMixes, + DBColumn::BeaconHistoricalSummaries, + DBColumn::BeaconBlockRootsChunked, + DBColumn::BeaconStateRootsChunked, + ]; + + for column in columns { + for res in db.cold_db.iter_column_keys::>(column) { + let key = res?; + cold_ops.push(KeyValueStoreOp::DeleteKey(get_key_for_col( + column.as_str(), + &key, + ))); + } + } + let delete_ops = cold_ops.len(); + + info!( + log, + "Deleting historic states"; + "delete_ops" => delete_ops, + ); + db.cold_db.do_atomically(cold_ops)?; + + // In order to reclaim space, we need to compact the freezer DB as well. + db.cold_db.compact()?; + + Ok(()) +} + +pub fn write_new_schema_block_roots( + db: &HotColdDB, + genesis_block_root: Hash256, + oldest_block_slot: Slot, + split_slot: Slot, + cold_ops: &mut Vec, + log: &Logger, +) -> Result<(), Error> { + info!( + log, + "Starting beacon block root migration"; + "oldest_block_slot" => oldest_block_slot, + "genesis_block_root" => ?genesis_block_root, + ); + + // Store the genesis block root if it would otherwise not be stored. + if oldest_block_slot != 0 { + cold_ops.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col(DBColumn::BeaconBlockRoots.into(), &0u64.to_be_bytes()), + genesis_block_root.as_slice().to_vec(), + )); + } + + // Block roots are available from the `oldest_block_slot` to the `split_slot`. + let start_vindex = oldest_block_slot.as_usize(); + let block_root_iter = ChunkedVectorIter::::new( + db, + start_vindex, + split_slot, + db.get_chain_spec(), + ); + + // OK to hold these in memory (10M slots * 43 bytes per KV ~= 430 MB). 
+ for (i, (slot, block_root)) in block_root_iter.enumerate() { + cold_ops.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col( + DBColumn::BeaconBlockRoots.into(), + &(slot as u64).to_be_bytes(), + ), + block_root.as_slice().to_vec(), + )); + + if i > 0 && i % LOG_EVERY == 0 { + info!( + log, + "Beacon block root migration in progress"; + "roots_migrated" => i + ); + } + } + + Ok(()) +} diff --git a/beacon_node/beacon_chain/src/test_utils.rs b/beacon_node/beacon_chain/src/test_utils.rs index 9be3b4cc2f..093ee0c44b 100644 --- a/beacon_node/beacon_chain/src/test_utils.rs +++ b/beacon_node/beacon_chain/src/test_utils.rs @@ -2894,7 +2894,6 @@ pub fn generate_rand_block_and_blobs( (block, blob_sidecars) } -#[allow(clippy::type_complexity)] pub fn generate_rand_block_and_data_columns( fork_name: ForkName, num_blobs: NumBlobs, @@ -2902,12 +2901,12 @@ pub fn generate_rand_block_and_data_columns( spec: &ChainSpec, ) -> ( SignedBeaconBlock>, - Vec>>, + DataColumnSidecarList, ) { let kzg = get_kzg(spec); let (block, blobs) = generate_rand_block_and_blobs(fork_name, num_blobs, rng); - let blob: BlobsList = blobs.into_iter().map(|b| b.blob).collect::>().into(); - let data_columns = blobs_to_data_column_sidecars(&blob, &block, &kzg, spec).unwrap(); + let blob_refs = blobs.iter().map(|b| &b.blob).collect::>(); + let data_columns = blobs_to_data_column_sidecars(&blob_refs, &block, &kzg, spec).unwrap(); (block, data_columns) } diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index d239f5089a..f094a173ee 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -976,7 +976,7 @@ async fn block_gossip_verification() { harness .chain - .process_gossip_blob(gossip_verified, || Ok(())) + .process_gossip_blob(gossip_verified) .await .expect("should import valid gossip verified blob"); } @@ -1247,7 +1247,7 @@ async fn 
verify_block_for_gossip_slashing_detection() { .unwrap(); harness .chain - .process_gossip_blob(verified_blob, || Ok(())) + .process_gossip_blob(verified_blob) .await .unwrap(); } @@ -1726,7 +1726,7 @@ async fn import_execution_pending_block( .unwrap() { ExecutedBlock::Available(block) => chain - .import_available_block(Box::from(block)) + .import_available_block(Box::from(block), None) .await .map_err(|e| format!("{e:?}")), ExecutedBlock::AvailabilityPending(_) => { diff --git a/beacon_node/beacon_chain/tests/events.rs b/beacon_node/beacon_chain/tests/events.rs index 31e69f0524..ab784d3be4 100644 --- a/beacon_node/beacon_chain/tests/events.rs +++ b/beacon_node/beacon_chain/tests/events.rs @@ -35,7 +35,7 @@ async fn blob_sidecar_event_on_process_gossip_blob() { let _ = harness .chain - .process_gossip_blob(gossip_verified_blob, || Ok(())) + .process_gossip_blob(gossip_verified_blob) .await .unwrap(); diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 119722b693..522020e476 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -25,13 +25,10 @@ use std::collections::HashSet; use std::convert::TryInto; use std::sync::{Arc, LazyLock}; use std::time::Duration; -use store::chunked_vector::Chunk; use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN}; use store::{ - chunked_vector::{chunk_key, Field}, - get_key_for_col, iter::{BlockRootsIterator, StateRootsIterator}, - BlobInfo, DBColumn, HotColdDB, KeyValueStore, KeyValueStoreOp, LevelDB, StoreConfig, + BlobInfo, DBColumn, HotColdDB, LevelDB, StoreConfig, }; use tempfile::{tempdir, TempDir}; use tokio::time::sleep; @@ -58,8 +55,8 @@ fn get_store_generic( config: StoreConfig, spec: ChainSpec, ) -> Arc, LevelDB>> { - let hot_path = db_path.path().join("hot_db"); - let cold_path = db_path.path().join("cold_db"); + let hot_path = db_path.path().join("chain_db"); + let 
cold_path = db_path.path().join("freezer_db"); let blobs_path = db_path.path().join("blobs_db"); let log = test_logger(); @@ -232,253 +229,6 @@ async fn light_client_updates_test() { assert_eq!(lc_updates.len(), 2); } -/// Tests that `store.heal_freezer_block_roots_at_split` inserts block roots between last restore point -/// slot and the split slot. -#[tokio::test] -async fn heal_freezer_block_roots_at_split() { - // chunk_size is hard-coded to 128 - let num_blocks_produced = E::slots_per_epoch() * 20; - let db_path = tempdir().unwrap(); - let store = get_store_generic( - &db_path, - StoreConfig { - slots_per_restore_point: 2 * E::slots_per_epoch(), - ..Default::default() - }, - test_spec::(), - ); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - - harness - .extend_chain( - num_blocks_produced as usize, - BlockStrategy::OnCanonicalHead, - AttestationStrategy::AllValidators, - ) - .await; - - let split_slot = store.get_split_slot(); - assert_eq!(split_slot, 18 * E::slots_per_epoch()); - - // Do a heal before deleting to make sure that it doesn't break. - let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch()); - store.heal_freezer_block_roots_at_split().unwrap(); - check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); - - // Delete block roots between `last_restore_point_slot` and `split_slot`. 
- let chunk_index = >::chunk_index( - last_restore_point_slot.as_usize(), - ); - let key_chunk = get_key_for_col(DBColumn::BeaconBlockRoots.as_str(), &chunk_key(chunk_index)); - store - .cold_db - .do_atomically(vec![KeyValueStoreOp::DeleteKey(key_chunk)]) - .unwrap(); - - let block_root_err = store - .forwards_block_roots_iterator_until( - last_restore_point_slot, - last_restore_point_slot + 1, - || unreachable!(), - &harness.chain.spec, - ) - .unwrap() - .next() - .unwrap() - .unwrap_err(); - - assert!(matches!(block_root_err, store::Error::NoContinuationData)); - - // Re-insert block roots - store.heal_freezer_block_roots_at_split().unwrap(); - check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); - - // Run for another two epochs to check that the invariant is maintained. - let additional_blocks_produced = 2 * E::slots_per_epoch(); - harness - .extend_slots(additional_blocks_produced as usize) - .await; - - check_finalization(&harness, num_blocks_produced + additional_blocks_produced); - check_split_slot(&harness, store); - check_chain_dump( - &harness, - num_blocks_produced + additional_blocks_produced + 1, - ); - check_iterators(&harness); -} - -/// Tests that `store.heal_freezer_block_roots` inserts block roots between last restore point -/// slot and the split slot. 
-#[tokio::test] -async fn heal_freezer_block_roots_with_skip_slots() { - // chunk_size is hard-coded to 128 - let num_blocks_produced = E::slots_per_epoch() * 20; - let db_path = tempdir().unwrap(); - let store = get_store_generic( - &db_path, - StoreConfig { - slots_per_restore_point: 2 * E::slots_per_epoch(), - ..Default::default() - }, - test_spec::(), - ); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - - let mut current_state = harness.get_current_state(); - let state_root = current_state.canonical_root().unwrap(); - let all_validators = &harness.get_all_validators(); - harness - .add_attested_blocks_at_slots( - current_state, - state_root, - &(1..=num_blocks_produced) - .filter(|i| i % 12 != 0) - .map(Slot::new) - .collect::>(), - all_validators, - ) - .await; - - // split slot should be 18 here - let split_slot = store.get_split_slot(); - assert_eq!(split_slot, 18 * E::slots_per_epoch()); - - let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch()); - let chunk_index = >::chunk_index( - last_restore_point_slot.as_usize(), - ); - let key_chunk = get_key_for_col(DBColumn::BeaconBlockRoots.as_str(), &chunk_key(chunk_index)); - store - .cold_db - .do_atomically(vec![KeyValueStoreOp::DeleteKey(key_chunk)]) - .unwrap(); - - let block_root_err = store - .forwards_block_roots_iterator_until( - last_restore_point_slot, - last_restore_point_slot + 1, - || unreachable!(), - &harness.chain.spec, - ) - .unwrap() - .next() - .unwrap() - .unwrap_err(); - - assert!(matches!(block_root_err, store::Error::NoContinuationData)); - - // heal function - store.heal_freezer_block_roots_at_split().unwrap(); - check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); - - // Run for another two epochs to check that the invariant is maintained. 
- let additional_blocks_produced = 2 * E::slots_per_epoch(); - harness - .extend_slots(additional_blocks_produced as usize) - .await; - - check_finalization(&harness, num_blocks_produced + additional_blocks_produced); - check_split_slot(&harness, store); - check_iterators(&harness); -} - -/// Tests that `store.heal_freezer_block_roots_at_genesis` replaces 0x0 block roots between slot -/// 0 and the first non-skip slot with genesis block root. -#[tokio::test] -async fn heal_freezer_block_roots_at_genesis() { - // Run for a few epochs to ensure we're past finalization. - let num_blocks_produced = E::slots_per_epoch() * 4; - let db_path = tempdir().unwrap(); - let store = get_store(&db_path); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - - // Start with 2 skip slots. - harness.advance_slot(); - harness.advance_slot(); - - harness - .extend_chain( - num_blocks_produced as usize, - BlockStrategy::OnCanonicalHead, - AttestationStrategy::AllValidators, - ) - .await; - - // Do a heal before deleting to make sure that it doesn't break. - store.heal_freezer_block_roots_at_genesis().unwrap(); - check_freezer_block_roots( - &harness, - Slot::new(0), - Epoch::new(1).end_slot(E::slots_per_epoch()), - ); - - // Write 0x0 block roots at slot 1 and slot 2. 
- let chunk_index = 0; - let chunk_db_key = chunk_key(chunk_index); - let mut chunk = - Chunk::::load(&store.cold_db, DBColumn::BeaconBlockRoots, &chunk_db_key) - .unwrap() - .unwrap(); - - chunk.values[1] = Hash256::zero(); - chunk.values[2] = Hash256::zero(); - - let mut ops = vec![]; - chunk - .store(DBColumn::BeaconBlockRoots, &chunk_db_key, &mut ops) - .unwrap(); - store.cold_db.do_atomically(ops).unwrap(); - - // Ensure the DB is corrupted - let block_roots = store - .forwards_block_roots_iterator_until( - Slot::new(1), - Slot::new(2), - || unreachable!(), - &harness.chain.spec, - ) - .unwrap() - .map(Result::unwrap) - .take(2) - .collect::>(); - assert_eq!( - block_roots, - vec![ - (Hash256::zero(), Slot::new(1)), - (Hash256::zero(), Slot::new(2)) - ] - ); - - // Insert genesis block roots at skip slots before first block slot - store.heal_freezer_block_roots_at_genesis().unwrap(); - check_freezer_block_roots( - &harness, - Slot::new(0), - Epoch::new(1).end_slot(E::slots_per_epoch()), - ); -} - -fn check_freezer_block_roots(harness: &TestHarness, start_slot: Slot, end_slot: Slot) { - for slot in (start_slot.as_u64()..end_slot.as_u64()).map(Slot::new) { - let (block_root, result_slot) = harness - .chain - .store - .forwards_block_roots_iterator_until(slot, slot, || unreachable!(), &harness.chain.spec) - .unwrap() - .next() - .unwrap() - .unwrap(); - assert_eq!(slot, result_slot); - let expected_block_root = harness - .chain - .block_root_at_slot(slot, WhenSlotSkipped::Prev) - .unwrap() - .unwrap(); - assert_eq!(expected_block_root, block_root); - } -} - #[tokio::test] async fn full_participation_no_skips() { let num_blocks_produced = E::slots_per_epoch() * 5; @@ -741,11 +491,12 @@ async fn epoch_boundary_state_attestation_processing() { .load_epoch_boundary_state(&block.state_root()) .expect("no error") .expect("epoch boundary state exists"); - let ebs_state_root = epoch_boundary_state.canonical_root().unwrap(); - let ebs_of_ebs = store + let ebs_state_root = 
epoch_boundary_state.update_tree_hash_cache().unwrap(); + let mut ebs_of_ebs = store .load_epoch_boundary_state(&ebs_state_root) .expect("no error") .expect("ebs of ebs exists"); + ebs_of_ebs.apply_pending_mutations().unwrap(); assert_eq!(epoch_boundary_state, ebs_of_ebs); // If the attestation is pre-finalization it should be rejected. @@ -807,10 +558,19 @@ async fn forwards_iter_block_and_state_roots_until() { check_finalization(&harness, num_blocks_produced); check_split_slot(&harness, store.clone()); - // The last restore point slot is the point at which the hybrid forwards iterator behaviour + // The freezer upper bound slot is the point at which the hybrid forwards iterator behaviour // changes. - let last_restore_point_slot = store.get_latest_restore_point_slot().unwrap(); - assert!(last_restore_point_slot > 0); + let block_upper_bound = store + .freezer_upper_bound_for_column(DBColumn::BeaconBlockRoots, Slot::new(0)) + .unwrap() + .unwrap(); + assert!(block_upper_bound > 0); + let state_upper_bound = store + .freezer_upper_bound_for_column(DBColumn::BeaconStateRoots, Slot::new(0)) + .unwrap() + .unwrap(); + assert!(state_upper_bound > 0); + assert_eq!(state_upper_bound, block_upper_bound); let chain = &harness.chain; let head_state = harness.get_current_state(); @@ -835,14 +595,12 @@ async fn forwards_iter_block_and_state_roots_until() { }; let split_slot = store.get_split_slot(); - assert!(split_slot > last_restore_point_slot); + assert_eq!(split_slot, block_upper_bound); - test_range(Slot::new(0), last_restore_point_slot); - test_range(last_restore_point_slot, last_restore_point_slot); - test_range(last_restore_point_slot - 1, last_restore_point_slot); - test_range(Slot::new(0), last_restore_point_slot - 1); test_range(Slot::new(0), split_slot); - test_range(last_restore_point_slot - 1, split_slot); + test_range(split_slot, split_slot); + test_range(split_slot - 1, split_slot); + test_range(Slot::new(0), split_slot - 1); test_range(Slot::new(0), 
head_state.slot()); } @@ -2567,7 +2325,7 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { .await; let (shutdown_tx, _shutdown_rx) = futures::channel::mpsc::channel(1); - let log = test_logger(); + let log = harness.chain.logger().clone(); let temp2 = tempdir().unwrap(); let store = get_store(&temp2); let spec = test_spec::(); @@ -2669,9 +2427,7 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { // Forwards iterator from 0 should fail as we lack blocks. assert!(matches!( beacon_chain.forwards_iter_block_roots(Slot::new(0)), - Err(BeaconChainError::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { .. } - )) + Err(BeaconChainError::HistoricalBlockOutOfRange { .. }) )); // Simulate processing of a `StatusMessage` with an older finalized epoch by calling @@ -2739,7 +2495,7 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { beacon_chain .import_historical_block_batch(batch_with_invalid_first_block) .unwrap_err(), - BeaconChainError::HistoricalBlockError(HistoricalBlockError::InvalidSignature) + HistoricalBlockError::InvalidSignature )); // Importing the batch with valid signatures should succeed. @@ -2794,11 +2550,11 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { } // Anchor slot is still set to the slot of the checkpoint block. - assert_eq!(store.get_anchor_slot(), Some(wss_block.slot())); + assert_eq!(store.get_anchor_info().anchor_slot, wss_block.slot()); // Reconstruct states. 
- store.clone().reconstruct_historic_states().unwrap(); - assert_eq!(store.get_anchor_slot(), None); + store.clone().reconstruct_historic_states(None).unwrap(); + assert_eq!(store.get_anchor_info().anchor_slot, 0); } /// Test that blocks and attestations that refer to states around an unaligned split state are @@ -3224,7 +2980,6 @@ async fn schema_downgrade_to_min_version() { let db_path = tempdir().unwrap(); let store = get_store(&db_path); let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - let spec = &harness.chain.spec.clone(); harness .extend_chain( @@ -3234,7 +2989,8 @@ async fn schema_downgrade_to_min_version() { ) .await; - let min_version = SchemaVersion(19); + let min_version = SchemaVersion(22); + let genesis_state_root = Some(harness.chain.genesis_state_root); // Save the slot clock so that the new harness doesn't revert in time. let slot_clock = harness.chain.slot_clock.clone(); @@ -3247,25 +3003,22 @@ async fn schema_downgrade_to_min_version() { let store = get_store(&db_path); // Downgrade. - let deposit_contract_deploy_block = 0; migrate_schema::>( store.clone(), - deposit_contract_deploy_block, + genesis_state_root, CURRENT_SCHEMA_VERSION, min_version, store.logger().clone(), - spec, ) .expect("schema downgrade to minimum version should work"); // Upgrade back. 
migrate_schema::>( store.clone(), - deposit_contract_deploy_block, + genesis_state_root, min_version, CURRENT_SCHEMA_VERSION, store.logger().clone(), - spec, ) .expect("schema upgrade from minimum version should work"); @@ -3288,11 +3041,10 @@ async fn schema_downgrade_to_min_version() { let min_version_sub_1 = SchemaVersion(min_version.as_u64().checked_sub(1).unwrap()); migrate_schema::>( store.clone(), - deposit_contract_deploy_block, + genesis_state_root, CURRENT_SCHEMA_VERSION, min_version_sub_1, harness.logger().clone(), - spec, ) .expect_err("should not downgrade below minimum version"); } @@ -3624,15 +3376,15 @@ async fn prune_historic_states() { ) .await; - // Check historical state is present. - let state_roots_iter = harness + // Check historical states are present. + let first_epoch_state_roots = harness .chain .forwards_iter_state_roots(Slot::new(0)) - .unwrap(); - for (state_root, slot) in state_roots_iter + .unwrap() .take(E::slots_per_epoch() as usize) .map(Result::unwrap) - { + .collect::>(); + for &(state_root, slot) in &first_epoch_state_roots { assert!(store.get_state(&state_root, Some(slot)).unwrap().is_some()); } @@ -3641,29 +3393,18 @@ async fn prune_historic_states() { .unwrap(); // Check that anchor info is updated. - let anchor_info = store.get_anchor_info().unwrap(); + let anchor_info = store.get_anchor_info(); assert_eq!(anchor_info.state_lower_limit, 0); assert_eq!(anchor_info.state_upper_limit, STATE_UPPER_LIMIT_NO_RETAIN); - // Historical states should be pruned. - let state_roots_iter = harness - .chain - .forwards_iter_state_roots(Slot::new(1)) - .unwrap(); - for (state_root, slot) in state_roots_iter - .take(E::slots_per_epoch() as usize) - .map(Result::unwrap) - { - assert!(store.get_state(&state_root, Some(slot)).unwrap().is_none()); + // Ensure all epoch 0 states other than the genesis have been pruned. 
+ for &(state_root, slot) in &first_epoch_state_roots { + assert_eq!( + store.get_state(&state_root, Some(slot)).unwrap().is_some(), + slot == 0 + ); } - // Ensure that genesis state is still accessible - let genesis_state_root = harness.chain.genesis_state_root; - assert!(store - .get_state(&genesis_state_root, Some(Slot::new(0))) - .unwrap() - .is_some()); - // Run for another two epochs. let additional_blocks_produced = 2 * E::slots_per_epoch(); harness diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index 2fe482d4d2..961f5140f9 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -1060,21 +1060,21 @@ where self.db_path = Some(hot_path.into()); self.freezer_db_path = Some(cold_path.into()); - let inner_spec = spec.clone(); - let deposit_contract_deploy_block = context + // Optionally grab the genesis state root. + // This will only be required if a DB upgrade to V22 is needed. + let genesis_state_root = context .eth2_network_config .as_ref() - .map(|config| config.deposit_contract_deploy_block) - .unwrap_or(0); + .and_then(|config| config.genesis_state_root::().transpose()) + .transpose()?; let schema_upgrade = |db, from, to| { migrate_schema::>( db, - deposit_contract_deploy_block, + genesis_state_root, from, to, log, - &inner_spec, ) }; diff --git a/beacon_node/client/src/config.rs b/beacon_node/client/src/config.rs index a25216ff3e..becc781ed3 100644 --- a/beacon_node/client/src/config.rs +++ b/beacon_node/client/src/config.rs @@ -59,10 +59,6 @@ pub struct Config { /// Path where the blobs database will be located if blobs should be in a separate database. pub blobs_db_path: Option, pub log_file: PathBuf, - /// If true, the node will use co-ordinated junk for eth1 values. - /// - /// This is the method used for the 2019 client interop in Canada. 
- pub dummy_eth1_backend: bool, pub sync_eth1_chain: bool, /// Graffiti to be inserted everytime we create a block if the validator doesn't specify. pub beacon_graffiti: GraffitiOrigin, @@ -103,8 +99,7 @@ impl Default for Config { store: <_>::default(), network: NetworkConfig::default(), chain: <_>::default(), - dummy_eth1_backend: false, - sync_eth1_chain: false, + sync_eth1_chain: true, eth1: <_>::default(), execution_layer: None, trusted_setup, diff --git a/beacon_node/client/src/notifier.rs b/beacon_node/client/src/notifier.rs index 839d296c76..f686c2c650 100644 --- a/beacon_node/client/src/notifier.rs +++ b/beacon_node/client/src/notifier.rs @@ -45,10 +45,7 @@ pub fn spawn_notifier( let mut current_sync_state = network.sync_state(); // Store info if we are required to do a backfill sync. - let original_anchor_slot = beacon_chain - .store - .get_anchor_info() - .map(|ai| ai.oldest_block_slot); + let original_oldest_block_slot = beacon_chain.store.get_anchor_info().oldest_block_slot; let interval_future = async move { // Perform pre-genesis logging. @@ -141,22 +138,17 @@ pub fn spawn_notifier( match current_sync_state { SyncState::BackFillSyncing { .. } => { // Observe backfilling sync info. - if let Some(oldest_slot) = original_anchor_slot { - if let Some(current_anchor_slot) = beacon_chain - .store - .get_anchor_info() - .map(|ai| ai.oldest_block_slot) - { - sync_distance = current_anchor_slot - .saturating_sub(beacon_chain.genesis_backfill_slot); - speedo - // For backfill sync use a fake slot which is the distance we've progressed from the starting `oldest_block_slot`. 
- .observe( - oldest_slot.saturating_sub(current_anchor_slot), - Instant::now(), - ); - } - } + let current_oldest_block_slot = + beacon_chain.store.get_anchor_info().oldest_block_slot; + sync_distance = current_oldest_block_slot + .saturating_sub(beacon_chain.genesis_backfill_slot); + speedo + // For backfill sync use a fake slot which is the distance we've progressed + // from the starting `original_oldest_block_slot`. + .observe( + original_oldest_block_slot.saturating_sub(current_oldest_block_slot), + Instant::now(), + ); } SyncState::SyncingFinalized { .. } | SyncState::SyncingHead { .. } @@ -213,14 +205,14 @@ pub fn spawn_notifier( "Downloading historical blocks"; "distance" => distance, "speed" => sync_speed_pretty(speed), - "est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_anchor_slot.unwrap_or(current_slot).saturating_sub(beacon_chain.genesis_backfill_slot))), + "est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_oldest_block_slot.saturating_sub(beacon_chain.genesis_backfill_slot))), ); } else { info!( log, "Downloading historical blocks"; "distance" => distance, - "est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_anchor_slot.unwrap_or(current_slot).saturating_sub(beacon_chain.genesis_backfill_slot))), + "est_time" => estimated_time_pretty(speedo.estimated_time_till_slot(original_oldest_block_slot.saturating_sub(beacon_chain.genesis_backfill_slot))), ); } } else if !is_backfilling && last_backfill_log_slot.is_some() { diff --git a/beacon_node/execution_layer/src/engine_api.rs b/beacon_node/execution_layer/src/engine_api.rs index 1c23c8ba66..083aaf2e25 100644 --- a/beacon_node/execution_layer/src/engine_api.rs +++ b/beacon_node/execution_layer/src/engine_api.rs @@ -1,7 +1,7 @@ use crate::engines::ForkchoiceState; use crate::http::{ ENGINE_FORKCHOICE_UPDATED_V1, ENGINE_FORKCHOICE_UPDATED_V2, ENGINE_FORKCHOICE_UPDATED_V3, - ENGINE_GET_CLIENT_VERSION_V1, 
ENGINE_GET_PAYLOAD_BODIES_BY_HASH_V1, + ENGINE_GET_BLOBS_V1, ENGINE_GET_CLIENT_VERSION_V1, ENGINE_GET_PAYLOAD_BODIES_BY_HASH_V1, ENGINE_GET_PAYLOAD_BODIES_BY_RANGE_V1, ENGINE_GET_PAYLOAD_V1, ENGINE_GET_PAYLOAD_V2, ENGINE_GET_PAYLOAD_V3, ENGINE_GET_PAYLOAD_V4, ENGINE_NEW_PAYLOAD_V1, ENGINE_NEW_PAYLOAD_V2, ENGINE_NEW_PAYLOAD_V3, ENGINE_NEW_PAYLOAD_V4, @@ -507,6 +507,7 @@ pub struct EngineCapabilities { pub get_payload_v3: bool, pub get_payload_v4: bool, pub get_client_version_v1: bool, + pub get_blobs_v1: bool, } impl EngineCapabilities { @@ -554,6 +555,9 @@ impl EngineCapabilities { if self.get_client_version_v1 { response.push(ENGINE_GET_CLIENT_VERSION_V1); } + if self.get_blobs_v1 { + response.push(ENGINE_GET_BLOBS_V1); + } response } diff --git a/beacon_node/execution_layer/src/engine_api/http.rs b/beacon_node/execution_layer/src/engine_api/http.rs index 250b353879..33dc60d037 100644 --- a/beacon_node/execution_layer/src/engine_api/http.rs +++ b/beacon_node/execution_layer/src/engine_api/http.rs @@ -58,6 +58,9 @@ pub const ENGINE_EXCHANGE_CAPABILITIES_TIMEOUT: Duration = Duration::from_secs(1 pub const ENGINE_GET_CLIENT_VERSION_V1: &str = "engine_getClientVersionV1"; pub const ENGINE_GET_CLIENT_VERSION_TIMEOUT: Duration = Duration::from_secs(1); +pub const ENGINE_GET_BLOBS_V1: &str = "engine_getBlobsV1"; +pub const ENGINE_GET_BLOBS_TIMEOUT: Duration = Duration::from_secs(1); + /// This error is returned during a `chainId` call by Geth. 
pub const EIP155_ERROR_STR: &str = "chain not synced beyond EIP-155 replay-protection fork block"; /// This code is returned by all clients when a method is not supported @@ -79,6 +82,7 @@ pub static LIGHTHOUSE_CAPABILITIES: &[&str] = &[ ENGINE_GET_PAYLOAD_BODIES_BY_HASH_V1, ENGINE_GET_PAYLOAD_BODIES_BY_RANGE_V1, ENGINE_GET_CLIENT_VERSION_V1, + ENGINE_GET_BLOBS_V1, ]; /// We opt to initialize the JsonClientVersionV1 rather than the ClientVersionV1 @@ -702,6 +706,20 @@ impl HttpJsonRpc { } } + pub async fn get_blobs( + &self, + versioned_hashes: Vec, + ) -> Result>>, Error> { + let params = json!([versioned_hashes]); + + self.rpc_request( + ENGINE_GET_BLOBS_V1, + params, + ENGINE_GET_BLOBS_TIMEOUT * self.execution_timeout_multiplier, + ) + .await + } + pub async fn get_block_by_number<'a>( &self, query: BlockByNumberQuery<'a>, @@ -1067,6 +1085,7 @@ impl HttpJsonRpc { get_payload_v3: capabilities.contains(ENGINE_GET_PAYLOAD_V3), get_payload_v4: capabilities.contains(ENGINE_GET_PAYLOAD_V4), get_client_version_v1: capabilities.contains(ENGINE_GET_CLIENT_VERSION_V1), + get_blobs_v1: capabilities.contains(ENGINE_GET_BLOBS_V1), }) } diff --git a/beacon_node/execution_layer/src/engine_api/json_structures.rs b/beacon_node/execution_layer/src/engine_api/json_structures.rs index 4a813b24bb..1c6639804e 100644 --- a/beacon_node/execution_layer/src/engine_api/json_structures.rs +++ b/beacon_node/execution_layer/src/engine_api/json_structures.rs @@ -9,7 +9,7 @@ use types::blob_sidecar::BlobsList; use types::execution_requests::{ ConsolidationRequests, DepositRequests, RequestPrefix, WithdrawalRequests, }; -use types::{FixedVector, Unsigned}; +use types::{Blob, FixedVector, KzgProof, Unsigned}; #[derive(Debug, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -609,6 +609,14 @@ impl From> for BlobsBundle { } } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(bound = "E: EthSpec", rename_all = "camelCase")] +pub struct BlobAndProofV1 { + 
#[serde(with = "ssz_types::serde_utils::hex_fixed_vec")] + pub blob: Blob, + pub proof: KzgProof, +} + #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct JsonForkchoiceStateV1 { diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs index f7e490233f..08a00d7bf8 100644 --- a/beacon_node/execution_layer/src/lib.rs +++ b/beacon_node/execution_layer/src/lib.rs @@ -4,6 +4,7 @@ //! This crate only provides useful functionality for "The Merge", it does not provide any of the //! deposit-contract functionality that the `beacon_node/eth1` crate already provides. +use crate::json_structures::BlobAndProofV1; use crate::payload_cache::PayloadCache; use arc_swap::ArcSwapOption; use auth::{strip_prefix, Auth, JwtKey}; @@ -65,7 +66,7 @@ mod metrics; pub mod payload_cache; mod payload_status; pub mod test_utils; -mod versioned_hashes; +pub mod versioned_hashes; /// Indicates the default jwt authenticated execution endpoint. 
pub const DEFAULT_EXECUTION_ENDPOINT: &str = "http://localhost:8551/"; @@ -1857,6 +1858,23 @@ impl ExecutionLayer { } } + pub async fn get_blobs( + &self, + query: Vec, + ) -> Result>>, Error> { + let capabilities = self.get_engine_capabilities(None).await?; + + if capabilities.get_blobs_v1 { + self.engine() + .request(|engine| async move { engine.api.get_blobs(query).await }) + .await + .map_err(Box::new) + .map_err(Error::EngineError) + } else { + Ok(vec![None; query.len()]) + } + } + pub async fn get_block_by_number( &self, query: BlockByNumberQuery<'_>, diff --git a/beacon_node/execution_layer/src/test_utils/mod.rs b/beacon_node/execution_layer/src/test_utils/mod.rs index be99b38054..1e71fde255 100644 --- a/beacon_node/execution_layer/src/test_utils/mod.rs +++ b/beacon_node/execution_layer/src/test_utils/mod.rs @@ -53,6 +53,7 @@ pub const DEFAULT_ENGINE_CAPABILITIES: EngineCapabilities = EngineCapabilities { get_payload_v3: true, get_payload_v4: true, get_client_version_v1: true, + get_blobs_v1: true, }; pub static DEFAULT_CLIENT_VERSION: LazyLock = diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 307584b82d..fe05f55a01 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -2693,24 +2693,37 @@ pub fn serve( .and(warp::header::optional::("accept")) .and(task_spawner_filter.clone()) .and(chain_filter.clone()) + .and(log_filter.clone()) .then( |endpoint_version: EndpointVersion, state_id: StateId, accept_header: Option, task_spawner: TaskSpawner, - chain: Arc>| { + chain: Arc>, + log: Logger| { task_spawner.blocking_response_task(Priority::P1, move || match accept_header { Some(api_types::Accept::Ssz) => { // We can ignore the optimistic status for the "fork" since it's a // specification constant that doesn't change across competing heads of the // beacon chain. 
+ let t = std::time::Instant::now(); let (state, _execution_optimistic, _finalized) = state_id.state(&chain)?; let fork_name = state .fork_name(&chain.spec) .map_err(inconsistent_fork_rejection)?; + let timer = metrics::start_timer(&metrics::HTTP_API_STATE_SSZ_ENCODE_TIMES); + let response_bytes = state.as_ssz_bytes(); + drop(timer); + debug!( + log, + "HTTP state load"; + "total_time_ms" => t.elapsed().as_millis(), + "target_slot" => state.slot() + ); + Response::builder() .status(200) - .body(state.as_ssz_bytes().into()) + .body(response_bytes.into()) .map(|res: Response| add_ssz_content_type_header(res)) .map(|resp: warp::reply::Response| { add_consensus_version_header(resp, fork_name) diff --git a/beacon_node/http_api/src/metrics.rs b/beacon_node/http_api/src/metrics.rs index b6a53b26c6..767931a747 100644 --- a/beacon_node/http_api/src/metrics.rs +++ b/beacon_node/http_api/src/metrics.rs @@ -39,3 +39,15 @@ pub static HTTP_API_BLOCK_GOSSIP_TIMES: LazyLock> = LazyLoc &["provenance"], ) }); +pub static HTTP_API_STATE_SSZ_ENCODE_TIMES: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "http_api_state_ssz_encode_times", + "Time to SSZ encode a BeaconState for a response", + ) +}); +pub static HTTP_API_STATE_ROOT_TIMES: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "http_api_state_root_times", + "Time to load a state root for a request", + ) +}); diff --git a/beacon_node/http_api/src/publish_blocks.rs b/beacon_node/http_api/src/publish_blocks.rs index fceeb2dd23..b5aa23acf8 100644 --- a/beacon_node/http_api/src/publish_blocks.rs +++ b/beacon_node/http_api/src/publish_blocks.rs @@ -1,4 +1,5 @@ use crate::metrics; +use std::future::Future; use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use beacon_chain::block_verification_types::AsBlock; @@ -13,9 +14,10 @@ use eth2::types::{ PublishBlockRequest, SignedBlockContents, }; use execution_layer::ProvenancedPayload; +use futures::TryFutureExt; use 
lighthouse_network::{NetworkGlobals, PubsubMessage}; use network::NetworkMessage; -use rand::seq::SliceRandom; +use rand::prelude::SliceRandom; use slog::{debug, error, info, warn, Logger}; use slot_clock::SlotClock; use std::marker::PhantomData; @@ -26,9 +28,8 @@ use tokio::sync::mpsc::UnboundedSender; use tree_hash::TreeHash; use types::{ AbstractExecPayload, BeaconBlockRef, BlobSidecar, BlobsList, BlockImportSource, - DataColumnSidecarList, DataColumnSubnetId, EthSpec, ExecPayload, ExecutionBlockHash, ForkName, - FullPayload, FullPayloadBellatrix, Hash256, KzgProofs, SignedBeaconBlock, - SignedBlindedBeaconBlock, + DataColumnSubnetId, EthSpec, ExecPayload, ExecutionBlockHash, ForkName, FullPayload, + FullPayloadBellatrix, Hash256, KzgProofs, SignedBeaconBlock, SignedBlindedBeaconBlock, }; use warp::http::StatusCode; use warp::{reply::Response, Rejection, Reply}; @@ -97,14 +98,9 @@ pub async fn publish_block>( }; let block = unverified_block.inner_block(); debug!(log, "Signed block received in HTTP API"; "slot" => block.slot()); - let malicious_withhold_count = chain.config.malicious_withhold_count; - let chain_cloned = chain.clone(); /* actually publish a block */ let publish_block_p2p = move |block: Arc>, - should_publish_block: bool, - blob_sidecars: Vec>>, - mut data_column_sidecars: DataColumnSidecarList, sender, log, seen_timestamp| @@ -120,53 +116,16 @@ pub async fn publish_block>( publish_delay, ); - let mut pubsub_messages = if should_publish_block { - info!( - log, - "Signed block published to network via HTTP API"; - "slot" => block.slot(), - "blobs_published" => blob_sidecars.len(), - "publish_delay_ms" => publish_delay.as_millis(), - ); - vec![PubsubMessage::BeaconBlock(block.clone())] - } else { - vec![] - }; + info!( + log, + "Signed block published to network via HTTP API"; + "slot" => block.slot(), + "publish_delay_ms" => publish_delay.as_millis(), + ); - match block.as_ref() { - SignedBeaconBlock::Base(_) - | SignedBeaconBlock::Altair(_) - | 
SignedBeaconBlock::Bellatrix(_) - | SignedBeaconBlock::Capella(_) => { - crate::publish_pubsub_messages(&sender, pubsub_messages) - .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish))?; - } - SignedBeaconBlock::Deneb(_) | SignedBeaconBlock::Electra(_) => { - for blob in blob_sidecars.into_iter() { - pubsub_messages.push(PubsubMessage::BlobSidecar(Box::new((blob.index, blob)))); - } - if malicious_withhold_count > 0 { - let columns_to_keep = data_column_sidecars - .len() - .saturating_sub(malicious_withhold_count); - // Randomize columns before dropping the last malicious_withhold_count items - data_column_sidecars.shuffle(&mut rand::thread_rng()); - drop(data_column_sidecars.drain(columns_to_keep..)); - } + crate::publish_pubsub_message(&sender, PubsubMessage::BeaconBlock(block.clone())) + .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish))?; - for data_col in data_column_sidecars { - let subnet = DataColumnSubnetId::from_column_index::( - data_col.index as usize, - &chain_cloned.spec, - ); - pubsub_messages.push(PubsubMessage::DataColumnSidecar(Box::new(( - subnet, data_col, - )))); - } - crate::publish_pubsub_messages(&sender, pubsub_messages) - .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish))?; - } - }; Ok(()) }; @@ -174,145 +133,11 @@ pub async fn publish_block>( let slot = block.message().slot(); let sender_clone = network_tx.clone(); - // Convert blobs to either: - // - // 1. Blob sidecars if prior to peer DAS, or - // 2. Data column sidecars if post peer DAS. - let peer_das_enabled = chain.spec.is_peer_das_enabled_for_epoch(block.epoch()); - - let (blob_sidecars, data_column_sidecars) = match unverified_blobs { - // Pre-PeerDAS: construct blob sidecars for the network. 
- Some((kzg_proofs, blobs)) if !peer_das_enabled => { - let blob_sidecars = kzg_proofs - .into_iter() - .zip(blobs) - .enumerate() - .map(|(i, (proof, unverified_blob))| { - let _timer = metrics::start_timer( - &beacon_chain::metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION, - ); - let blob_sidecar = - BlobSidecar::new(i, unverified_blob, &block, proof).map(Arc::new); - blob_sidecar.map_err(|e| { - error!( - log, - "Invalid blob - not publishing block"; - "error" => ?e, - "blob_index" => i, - "slot" => slot, - ); - warp_utils::reject::custom_bad_request(format!("{e:?}")) - }) - }) - .collect::, Rejection>>()?; - (blob_sidecars, vec![]) - } - // Post PeerDAS: construct data columns. - Some((_, blobs)) => { - // TODO(das): this is sub-optimal and should likely not be happening prior to gossip - // block publishing. - let data_column_sidecars = build_blob_data_column_sidecars(&chain, &block, blobs) - .map_err(|e| { - error!( - log, - "Invalid data column - not publishing block"; - "error" => ?e, - "slot" => slot - ); - warp_utils::reject::custom_bad_request(format!("{e:?}")) - })?; - (vec![], data_column_sidecars) - } - None => (vec![], vec![]), - }; + let build_sidecar_task_handle = + spawn_build_data_sidecar_task(chain.clone(), block.clone(), unverified_blobs, log.clone())?; // Gossip verify the block and blobs/data columns separately. let gossip_verified_block_result = unverified_block.into_gossip_verified_block(&chain); - let gossip_verified_blobs = blob_sidecars - .into_iter() - .map(|blob_sidecar| { - let gossip_verified_blob = - GossipVerifiedBlob::new(blob_sidecar.clone(), blob_sidecar.index, &chain); - - match gossip_verified_blob { - Ok(blob) => Ok(Some(blob)), - Err(GossipBlobError::RepeatBlob { proposer, .. }) => { - // Log the error but do not abort publication, we may need to publish the block - // or some of the other blobs if the block & blobs are only partially published - // by the other publisher. 
- debug!( - log, - "Blob for publication already known"; - "blob_index" => blob_sidecar.index, - "slot" => slot, - "proposer" => proposer, - ); - Ok(None) - } - Err(e) => { - error!( - log, - "Blob for publication is gossip-invalid"; - "blob_index" => blob_sidecar.index, - "slot" => slot, - "error" => ?e, - ); - Err(warp_utils::reject::custom_bad_request(e.to_string())) - } - } - }) - .collect::, Rejection>>()?; - - let gossip_verified_data_columns = data_column_sidecars - .into_iter() - .map(|data_column_sidecar| { - let column_index = data_column_sidecar.index as usize; - let subnet = - DataColumnSubnetId::from_column_index::(column_index, &chain.spec); - let gossip_verified_column = - GossipVerifiedDataColumn::new(data_column_sidecar, subnet.into(), &chain); - - match gossip_verified_column { - Ok(blob) => Ok(Some(blob)), - Err(GossipDataColumnError::PriorKnown { proposer, .. }) => { - // Log the error but do not abort publication, we may need to publish the block - // or some of the other data columns if the block & data columns are only - // partially published by the other publisher. 
- debug!( - log, - "Data column for publication already known"; - "column_index" => column_index, - "slot" => slot, - "proposer" => proposer, - ); - Ok(None) - } - Err(e) => { - error!( - log, - "Data column for publication is gossip-invalid"; - "column_index" => column_index, - "slot" => slot, - "error" => ?e, - ); - Err(warp_utils::reject::custom_bad_request(format!("{e:?}"))) - } - } - }) - .collect::, Rejection>>()?; - - let publishable_blobs = gossip_verified_blobs - .iter() - .flatten() - .map(|b| b.clone_blob()) - .collect::>(); - - let publishable_data_columns = gossip_verified_data_columns - .iter() - .flatten() - .map(|b| b.clone_data_column()) - .collect::>(); - let block_root = block_root.unwrap_or_else(|| { gossip_verified_block_result.as_ref().map_or_else( |_| block.canonical_root(), @@ -321,12 +146,9 @@ pub async fn publish_block>( }); let should_publish_block = gossip_verified_block_result.is_ok(); - if let BroadcastValidation::Gossip = validation_level { + if BroadcastValidation::Gossip == validation_level && should_publish_block { publish_block_p2p( block.clone(), - should_publish_block, - publishable_blobs.clone(), - publishable_data_columns.clone(), sender_clone.clone(), log.clone(), seen_timestamp, @@ -337,38 +159,39 @@ pub async fn publish_block>( let publish_fn_completed = Arc::new(AtomicBool::new(false)); let block_to_publish = block.clone(); let publish_fn = || { - match validation_level { - BroadcastValidation::Gossip => (), - BroadcastValidation::Consensus => publish_block_p2p( - block_to_publish.clone(), - should_publish_block, - publishable_blobs.clone(), - publishable_data_columns.clone(), - sender_clone.clone(), - log.clone(), - seen_timestamp, - )?, - BroadcastValidation::ConsensusAndEquivocation => { - check_slashable(&chain, block_root, &block_to_publish, &log)?; - publish_block_p2p( + if should_publish_block { + match validation_level { + BroadcastValidation::Gossip => (), + BroadcastValidation::Consensus => publish_block_p2p( 
block_to_publish.clone(), - should_publish_block, - publishable_blobs.clone(), - publishable_data_columns.clone(), sender_clone.clone(), log.clone(), seen_timestamp, - )?; - } - }; + )?, + BroadcastValidation::ConsensusAndEquivocation => { + check_slashable(&chain, block_root, &block_to_publish, &log)?; + publish_block_p2p( + block_to_publish.clone(), + sender_clone.clone(), + log.clone(), + seen_timestamp, + )?; + } + }; + } + publish_fn_completed.store(true, Ordering::SeqCst); Ok(()) }; + // Wait for blobs/columns to get gossip verified before proceeding further as we need them for import. + let (gossip_verified_blobs, gossip_verified_columns) = build_sidecar_task_handle.await?; + for blob in gossip_verified_blobs.into_iter().flatten() { - // Importing the blobs could trigger block import and network publication in the case - // where the block was already seen on gossip. - if let Err(e) = Box::pin(chain.process_gossip_blob(blob, &publish_fn)).await { + publish_blob_sidecars(network_tx, &blob).map_err(|_| { + warp_utils::reject::custom_server_error("unable to publish blob sidecars".into()) + })?; + if let Err(e) = Box::pin(chain.process_gossip_blob(blob)).await { let msg = format!("Invalid blob: {e}"); return if let BroadcastValidation::Gossip = validation_level { Err(warp_utils::reject::broadcast_without_import(msg)) @@ -383,14 +206,12 @@ pub async fn publish_block>( } } - if gossip_verified_data_columns - .iter() - .map(Option::is_some) - .count() - > 0 - { + if gossip_verified_columns.iter().map(Option::is_some).count() > 0 { + publish_column_sidecars(network_tx, &gossip_verified_columns, &chain).map_err(|_| { + warp_utils::reject::custom_server_error("unable to publish data column sidecars".into()) + })?; let sampling_columns_indices = &network_globals.sampling_columns; - let sampling_columns = gossip_verified_data_columns + let sampling_columns = gossip_verified_columns .into_iter() .flatten() .filter(|data_column| 
sampling_columns_indices.contains(&data_column.index())) @@ -501,6 +322,224 @@ pub async fn publish_block>( } } +type BuildDataSidecarTaskResult = Result< + ( + Vec>>, + Vec>>, + ), + Rejection, +>; + +/// Convert blobs to either: +/// +/// 1. Blob sidecars if prior to peer DAS, or +/// 2. Data column sidecars if post peer DAS. +fn spawn_build_data_sidecar_task( + chain: Arc>, + block: Arc>>, + proofs_and_blobs: UnverifiedBlobs, + log: Logger, +) -> Result>, Rejection> { + chain + .clone() + .task_executor + .spawn_blocking_handle( + move || { + let Some((kzg_proofs, blobs)) = proofs_and_blobs else { + return Ok((vec![], vec![])); + }; + + let peer_das_enabled = chain.spec.is_peer_das_enabled_for_epoch(block.epoch()); + if !peer_das_enabled { + // Pre-PeerDAS: construct blob sidecars for the network. + let gossip_verified_blobs = + build_gossip_verified_blobs(&chain, &block, blobs, kzg_proofs, &log)?; + Ok((gossip_verified_blobs, vec![])) + } else { + // Post PeerDAS: construct data columns. 
+ let gossip_verified_data_columns = + build_gossip_verified_data_columns(&chain, &block, blobs, &log)?; + Ok((vec![], gossip_verified_data_columns)) + } + }, + "build_data_sidecars", + ) + .ok_or(warp_utils::reject::custom_server_error( + "runtime shutdown".to_string(), + )) + .map(|r| { + r.map_err(|_| warp_utils::reject::custom_server_error("join error".to_string())) + .and_then(|output| async move { output }) + }) +} + +fn build_gossip_verified_data_columns( + chain: &BeaconChain, + block: &SignedBeaconBlock>, + blobs: BlobsList, + log: &Logger, +) -> Result>>, Rejection> { + let slot = block.slot(); + let data_column_sidecars = + build_blob_data_column_sidecars(chain, block, blobs).map_err(|e| { + error!( + log, + "Invalid data column - not publishing block"; + "error" => ?e, + "slot" => slot + ); + warp_utils::reject::custom_bad_request(format!("{e:?}")) + })?; + + let slot = block.slot(); + let gossip_verified_data_columns = data_column_sidecars + .into_iter() + .map(|data_column_sidecar| { + let column_index = data_column_sidecar.index as usize; + let subnet = + DataColumnSubnetId::from_column_index::(column_index, &chain.spec); + let gossip_verified_column = + GossipVerifiedDataColumn::new(data_column_sidecar, subnet.into(), chain); + + match gossip_verified_column { + Ok(blob) => Ok(Some(blob)), + Err(GossipDataColumnError::PriorKnown { proposer, .. }) => { + // Log the error but do not abort publication, we may need to publish the block + // or some of the other data columns if the block & data columns are only + // partially published by the other publisher. 
+ debug!( + log, + "Data column for publication already known"; + "column_index" => column_index, + "slot" => slot, + "proposer" => proposer, + ); + Ok(None) + } + Err(e) => { + error!( + log, + "Data column for publication is gossip-invalid"; + "column_index" => column_index, + "slot" => slot, + "error" => ?e, + ); + Err(warp_utils::reject::custom_bad_request(format!("{e:?}"))) + } + } + }) + .collect::, Rejection>>()?; + + Ok(gossip_verified_data_columns) +} + +fn build_gossip_verified_blobs( + chain: &BeaconChain, + block: &SignedBeaconBlock>, + blobs: BlobsList, + kzg_proofs: KzgProofs, + log: &Logger, +) -> Result>>, Rejection> { + let slot = block.slot(); + let gossip_verified_blobs = kzg_proofs + .into_iter() + .zip(blobs) + .enumerate() + .map(|(i, (proof, unverified_blob))| { + let timer = metrics::start_timer( + &beacon_chain::metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION, + ); + let blob_sidecar = BlobSidecar::new(i, unverified_blob, block, proof) + .map(Arc::new) + .map_err(|e| { + error!( + log, + "Invalid blob - not publishing block"; + "error" => ?e, + "blob_index" => i, + "slot" => slot, + ); + warp_utils::reject::custom_bad_request(format!("{e:?}")) + })?; + drop(timer); + + let gossip_verified_blob = + GossipVerifiedBlob::new(blob_sidecar.clone(), blob_sidecar.index, chain); + + match gossip_verified_blob { + Ok(blob) => Ok(Some(blob)), + Err(GossipBlobError::RepeatBlob { proposer, .. }) => { + // Log the error but do not abort publication, we may need to publish the block + // or some of the other blobs if the block & blobs are only partially published + // by the other publisher. 
+ debug!( + log, + "Blob for publication already known"; + "blob_index" => blob_sidecar.index, + "slot" => slot, + "proposer" => proposer, + ); + Ok(None) + } + Err(e) => { + error!( + log, + "Blob for publication is gossip-invalid"; + "blob_index" => blob_sidecar.index, + "slot" => slot, + "error" => ?e, + ); + Err(warp_utils::reject::custom_bad_request(e.to_string())) + } + } + }) + .collect::, Rejection>>()?; + + Ok(gossip_verified_blobs) +} + +fn publish_column_sidecars( + sender_clone: &UnboundedSender>, + data_column_sidecars: &[Option>], + chain: &BeaconChain, +) -> Result<(), BlockError> { + let malicious_withhold_count = chain.config.malicious_withhold_count; + let mut data_column_sidecars = data_column_sidecars + .iter() + .flatten() + .map(|d| d.clone_data_column()) + .collect::>(); + if malicious_withhold_count > 0 { + let columns_to_keep = data_column_sidecars + .len() + .saturating_sub(malicious_withhold_count); + // Randomize columns before dropping the last malicious_withhold_count items + data_column_sidecars.shuffle(&mut rand::thread_rng()); + data_column_sidecars.truncate(columns_to_keep); + } + let pubsub_messages = data_column_sidecars + .into_iter() + .map(|data_col| { + let subnet = DataColumnSubnetId::from_column_index::( + data_col.index as usize, + &chain.spec, + ); + PubsubMessage::DataColumnSidecar(Box::new((subnet, data_col))) + }) + .collect::>(); + crate::publish_pubsub_messages(sender_clone, pubsub_messages) + .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish)) +} + +fn publish_blob_sidecars( + sender_clone: &UnboundedSender>, + blob: &GossipVerifiedBlob, +) -> Result<(), BlockError> { + let pubsub_message = PubsubMessage::BlobSidecar(Box::new((blob.index(), blob.clone_blob()))); + crate::publish_pubsub_message(sender_clone, pubsub_message) + .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish)) +} + async fn post_block_import_logging_and_response( result: Result, validation_level: 
BroadcastValidation, diff --git a/beacon_node/http_api/src/state_id.rs b/beacon_node/http_api/src/state_id.rs index fdc99fa954..ddacde9a3f 100644 --- a/beacon_node/http_api/src/state_id.rs +++ b/beacon_node/http_api/src/state_id.rs @@ -1,3 +1,4 @@ +use crate::metrics; use crate::ExecutionOptimistic; use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes}; use eth2::types::StateId as CoreStateId; @@ -23,6 +24,7 @@ impl StateId { &self, chain: &BeaconChain, ) -> Result<(Hash256, ExecutionOptimistic, Finalized), warp::Rejection> { + let _t = metrics::start_timer(&metrics::HTTP_API_STATE_ROOT_TIMES); let (slot, execution_optimistic, finalized) = match &self.0 { CoreStateId::Head => { let (cached_head, execution_status) = chain diff --git a/beacon_node/http_api/tests/broadcast_validation_tests.rs b/beacon_node/http_api/tests/broadcast_validation_tests.rs index f55983ec66..1338f4f180 100644 --- a/beacon_node/http_api/tests/broadcast_validation_tests.rs +++ b/beacon_node/http_api/tests/broadcast_validation_tests.rs @@ -1486,7 +1486,7 @@ pub async fn block_seen_on_gossip_with_some_blobs() { tester .harness .chain - .process_gossip_blob(gossip_blob, || panic!("should not publish block yet")) + .process_gossip_blob(gossip_blob) .await .unwrap(); } @@ -1559,7 +1559,7 @@ pub async fn blobs_seen_on_gossip_without_block() { tester .harness .chain - .process_gossip_blob(gossip_blob, || panic!("should not publish block yet")) + .process_gossip_blob(gossip_blob) .await .unwrap(); } @@ -1633,7 +1633,7 @@ pub async fn blobs_seen_on_gossip_without_block_and_no_http_blobs() { tester .harness .chain - .process_gossip_blob(gossip_blob, || panic!("should not publish block yet")) + .process_gossip_blob(gossip_blob) .await .unwrap(); } @@ -1705,7 +1705,7 @@ pub async fn slashable_blobs_seen_on_gossip_cause_failure() { tester .harness .chain - .process_gossip_blob(gossip_blob, || panic!("should not publish block yet")) + .process_gossip_blob(gossip_blob) .await .unwrap(); } diff 
--git a/beacon_node/lighthouse_network/gossipsub/src/behaviour.rs b/beacon_node/lighthouse_network/gossipsub/src/behaviour.rs index 60f3d48d06..5ead0c06a0 100644 --- a/beacon_node/lighthouse_network/gossipsub/src/behaviour.rs +++ b/beacon_node/lighthouse_network/gossipsub/src/behaviour.rs @@ -776,6 +776,11 @@ where return Err(PublishError::AllQueuesFull(recipient_peers.len())); } + // Broadcast IDONTWANT messages + if raw_message.raw_protobuf_len() > self.config.idontwant_message_size_threshold() { + self.send_idontwant(&raw_message, &msg_id, raw_message.source.as_ref()); + } + tracing::debug!(message=%msg_id, "Published message"); if let Some(metrics) = self.metrics.as_mut() { @@ -1380,7 +1385,7 @@ where "IWANT: Peer has asked for message too many times; ignoring request" ); } else if let Some(peer) = &mut self.connected_peers.get_mut(peer_id) { - if peer.dont_send.get(&id).is_some() { + if peer.dont_send_received.get(&id).is_some() { tracing::debug!(%peer_id, message=%id, "Peer already sent IDONTWANT for this message"); continue; } @@ -1812,6 +1817,15 @@ where // Calculate the message id on the transformed data. let msg_id = self.config.message_id(&message); + if let Some(metrics) = self.metrics.as_mut() { + if let Some(peer) = self.connected_peers.get_mut(propagation_source) { + // Record if we received a message that we already sent a IDONTWANT for to the peer + if peer.dont_send_sent.contains_key(&msg_id) { + metrics.register_idontwant_messages_ignored_per_topic(&raw_message.topic); + } + } + } + // Check the validity of the message // Peers get penalized if this message is invalid. We don't add it to the duplicate cache // and instead continually penalize peers that repeatedly send this message. 
@@ -1830,7 +1844,7 @@ where // Broadcast IDONTWANT messages if raw_message.raw_protobuf_len() > self.config.idontwant_message_size_threshold() { - self.send_idontwant(&raw_message, &msg_id, propagation_source); + self.send_idontwant(&raw_message, &msg_id, Some(propagation_source)); } tracing::debug!( @@ -2507,11 +2521,19 @@ where // Flush stale IDONTWANTs. for peer in self.connected_peers.values_mut() { - while let Some((_front, instant)) = peer.dont_send.front() { + while let Some((_front, instant)) = peer.dont_send_received.front() { if (*instant + IDONTWANT_TIMEOUT) >= Instant::now() { break; } else { - peer.dont_send.pop_front(); + peer.dont_send_received.pop_front(); + } + } + // If metrics are not enabled, this queue would be empty. + while let Some((_front, instant)) = peer.dont_send_sent.front() { + if (*instant + IDONTWANT_TIMEOUT) >= Instant::now() { + break; + } else { + peer.dont_send_sent.pop_front(); } } } @@ -2702,7 +2724,7 @@ where &mut self, message: &RawMessage, msg_id: &MessageId, - propagation_source: &PeerId, + propagation_source: Option<&PeerId>, ) { let Some(mesh_peers) = self.mesh.get(&message.topic) else { return; @@ -2713,8 +2735,8 @@ where let recipient_peers = mesh_peers .iter() .chain(iwant_peers.iter()) - .filter(|peer_id| { - *peer_id != propagation_source && Some(*peer_id) != message.source.as_ref() + .filter(|&peer_id| { + Some(peer_id) != propagation_source && Some(peer_id) != message.source.as_ref() }); for peer_id in recipient_peers { @@ -2746,6 +2768,16 @@ where .entry(*peer_id) .or_default() .non_priority += 1; + return; + } + // IDONTWANT sent successfully. + if let Some(metrics) = self.metrics.as_mut() { + peer.dont_send_sent.insert(msg_id.clone(), Instant::now()); + // Don't exceed capacity. 
+ if peer.dont_send_sent.len() > IDONTWANT_CAP { + peer.dont_send_sent.pop_front(); + } + metrics.register_idontwant_messages_sent_per_topic(&message.topic); } } } @@ -2803,7 +2835,7 @@ where if !recipient_peers.is_empty() { for peer_id in recipient_peers.iter() { if let Some(peer) = self.connected_peers.get_mut(peer_id) { - if peer.dont_send.get(msg_id).is_some() { + if peer.dont_send_received.get(msg_id).is_some() { tracing::debug!(%peer_id, message=%msg_id, "Peer doesn't want message"); continue; } @@ -3157,7 +3189,8 @@ where connections: vec![], sender: RpcSender::new(self.config.connection_handler_queue_len()), topics: Default::default(), - dont_send: LinkedHashMap::new(), + dont_send_received: LinkedHashMap::new(), + dont_send_sent: LinkedHashMap::new(), }); // Add the new connection connected_peer.connections.push(connection_id); @@ -3189,7 +3222,8 @@ where connections: vec![], sender: RpcSender::new(self.config.connection_handler_queue_len()), topics: Default::default(), - dont_send: LinkedHashMap::new(), + dont_send_received: LinkedHashMap::new(), + dont_send_sent: LinkedHashMap::new(), }); // Add the new connection connected_peer.connections.push(connection_id); @@ -3361,10 +3395,10 @@ where metrics.register_idontwant_bytes(idontwant_size); } for message_id in message_ids { - peer.dont_send.insert(message_id, Instant::now()); + peer.dont_send_received.insert(message_id, Instant::now()); // Don't exceed capacity. 
- if peer.dont_send.len() > IDONTWANT_CAP { - peer.dont_send.pop_front(); + if peer.dont_send_received.len() > IDONTWANT_CAP { + peer.dont_send_received.pop_front(); } } } diff --git a/beacon_node/lighthouse_network/gossipsub/src/behaviour/tests.rs b/beacon_node/lighthouse_network/gossipsub/src/behaviour/tests.rs index 62f026b568..713fe1f266 100644 --- a/beacon_node/lighthouse_network/gossipsub/src/behaviour/tests.rs +++ b/beacon_node/lighthouse_network/gossipsub/src/behaviour/tests.rs @@ -238,7 +238,8 @@ where kind: kind.clone().unwrap_or(PeerKind::Floodsub), connections: vec![connection_id], topics: Default::default(), - dont_send: LinkedHashMap::new(), + dont_send_received: LinkedHashMap::new(), + dont_send_sent: LinkedHashMap::new(), sender, }, ); @@ -626,7 +627,8 @@ fn test_join() { kind: PeerKind::Floodsub, connections: vec![connection_id], topics: Default::default(), - dont_send: LinkedHashMap::new(), + dont_send_received: LinkedHashMap::new(), + dont_send_sent: LinkedHashMap::new(), sender, }, ); @@ -1023,7 +1025,8 @@ fn test_get_random_peers() { connections: vec![ConnectionId::new_unchecked(0)], topics: topics.clone(), sender: RpcSender::new(gs.config.connection_handler_queue_len()), - dont_send: LinkedHashMap::new(), + dont_send_sent: LinkedHashMap::new(), + dont_send_received: LinkedHashMap::new(), }, ); } @@ -5408,7 +5411,7 @@ fn doesnt_forward_idontwant() { .unwrap(); let message_id = gs.config.message_id(&message); let peer = gs.connected_peers.get_mut(&peers[2]).unwrap(); - peer.dont_send.insert(message_id, Instant::now()); + peer.dont_send_received.insert(message_id, Instant::now()); gs.handle_received_message(raw_message.clone(), &local_id); assert_eq!( @@ -5457,7 +5460,7 @@ fn parses_idontwant() { }, ); let peer = gs.connected_peers.get_mut(&peers[1]).unwrap(); - assert!(peer.dont_send.get(&message_id).is_some()); + assert!(peer.dont_send_received.get(&message_id).is_some()); } /// Test that a node clears stale IDONTWANT messages. 
@@ -5473,10 +5476,10 @@ fn clear_stale_idontwant() { .create_network(); let peer = gs.connected_peers.get_mut(&peers[2]).unwrap(); - peer.dont_send + peer.dont_send_received .insert(MessageId::new(&[1, 2, 3, 4]), Instant::now()); std::thread::sleep(Duration::from_secs(3)); gs.heartbeat(); let peer = gs.connected_peers.get_mut(&peers[2]).unwrap(); - assert!(peer.dont_send.is_empty()); + assert!(peer.dont_send_received.is_empty()); } diff --git a/beacon_node/lighthouse_network/gossipsub/src/metrics.rs b/beacon_node/lighthouse_network/gossipsub/src/metrics.rs index a4ac389a74..d3ca6c299e 100644 --- a/beacon_node/lighthouse_network/gossipsub/src/metrics.rs +++ b/beacon_node/lighthouse_network/gossipsub/src/metrics.rs @@ -188,6 +188,12 @@ pub(crate) struct Metrics { /// The number of bytes we have received in every IDONTWANT control message. idontwant_bytes: Counter, + /// Number of IDONTWANT messages sent per topic. + idontwant_messages_sent_per_topic: Family, + + /// Number of full messages we received that we previously sent a IDONTWANT for. + idontwant_messages_ignored_per_topic: Family, + /// The size of the priority queue. priority_queue_size: Histogram, /// The size of the non-priority queue. 
@@ -341,6 +347,18 @@ impl Metrics { metric }; + // IDONTWANT messages sent per topic + let idontwant_messages_sent_per_topic = register_family!( + "idonttwant_messages_sent_per_topic", + "Number of IDONTWANT messages sent per topic" + ); + + // IDONTWANTs which were ignored, and we still received the message per topic + let idontwant_messages_ignored_per_topic = register_family!( + "idontwant_messages_ignored_per_topic", + "IDONTWANT messages that were sent but we received the full message regardless" + ); + let idontwant_bytes = { let metric = Counter::default(); registry.register( @@ -405,6 +423,8 @@ impl Metrics { idontwant_msgs, idontwant_bytes, idontwant_msgs_ids, + idontwant_messages_sent_per_topic, + idontwant_messages_ignored_per_topic, priority_queue_size, non_priority_queue_size, } @@ -608,6 +628,20 @@ impl Metrics { self.idontwant_bytes.inc_by(bytes as u64); } + /// Register receiving an IDONTWANT control message for a given topic. + pub(crate) fn register_idontwant_messages_sent_per_topic(&mut self, topic: &TopicHash) { + self.idontwant_messages_sent_per_topic + .get_or_create(topic) + .inc(); + } + + /// Register receiving a message for an already sent IDONTWANT. + pub(crate) fn register_idontwant_messages_ignored_per_topic(&mut self, topic: &TopicHash) { + self.idontwant_messages_ignored_per_topic + .get_or_create(topic) + .inc(); + } + /// Register receiving an IDONTWANT msg for this topic. pub(crate) fn register_idontwant(&mut self, msgs: usize) { self.idontwant_msgs.inc(); diff --git a/beacon_node/lighthouse_network/gossipsub/src/types.rs b/beacon_node/lighthouse_network/gossipsub/src/types.rs index d14a929374..f5dac380e3 100644 --- a/beacon_node/lighthouse_network/gossipsub/src/types.rs +++ b/beacon_node/lighthouse_network/gossipsub/src/types.rs @@ -123,8 +123,10 @@ pub(crate) struct PeerConnections { pub(crate) sender: RpcSender, /// Subscribed topics. pub(crate) topics: BTreeSet, - /// Don't send messages. 
- pub(crate) dont_send: LinkedHashMap, + /// IDONTWANT messages received from the peer. + pub(crate) dont_send_received: LinkedHashMap, + /// IDONTWANT messages we sent to the peer. + pub(crate) dont_send_sent: LinkedHashMap, } /// Describes the types of peers that can exist in the gossipsub context. diff --git a/beacon_node/lighthouse_network/src/config.rs b/beacon_node/lighthouse_network/src/config.rs index d70e50b1da..21f3dc830f 100644 --- a/beacon_node/lighthouse_network/src/config.rs +++ b/beacon_node/lighthouse_network/src/config.rs @@ -305,12 +305,12 @@ impl Default for Config { let discv5_config = discv5::ConfigBuilder::new(discv5_listen_config) .enable_packet_filter() .session_cache_capacity(5000) - .request_timeout(Duration::from_secs(1)) + .request_timeout(Duration::from_secs(2)) .query_peer_timeout(Duration::from_secs(2)) .query_timeout(Duration::from_secs(30)) .request_retries(1) .enr_peer_update_min(10) - .query_parallelism(5) + .query_parallelism(8) .disable_report_discovered_peers() .ip_limit() // limits /24 IP's in buckets. .incoming_bucket_limit(8) // half the bucket size diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index d57c67bacb..b91ad40916 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -1052,10 +1052,6 @@ impl NetworkBehaviour for Discovery { discv5::Event::SocketUpdated(socket_addr) => { info!(self.log, "Address updated"; "ip" => %socket_addr.ip(), "udp_port" => %socket_addr.port()); metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT); - // We have SOCKET_UPDATED messages. This occurs when discovery has a majority of - // users reporting an external port and our ENR gets updated. - // Which means we are able to do NAT traversal. - metrics::set_gauge_vec(&metrics::NAT_OPEN, &["discv5"], 1); // Discv5 will have updated our local ENR. We save the updated version // to disk. 
diff --git a/beacon_node/lighthouse_network/src/metrics.rs b/beacon_node/lighthouse_network/src/metrics.rs index 15445c7d64..cb9c007b91 100644 --- a/beacon_node/lighthouse_network/src/metrics.rs +++ b/beacon_node/lighthouse_network/src/metrics.rs @@ -8,6 +8,7 @@ pub static NAT_OPEN: LazyLock> = LazyLock::new(|| { &["protocol"], ) }); + pub static ADDRESS_UPDATE_COUNT: LazyLock> = LazyLock::new(|| { try_create_int_counter( "libp2p_address_update_total", @@ -212,4 +213,6 @@ pub fn scrape_discovery_metrics() { set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64); set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64); set_gauge_vec(&DISCOVERY_BYTES, &["outbound"], metrics.bytes_sent as i64); + set_gauge_vec(&NAT_OPEN, &["discv5_ipv4"], metrics.ipv4_contactable as i64); + set_gauge_vec(&NAT_OPEN, &["discv5_ipv6"], metrics.ipv6_contactable as i64); } diff --git a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs index c40f78b4b0..11676f9a01 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs @@ -7,10 +7,12 @@ use futures::StreamExt; use libp2p::core::transport::PortUse; use libp2p::core::ConnectedPoint; use libp2p::identity::PeerId; +use libp2p::multiaddr::Protocol; use libp2p::swarm::behaviour::{ConnectionClosed, ConnectionEstablished, DialFailure, FromSwarm}; use libp2p::swarm::dial_opts::{DialOpts, PeerCondition}; use libp2p::swarm::dummy::ConnectionHandler; use libp2p::swarm::{ConnectionDenied, ConnectionId, NetworkBehaviour, ToSwarm}; +pub use metrics::{set_gauge_vec, NAT_OPEN}; use slog::{debug, error, trace}; use types::EthSpec; @@ -160,8 +162,8 @@ impl NetworkBehaviour for PeerManager { ) -> Result<(), ConnectionDenied> { // get the IP address to verify it's not banned. 
let ip = match remote_addr.iter().next() { - Some(libp2p::multiaddr::Protocol::Ip6(ip)) => IpAddr::V6(ip), - Some(libp2p::multiaddr::Protocol::Ip4(ip)) => IpAddr::V4(ip), + Some(Protocol::Ip6(ip)) => IpAddr::V6(ip), + Some(Protocol::Ip4(ip)) => IpAddr::V4(ip), _ => { return Err(ConnectionDenied::new(format!( "Connection to peer rejected: invalid multiaddr: {remote_addr}" @@ -207,6 +209,14 @@ impl NetworkBehaviour for PeerManager { )); } + // We have an inbound connection, this is indicative of having our libp2p NAT ports open. We + // distinguish between ipv4 and ipv6 here: + match remote_addr.iter().next() { + Some(Protocol::Ip4(_)) => set_gauge_vec(&NAT_OPEN, &["libp2p_ipv4"], 1), + Some(Protocol::Ip6(_)) => set_gauge_vec(&NAT_OPEN, &["libp2p_ipv6"], 1), + _ => {} + } + Ok(ConnectionHandler) } diff --git a/beacon_node/lighthouse_network/src/rpc/protocol.rs b/beacon_node/lighthouse_network/src/rpc/protocol.rs index d0dbffe932..57c2795b04 100644 --- a/beacon_node/lighthouse_network/src/rpc/protocol.rs +++ b/beacon_node/lighthouse_network/src/rpc/protocol.rs @@ -18,11 +18,11 @@ use tokio_util::{ }; use types::{ BeaconBlock, BeaconBlockAltair, BeaconBlockBase, BeaconBlockCapella, BeaconBlockElectra, - BlobSidecar, ChainSpec, DataColumnSidecar, EmptyBlock, EthSpec, ForkContext, ForkName, - LightClientBootstrap, LightClientBootstrapAltair, LightClientFinalityUpdate, + BlobSidecar, ChainSpec, DataColumnSidecar, EmptyBlock, EthSpec, EthSpecId, ForkContext, + ForkName, LightClientBootstrap, LightClientBootstrapAltair, LightClientFinalityUpdate, LightClientFinalityUpdateAltair, LightClientOptimisticUpdate, - LightClientOptimisticUpdateAltair, LightClientUpdate, MainnetEthSpec, Signature, - SignedBeaconBlock, + LightClientOptimisticUpdateAltair, LightClientUpdate, MainnetEthSpec, MinimalEthSpec, + Signature, SignedBeaconBlock, }; // Note: Hardcoding the `EthSpec` type for `SignedBeaconBlock` as min/max values is @@ -105,6 +105,20 @@ pub static 
SIGNED_BEACON_BLOCK_ELECTRA_MAX: LazyLock = LazyLock::new(|| { + ssz::BYTES_PER_LENGTH_OFFSET }); // Length offset for the blob commitments field. +pub static BLOB_SIDECAR_SIZE: LazyLock = + LazyLock::new(BlobSidecar::::max_size); + +pub static BLOB_SIDECAR_SIZE_MINIMAL: LazyLock = + LazyLock::new(BlobSidecar::::max_size); + +pub static DATA_COLUMNS_SIDECAR_MIN: LazyLock = LazyLock::new(|| { + DataColumnSidecar::::empty() + .as_ssz_bytes() + .len() +}); +pub static DATA_COLUMNS_SIDECAR_MAX: LazyLock = + LazyLock::new(DataColumnSidecar::::max_size); + pub static ERROR_TYPE_MIN: LazyLock = LazyLock::new(|| { VariableList::::from(Vec::::new()) .as_ssz_bytes() @@ -597,8 +611,8 @@ impl ProtocolId { Protocol::BlocksByRoot => rpc_block_limits_by_fork(fork_context.current_fork()), Protocol::BlobsByRange => rpc_blob_limits::(), Protocol::BlobsByRoot => rpc_blob_limits::(), - Protocol::DataColumnsByRoot => rpc_data_column_limits::(), - Protocol::DataColumnsByRange => rpc_data_column_limits::(), + Protocol::DataColumnsByRoot => rpc_data_column_limits(), + Protocol::DataColumnsByRange => rpc_data_column_limits(), Protocol::Ping => RpcLimits::new( ::ssz_fixed_len(), ::ssz_fixed_len(), @@ -668,17 +682,18 @@ impl ProtocolId { } pub fn rpc_blob_limits() -> RpcLimits { - RpcLimits::new( - BlobSidecar::::empty().as_ssz_bytes().len(), - BlobSidecar::::max_size(), - ) + match E::spec_name() { + EthSpecId::Minimal => { + RpcLimits::new(*BLOB_SIDECAR_SIZE_MINIMAL, *BLOB_SIDECAR_SIZE_MINIMAL) + } + EthSpecId::Mainnet | EthSpecId::Gnosis => { + RpcLimits::new(*BLOB_SIDECAR_SIZE, *BLOB_SIDECAR_SIZE) + } + } } -pub fn rpc_data_column_limits() -> RpcLimits { - RpcLimits::new( - DataColumnSidecar::::empty().as_ssz_bytes().len(), - DataColumnSidecar::::max_size(), - ) +pub fn rpc_data_column_limits() -> RpcLimits { + RpcLimits::new(*DATA_COLUMNS_SIDECAR_MIN, *DATA_COLUMNS_SIDECAR_MAX) } /* Inbound upgrade */ diff --git a/beacon_node/lighthouse_network/src/types/globals.rs 
b/beacon_node/lighthouse_network/src/types/globals.rs index bcebd02a0e..92583b7b5d 100644 --- a/beacon_node/lighthouse_network/src/types/globals.rs +++ b/beacon_node/lighthouse_network/src/types/globals.rs @@ -82,7 +82,7 @@ impl NetworkGlobals { peers: RwLock::new(PeerDB::new(trusted_peers, disable_peer_scoring, log)), gossipsub_subscriptions: RwLock::new(HashSet::new()), sync_state: RwLock::new(SyncState::Stalled), - backfill_state: RwLock::new(BackFillState::NotRequired), + backfill_state: RwLock::new(BackFillState::Paused), sampling_subnets, sampling_columns, config, diff --git a/beacon_node/lighthouse_network/src/types/sync_state.rs b/beacon_node/lighthouse_network/src/types/sync_state.rs index 4322763fc5..0519d6f4b0 100644 --- a/beacon_node/lighthouse_network/src/types/sync_state.rs +++ b/beacon_node/lighthouse_network/src/types/sync_state.rs @@ -35,8 +35,6 @@ pub enum BackFillState { Syncing, /// A backfill sync has completed. Completed, - /// A backfill sync is not required. - NotRequired, /// Too many failed attempts at backfilling. Consider it failed. 
Failed, } diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 4d875cb4a1..e92f450476 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -914,18 +914,15 @@ impl NetworkBeaconProcessor { let blob_slot = verified_blob.slot(); let blob_index = verified_blob.id().index; - let result = self - .chain - .process_gossip_blob(verified_blob, || Ok(())) - .await; + let result = self.chain.process_gossip_blob(verified_blob).await; match &result { Ok(AvailabilityProcessingStatus::Imported(block_root)) => { // Note: Reusing block imported metric here metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL); - info!( + debug!( self.log, - "Gossipsub blob processed, imported fully available block"; + "Gossipsub blob processed - imported fully available block"; "block_root" => %block_root ); self.chain.recompute_head_at_current_slot().await; @@ -936,9 +933,9 @@ impl NetworkBeaconProcessor { ); } Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => { - trace!( + debug!( self.log, - "Processed blob, waiting for other components"; + "Processed gossip blob - waiting for other components"; "slot" => %slot, "blob_index" => %blob_index, "block_root" => %block_root, @@ -1079,7 +1076,7 @@ impl NetworkBeaconProcessor { message_id, peer_id, peer_client, - block, + block.clone(), reprocess_tx.clone(), seen_duration, ) @@ -1497,6 +1494,13 @@ impl NetworkBeaconProcessor { "slot" => slot, "block_root" => %block_root, ); + + // Block is valid, we can now attempt fetching blobs from EL using version hashes + // derived from kzg commitments from the block, without having to wait for all blobs + // to be sent from the peers if we already have them. 
+ let publish_blobs = true; + self.fetch_engine_blobs_and_publish(block.clone(), *block_root, publish_blobs) + .await; } Err(BlockError::ParentUnknown { .. }) => { // This should not occur. It should be checked by `should_forward_block`. diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index 76f5e886ff..d81d964e7c 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -1,11 +1,17 @@ use crate::sync::manager::BlockProcessType; use crate::sync::SamplingId; use crate::{service::NetworkMessage, sync::manager::SyncMessage}; +use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use beacon_chain::block_verification_types::RpcBlock; +use beacon_chain::data_column_verification::{observe_gossip_data_column, GossipDataColumnError}; +use beacon_chain::fetch_blobs::{ + fetch_and_process_engine_blobs, BlobsOrDataColumns, FetchEngineBlobError, +}; +use beacon_chain::observed_data_sidecars::DoNotObserve; use beacon_chain::{ builder::Witness, eth1_chain::CachingEth1Backend, AvailabilityProcessingStatus, BeaconChain, + BeaconChainTypes, BlockError, NotifyExecutionLayer, }; -use beacon_chain::{BeaconChainTypes, NotifyExecutionLayer}; use beacon_processor::{ work_reprocessing_queue::ReprocessQueueMessage, BeaconProcessorChannels, BeaconProcessorSend, DuplicateCache, GossipAggregatePackage, GossipAttestationPackage, Work, @@ -21,7 +27,8 @@ use lighthouse_network::{ rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage}, Client, MessageId, NetworkGlobals, PeerId, PubsubMessage, }; -use slog::{debug, error, trace, Logger}; +use rand::prelude::SliceRandom; +use slog::{debug, error, trace, warn, Logger}; use slot_clock::ManualSlotClock; use std::path::PathBuf; use std::sync::Arc; @@ -67,6 +74,9 @@ pub struct NetworkBeaconProcessor { pub log: Logger, } +// Publish blobs in 
batches of exponentially increasing size. +const BLOB_PUBLICATION_EXP_FACTOR: usize = 2; + impl NetworkBeaconProcessor { fn try_send(&self, event: BeaconWorkEvent) -> Result<(), Error> { self.beacon_processor_send @@ -878,6 +888,79 @@ impl NetworkBeaconProcessor { }); } + pub async fn fetch_engine_blobs_and_publish( + self: &Arc, + block: Arc>>, + block_root: Hash256, + publish_blobs: bool, + ) { + let self_cloned = self.clone(); + let publish_fn = move |blobs_or_data_column| { + if publish_blobs { + match blobs_or_data_column { + BlobsOrDataColumns::Blobs(blobs) => { + self_cloned.publish_blobs_gradually(blobs, block_root); + } + BlobsOrDataColumns::DataColumns(columns) => { + self_cloned.publish_data_columns_gradually(columns, block_root); + } + }; + } + }; + + match fetch_and_process_engine_blobs( + self.chain.clone(), + block_root, + block.clone(), + publish_fn, + ) + .await + { + Ok(Some(availability)) => match availability { + AvailabilityProcessingStatus::Imported(_) => { + debug!( + self.log, + "Block components retrieved from EL"; + "result" => "imported block and custody columns", + "block_root" => %block_root, + ); + self.chain.recompute_head_at_current_slot().await; + } + AvailabilityProcessingStatus::MissingComponents(_, _) => { + debug!( + self.log, + "Still missing blobs after engine blobs processed successfully"; + "block_root" => %block_root, + ); + } + }, + Ok(None) => { + debug!( + self.log, + "Fetch blobs completed without import"; + "block_root" => %block_root, + ); + } + Err(FetchEngineBlobError::BlobProcessingError(BlockError::DuplicateFullyImported( + .., + ))) => { + debug!( + self.log, + "Fetch blobs duplicate import"; + "block_root" => %block_root, + ); + } + Err(e) => { + error!( + self.log, + "Error fetching or processing blobs from EL"; + "error" => ?e, + "block_root" => %block_root, + ); + } + } + } + /// Attempt to reconstruct all data columns if the following conditions satisfies: /// - Our custody requirement is all columns /// - 
We have >= 50% of columns, but not all columns @@ -885,25 +968,13 @@ impl NetworkBeaconProcessor { /// Returns `Some(AvailabilityProcessingStatus)` if reconstruction is successfully performed, /// otherwise returns `None`. async fn attempt_data_column_reconstruction( - &self, + self: &Arc, block_root: Hash256, ) -> Option { let result = self.chain.reconstruct_data_columns(block_root).await; match result { Ok(Some((availability_processing_status, data_columns_to_publish))) => { - self.send_network_message(NetworkMessage::Publish { - messages: data_columns_to_publish - .iter() - .map(|d| { - let subnet = DataColumnSubnetId::from_column_index::( - d.index as usize, - &self.chain.spec, - ); - PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone()))) - }) - .collect(), - }); - + self.publish_data_columns_gradually(data_columns_to_publish, block_root); match &availability_processing_status { AvailabilityProcessingStatus::Imported(hash) => { debug!( @@ -946,6 +1017,175 @@ impl NetworkBeaconProcessor { } } } + + /// This function gradually publishes blobs to the network in randomised batches. + /// + /// This is an optimisation to reduce outbound bandwidth and ensures each blob is published + /// by some nodes on the network as soon as possible. Our hope is that some blobs arrive from + /// other nodes in the meantime, obviating the need for us to publish them. If no other + /// publisher exists for a blob, it will eventually get published here. + fn publish_blobs_gradually( + self: &Arc, + mut blobs: Vec>, + block_root: Hash256, + ) { + let self_clone = self.clone(); + + self.executor.spawn( + async move { + let chain = self_clone.chain.clone(); + let log = self_clone.chain.logger(); + let publish_fn = |blobs: Vec>>| { + self_clone.send_network_message(NetworkMessage::Publish { + messages: blobs + .into_iter() + .map(|blob| PubsubMessage::BlobSidecar(Box::new((blob.index, blob)))) + .collect(), + }); + }; + + // Permute the blobs and split them into batches. 
+ // The hope is that we won't need to publish some blobs because we will receive them + // on gossip from other nodes. + blobs.shuffle(&mut rand::thread_rng()); + + let blob_publication_batch_interval = chain.config.blob_publication_batch_interval; + let mut publish_count = 0usize; + let blob_count = blobs.len(); + let mut blobs_iter = blobs.into_iter().peekable(); + let mut batch_size = 1usize; + + while blobs_iter.peek().is_some() { + let batch = blobs_iter.by_ref().take(batch_size); + let publishable = batch + .filter_map(|unobserved| match unobserved.observe(&chain) { + Ok(observed) => Some(observed.clone_blob()), + Err(GossipBlobError::RepeatBlob { .. }) => None, + Err(e) => { + warn!( + log, + "Previously verified blob is invalid"; + "error" => ?e + ); + None + } + }) + .collect::>(); + + if !publishable.is_empty() { + debug!( + log, + "Publishing blob batch"; + "publish_count" => publishable.len(), + "block_root" => ?block_root, + ); + publish_count += publishable.len(); + publish_fn(publishable); + } + + tokio::time::sleep(blob_publication_batch_interval).await; + batch_size *= BLOB_PUBLICATION_EXP_FACTOR; + } + + debug!( + log, + "Batch blob publication complete"; + "batch_interval" => blob_publication_batch_interval.as_millis(), + "blob_count" => blob_count, + "published_count" => publish_count, + "block_root" => ?block_root, + ) + }, + "gradual_blob_publication", + ); + } + + /// This function gradually publishes data columns to the network in randomised batches. + /// + /// This is an optimisation to reduce outbound bandwidth and ensures each column is published + /// by some nodes on the network as soon as possible. Our hope is that some columns arrive from + /// other supernodes in the meantime, obviating the need for us to publish them. If no other + /// publisher exists for a column, it will eventually get published here. 
+ fn publish_data_columns_gradually( + self: &Arc, + mut data_columns_to_publish: DataColumnSidecarList, + block_root: Hash256, + ) { + let self_clone = self.clone(); + + self.executor.spawn( + async move { + let chain = self_clone.chain.clone(); + let log = self_clone.chain.logger(); + let publish_fn = |columns: DataColumnSidecarList| { + self_clone.send_network_message(NetworkMessage::Publish { + messages: columns + .into_iter() + .map(|d| { + let subnet = DataColumnSubnetId::from_column_index::( + d.index as usize, + &chain.spec, + ); + PubsubMessage::DataColumnSidecar(Box::new((subnet, d))) + }) + .collect(), + }); + }; + + // If this node is a super node, permute the columns and split them into batches. + // The hope is that we won't need to publish some columns because we will receive them + // on gossip from other supernodes. + data_columns_to_publish.shuffle(&mut rand::thread_rng()); + + let blob_publication_batch_interval = chain.config.blob_publication_batch_interval; + let blob_publication_batches = chain.config.blob_publication_batches; + let batch_size = chain.spec.number_of_columns / blob_publication_batches; + let mut publish_count = 0usize; + + for batch in data_columns_to_publish.chunks(batch_size) { + let publishable = batch + .iter() + .filter_map(|col| match observe_gossip_data_column(col, &chain) { + Ok(()) => Some(col.clone()), + Err(GossipDataColumnError::PriorKnown { .. 
}) => None, + Err(e) => { + warn!( + log, + "Previously verified data column is invalid"; + "error" => ?e + ); + None + } + }) + .collect::>(); + + if !publishable.is_empty() { + debug!( + log, + "Publishing data column batch"; + "publish_count" => publishable.len(), + "block_root" => ?block_root, + ); + publish_count += publishable.len(); + publish_fn(publishable); + } + + tokio::time::sleep(blob_publication_batch_interval).await; + } + + debug!( + log, + "Batch data column publishing complete"; + "batch_size" => batch_size, + "batch_interval" => blob_publication_batch_interval.as_millis(), + "data_columns_to_publish_count" => data_columns_to_publish.len(), + "published_count" => publish_count, + "block_root" => ?block_root, + ) + }, + "gradual_data_column_publication", + ); + } } type TestBeaconChainType = diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 6d32806713..c4944078fe 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -2,7 +2,7 @@ use crate::network_beacon_processor::{NetworkBeaconProcessor, FUTURE_SLOT_TOLERA use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::SyncMessage; -use beacon_chain::{BeaconChainError, BeaconChainTypes, HistoricalBlockError, WhenSlotSkipped}; +use beacon_chain::{BeaconChainError, BeaconChainTypes, WhenSlotSkipped}; use itertools::process_results; use lighthouse_network::discovery::ConnectionId; use lighthouse_network::rpc::methods::{ @@ -682,12 +682,10 @@ impl NetworkBeaconProcessor { .forwards_iter_block_roots(Slot::from(*req.start_slot())) { Ok(iter) => iter, - Err(BeaconChainError::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { - slot, - oldest_block_slot, - }, - )) => { + Err(BeaconChainError::HistoricalBlockOutOfRange { + slot, + oldest_block_slot, + }) => { debug!(self.log, 
"Range request failed during backfill"; "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot @@ -941,12 +939,10 @@ impl NetworkBeaconProcessor { let forwards_block_root_iter = match self.chain.forwards_iter_block_roots(request_start_slot) { Ok(iter) => iter, - Err(BeaconChainError::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { - slot, - oldest_block_slot, - }, - )) => { + Err(BeaconChainError::HistoricalBlockOutOfRange { + slot, + oldest_block_slot, + }) => { debug!(self.log, "Range request failed during backfill"; "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot @@ -1147,12 +1143,10 @@ impl NetworkBeaconProcessor { let forwards_block_root_iter = match self.chain.forwards_iter_block_roots(request_start_slot) { Ok(iter) => iter, - Err(BeaconChainError::HistoricalBlockError( - HistoricalBlockError::BlockOutOfRange { - slot, - oldest_block_slot, - }, - )) => { + Err(BeaconChainError::HistoricalBlockOutOfRange { + slot, + oldest_block_slot, + }) => { debug!(self.log, "Range request failed during backfill"; "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 82d06c20f8..6c6bb26ee0 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -10,8 +10,8 @@ use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; use beacon_chain::data_column_verification::verify_kzg_for_data_column_list; use beacon_chain::{ - validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainError, - BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError, NotifyExecutionLayer, + validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainTypes, + BlockError, 
ChainSegmentResult, HistoricalBlockError, NotifyExecutionLayer, }; use beacon_processor::{ work_reprocessing_queue::{QueuedRpcBlock, ReprocessQueueMessage}, @@ -153,6 +153,7 @@ impl NetworkBeaconProcessor { "process_type" => ?process_type, ); + let signed_beacon_block = block.block_cloned(); let result = self .chain .process_block_with_early_caching( @@ -166,26 +167,37 @@ impl NetworkBeaconProcessor { metrics::inc_counter(&metrics::BEACON_PROCESSOR_RPC_BLOCK_IMPORTED_TOTAL); // RPC block imported, regardless of process type - if let &Ok(AvailabilityProcessingStatus::Imported(hash)) = &result { - info!(self.log, "New RPC block received"; "slot" => slot, "hash" => %hash); + match result.as_ref() { + Ok(AvailabilityProcessingStatus::Imported(hash)) => { + info!(self.log, "New RPC block received"; "slot" => slot, "hash" => %hash); - // Trigger processing for work referencing this block. - let reprocess_msg = ReprocessQueueMessage::BlockImported { - block_root: hash, - parent_root, - }; - if reprocess_tx.try_send(reprocess_msg).is_err() { - error!(self.log, "Failed to inform block import"; "source" => "rpc", "block_root" => %hash) - }; - self.chain.block_times_cache.write().set_time_observed( - hash, - slot, - seen_timestamp, - None, - None, - ); + // Trigger processing for work referencing this block. 
+ let reprocess_msg = ReprocessQueueMessage::BlockImported { + block_root: *hash, + parent_root, + }; + if reprocess_tx.try_send(reprocess_msg).is_err() { + error!(self.log, "Failed to inform block import"; "source" => "rpc", "block_root" => %hash) + }; + self.chain.block_times_cache.write().set_time_observed( + *hash, + slot, + seen_timestamp, + None, + None, + ); - self.chain.recompute_head_at_current_slot().await; + self.chain.recompute_head_at_current_slot().await; + } + Ok(AvailabilityProcessingStatus::MissingComponents(..)) => { + // Block is valid, we can now attempt fetching blobs from EL using version hashes + // derived from kzg commitments from the block, without having to wait for all blobs + // to be sent from the peers if we already have them. + let publish_blobs = false; + self.fetch_engine_blobs_and_publish(signed_beacon_block, block_root, publish_blobs) + .await + } + _ => {} } // RPC block imported or execution validated. If the block was already imported by gossip we @@ -606,103 +618,69 @@ impl NetworkBeaconProcessor { ); (imported_blocks, Ok(())) } - Err(error) => { + Err(e) => { metrics::inc_counter( &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL, ); - let err = match error { - // Handle the historical block errors specifically - BeaconChainError::HistoricalBlockError(e) => match e { - HistoricalBlockError::MismatchedBlockRoot { - block_root, - expected_block_root, - } => { - debug!( - self.log, - "Backfill batch processing error"; - "error" => "mismatched_block_root", - "block_root" => ?block_root, - "expected_root" => ?expected_block_root - ); - - ChainSegmentFailed { - message: String::from("mismatched_block_root"), - // The peer is faulty if they send blocks with bad roots. 
- peer_action: Some(PeerAction::LowToleranceError), - } - } - HistoricalBlockError::InvalidSignature - | HistoricalBlockError::SignatureSet(_) => { - warn!( - self.log, - "Backfill batch processing error"; - "error" => ?e - ); - - ChainSegmentFailed { - message: "invalid_signature".into(), - // The peer is faulty if they bad signatures. - peer_action: Some(PeerAction::LowToleranceError), - } - } - HistoricalBlockError::ValidatorPubkeyCacheTimeout => { - warn!( - self.log, - "Backfill batch processing error"; - "error" => "pubkey_cache_timeout" - ); - - ChainSegmentFailed { - message: "pubkey_cache_timeout".into(), - // This is an internal error, do not penalize the peer. - peer_action: None, - } - } - HistoricalBlockError::NoAnchorInfo => { - warn!(self.log, "Backfill not required"); - - ChainSegmentFailed { - message: String::from("no_anchor_info"), - // There is no need to do a historical sync, this is not a fault of - // the peer. - peer_action: None, - } - } - HistoricalBlockError::IndexOutOfBounds => { - error!( - self.log, - "Backfill batch OOB error"; - "error" => ?e, - ); - ChainSegmentFailed { - message: String::from("logic_error"), - // This should never occur, don't penalize the peer. - peer_action: None, - } - } - HistoricalBlockError::BlockOutOfRange { .. } => { - error!( - self.log, - "Backfill batch error"; - "error" => ?e, - ); - ChainSegmentFailed { - message: String::from("unexpected_error"), - // This should never occur, don't penalize the peer. - peer_action: None, - } - } - }, - other => { - warn!(self.log, "Backfill batch processing error"; "error" => ?other); - ChainSegmentFailed { - message: format!("{:?}", other), - // This is an internal error, don't penalize the peer. 
- peer_action: None, - } + let peer_action = match &e { + HistoricalBlockError::MismatchedBlockRoot { + block_root, + expected_block_root, + } => { + debug!( + self.log, + "Backfill batch processing error"; + "error" => "mismatched_block_root", + "block_root" => ?block_root, + "expected_root" => ?expected_block_root + ); + // The peer is faulty if they send blocks with bad roots. + Some(PeerAction::LowToleranceError) } + HistoricalBlockError::InvalidSignature + | HistoricalBlockError::SignatureSet(_) => { + warn!( + self.log, + "Backfill batch processing error"; + "error" => ?e + ); + // The peer is faulty if they bad signatures. + Some(PeerAction::LowToleranceError) + } + HistoricalBlockError::ValidatorPubkeyCacheTimeout => { + warn!( + self.log, + "Backfill batch processing error"; + "error" => "pubkey_cache_timeout" + ); + // This is an internal error, do not penalize the peer. + None + } + HistoricalBlockError::IndexOutOfBounds => { + error!( + self.log, + "Backfill batch OOB error"; + "error" => ?e, + ); + // This should never occur, don't penalize the peer. + None + } + HistoricalBlockError::StoreError(e) => { + warn!(self.log, "Backfill batch processing error"; "error" => ?e); + // This is an internal error, don't penalize the peer. + None + } // + // Do not use a fallback match, handle all errors explicitly }; - (0, Err(err)) + let err_str: &'static str = e.into(); + ( + 0, + Err(ChainSegmentFailed { + message: format!("{:?}", err_str), + // This is an internal error, don't penalize the peer. + peer_action, + }), + ) } } } diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 946d25237b..5703ed3504 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -158,26 +158,20 @@ impl BackFillSync { log: slog::Logger, ) -> Self { // Determine if backfill is enabled or not. 
- // Get the anchor info, if this returns None, then backfill is not required for this - // running instance. // If, for some reason a backfill has already been completed (or we've used a trusted // genesis root) then backfill has been completed. - - let (state, current_start) = match beacon_chain.store.get_anchor_info() { - Some(anchor_info) => { - if anchor_info.block_backfill_complete(beacon_chain.genesis_backfill_slot) { - (BackFillState::Completed, Epoch::new(0)) - } else { - ( - BackFillState::Paused, - anchor_info - .oldest_block_slot - .epoch(T::EthSpec::slots_per_epoch()), - ) - } - } - None => (BackFillState::NotRequired, Epoch::new(0)), - }; + let anchor_info = beacon_chain.store.get_anchor_info(); + let (state, current_start) = + if anchor_info.block_backfill_complete(beacon_chain.genesis_backfill_slot) { + (BackFillState::Completed, Epoch::new(0)) + } else { + ( + BackFillState::Paused, + anchor_info + .oldest_block_slot + .epoch(T::EthSpec::slots_per_epoch()), + ) + }; let bfs = BackFillSync { batches: BTreeMap::new(), @@ -253,25 +247,15 @@ impl BackFillSync { self.set_state(BackFillState::Syncing); // Obtain a new start slot, from the beacon chain and handle possible errors. - match self.reset_start_epoch() { - Err(ResetEpochError::SyncCompleted) => { - error!(self.log, "Backfill sync completed whilst in failed status"); - self.set_state(BackFillState::Completed); - return Err(BackFillError::InvalidSyncState(String::from( - "chain completed", - ))); - } - Err(ResetEpochError::NotRequired) => { - error!( - self.log, - "Backfill sync not required whilst in failed status" - ); - self.set_state(BackFillState::NotRequired); - return Err(BackFillError::InvalidSyncState(String::from( - "backfill not required", - ))); - } - Ok(_) => {} + if let Err(e) = self.reset_start_epoch() { + // This infallible match exists to force us to update this code if a future + // refactor of `ResetEpochError` adds a variant. 
+ let ResetEpochError::SyncCompleted = e; + error!(self.log, "Backfill sync completed whilst in failed status"); + self.set_state(BackFillState::Completed); + return Err(BackFillError::InvalidSyncState(String::from( + "chain completed", + ))); } debug!(self.log, "Resuming a failed backfill sync"; "start_epoch" => self.current_start); @@ -279,9 +263,7 @@ impl BackFillSync { // begin requesting blocks from the peer pool, until all peers are exhausted. self.request_batches(network)?; } - BackFillState::Completed | BackFillState::NotRequired => { - return Ok(SyncStart::NotSyncing) - } + BackFillState::Completed => return Ok(SyncStart::NotSyncing), } Ok(SyncStart::Syncing { @@ -313,10 +295,7 @@ impl BackFillSync { peer_id: &PeerId, network: &mut SyncNetworkContext, ) -> Result<(), BackFillError> { - if matches!( - self.state(), - BackFillState::Failed | BackFillState::NotRequired - ) { + if matches!(self.state(), BackFillState::Failed) { return Ok(()); } @@ -1142,17 +1121,14 @@ impl BackFillSync { /// This errors if the beacon chain indicates that backfill sync has already completed or is /// not required. 
fn reset_start_epoch(&mut self) -> Result<(), ResetEpochError> { - if let Some(anchor_info) = self.beacon_chain.store.get_anchor_info() { - if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) { - Err(ResetEpochError::SyncCompleted) - } else { - self.current_start = anchor_info - .oldest_block_slot - .epoch(T::EthSpec::slots_per_epoch()); - Ok(()) - } + let anchor_info = self.beacon_chain.store.get_anchor_info(); + if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) { + Err(ResetEpochError::SyncCompleted) } else { - Err(ResetEpochError::NotRequired) + self.current_start = anchor_info + .oldest_block_slot + .epoch(T::EthSpec::slots_per_epoch()); + Ok(()) } } @@ -1160,13 +1136,12 @@ impl BackFillSync { fn check_completed(&mut self) -> bool { if self.would_complete(self.current_start) { // Check that the beacon chain agrees - if let Some(anchor_info) = self.beacon_chain.store.get_anchor_info() { - // Conditions that we have completed a backfill sync - if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) { - return true; - } else { - error!(self.log, "Backfill out of sync with beacon chain"); - } + let anchor_info = self.beacon_chain.store.get_anchor_info(); + // Conditions that we have completed a backfill sync + if anchor_info.block_backfill_complete(self.beacon_chain.genesis_backfill_slot) { + return true; + } else { + error!(self.log, "Backfill out of sync with beacon chain"); } } false @@ -1195,6 +1170,4 @@ impl BackFillSync { enum ResetEpochError { /// The chain has already completed. SyncCompleted, - /// Backfill is not required. 
- NotRequired, } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 882f199b52..344e91711c 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -1188,22 +1188,14 @@ impl SyncManager { } fn on_sampling_result(&mut self, requester: SamplingRequester, result: SamplingResult) { - // TODO(das): How is a consumer of sampling results? - // - Fork-choice for trailing DA - // - Single lookups to complete import requirements - // - Range sync to complete import requirements? Can sampling for syncing lag behind and - // accumulate in fork-choice? - match requester { SamplingRequester::ImportedBlock(block_root) => { debug!(self.log, "Sampling result"; "block_root" => %block_root, "result" => ?result); - // TODO(das): Consider moving SamplingResult to the beacon_chain crate and import - // here. No need to add too much enum variants, just whatever the beacon_chain or - // fork-choice needs to make a decision. Currently the fork-choice only needs to - // be notified of successful samplings, i.e. sampling failures don't trigger pruning match result { Ok(_) => { + // Notify the fork-choice of a successful sampling result to mark the block + // branch as safe. 
if let Err(e) = self .network .beacon_processor() diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 5f7778ffcc..c4d987e858 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -769,7 +769,6 @@ impl SyncNetworkContext { self.log.clone(), ); - // TODO(das): start request // Note that you can only send, but not handle a response here match request.continue_requests(self) { Ok(_) => { @@ -779,7 +778,6 @@ impl SyncNetworkContext { self.custody_by_root_requests.insert(requester, request); Ok(LookupRequestResult::RequestSent(req_id)) } - // TODO(das): handle this error properly Err(e) => Err(RpcRequestSendError::CustodyRequestError(e)), } } diff --git a/beacon_node/network/src/sync/peer_sampling.rs b/beacon_node/network/src/sync/peer_sampling.rs index 7e725f5df5..289ed73cdd 100644 --- a/beacon_node/network/src/sync/peer_sampling.rs +++ b/beacon_node/network/src/sync/peer_sampling.rs @@ -24,7 +24,6 @@ pub type SamplingResult = Result<(), SamplingError>; type DataColumnSidecarList = Vec>>; pub struct Sampling { - // TODO(das): stalled sampling request are never cleaned up requests: HashMap>, sampling_config: SamplingConfig, log: slog::Logger, @@ -313,8 +312,8 @@ impl ActiveSamplingRequest { .iter() .position(|data| &data.index == column_index) else { - // Peer does not have the requested data. - // TODO(das) what to do? + // Peer does not have the requested data, mark peer as "dont have" and try + // again with a different peer. debug!(self.log, "Sampling peer claims to not have the data"; "block_root" => %self.block_root, @@ -373,7 +372,9 @@ impl ActiveSamplingRequest { sampling_request_id, }, ) { - // TODO(das): Beacon processor is overloaded, what should we do? + // Beacon processor is overloaded, drop sampling attempt. 
Failing to sample + // is not a permanent state so we should recover once the node has capacity + // and receives a descendant block. error!(self.log, "Dropping sampling"; "block" => %self.block_root, @@ -391,8 +392,8 @@ impl ActiveSamplingRequest { ); metrics::inc_counter_vec(&metrics::SAMPLE_DOWNLOAD_RESULT, &[metrics::FAILURE]); - // Error downloading, maybe penalize peer and retry again. - // TODO(das) with different peer or different peer? + // Error downloading, malicious network errors are already penalized before + // reaching this function. Mark the peer as failed and try again with another. for column_index in column_indexes { let Some(request) = self.column_requests.get_mut(column_index) else { warn!(self.log, @@ -453,7 +454,7 @@ impl ActiveSamplingRequest { debug!(self.log, "Sample verification failure"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "reason" => ?err); metrics::inc_counter_vec(&metrics::SAMPLE_VERIFY_RESULT, &[metrics::FAILURE]); - // TODO(das): Peer sent invalid data, penalize and try again from different peer + // Peer sent invalid data, penalize and try again from different peer // TODO(das): Count individual failures for column_index in column_indexes { let Some(request) = self.column_requests.get_mut(column_index) else { diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 34b03a0955..cecfcee868 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -86,6 +86,24 @@ pub fn cli_app() -> Command { .hide(true) .display_order(0) ) + .arg( + Arg::new("blob-publication-batches") + .long("blob-publication-batches") + .action(ArgAction::Set) + .help_heading(FLAG_HEADER) + .help("Number of batches that the node splits blobs or data columns into during publication. This doesn't apply if the node is the block proposer. 
Used in PeerDAS only.") + .display_order(0) + .hide(true) + ) + .arg( + Arg::new("blob-publication-batch-interval") + .long("blob-publication-batch-interval") + .action(ArgAction::Set) + .help_heading(FLAG_HEADER) + .help("The delay in milliseconds applied by the node between sending each blob or data column batch. This doesn't apply if the node is the block proposer.") + .display_order(0) + .hide(true) + ) .arg( Arg::new("subscribe-all-subnets") .long("subscribe-all-subnets") @@ -675,8 +693,7 @@ pub fn cli_app() -> Command { Arg::new("staking") .long("staking") .help("Standard option for a staking beacon node. This will enable the HTTP server \ - on localhost:5052 and import deposit logs from the execution node. This is \ - equivalent to `--http` on merge-ready networks, or `--http --eth1` pre-merge") + on localhost:5052 and import deposit logs from the execution node.") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) .display_order(0) @@ -688,21 +705,21 @@ pub fn cli_app() -> Command { .arg( Arg::new("eth1") .long("eth1") - .help("If present the node will connect to an eth1 node. This is required for \ - block production, you must use this flag if you wish to serve a validator.") + .help("DEPRECATED") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) .display_order(0) + .hide(true) ) .arg( Arg::new("dummy-eth1") .long("dummy-eth1") + .help("DEPRECATED") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) .conflicts_with("eth1") - .help("If present, uses an eth1 backend that generates static dummy data.\ - Identical to the method used at the 2019 Canada interop.") .display_order(0) + .hide(true) ) .arg( Arg::new("eth1-purge-cache") @@ -738,9 +755,23 @@ pub fn cli_app() -> Command { Arg::new("slots-per-restore-point") .long("slots-per-restore-point") .value_name("SLOT_COUNT") - .help("Specifies how often a freezer DB restore point should be stored. \ - Cannot be changed after initialization. 
\ - [default: 8192 (mainnet) or 64 (minimal)]") + .help("DEPRECATED. This flag has no effect.") + .action(ArgAction::Set) + .display_order(0) + ) + .arg( + Arg::new("hierarchy-exponents") + .long("hierarchy-exponents") + .value_name("EXPONENTS") + .help("Specifies the frequency for storing full state snapshots and hierarchical \ + diffs in the freezer DB. Accepts a comma-separated list of ascending \ + exponents. Each exponent defines an interval for storing diffs to the layer \ + above. The last exponent defines the interval for full snapshots. \ + For example, a config of '4,8,12' would store a full snapshot every \ + 4096 (2^12) slots, first-level diffs every 256 (2^8) slots, and second-level \ + diffs every 16 (2^4) slots. \ + Cannot be changed after initialization. \ + [default: 5,9,11,13,16,18,21]") .action(ArgAction::Set) .display_order(0) ) @@ -768,11 +799,24 @@ pub fn cli_app() -> Command { Arg::new("historic-state-cache-size") .long("historic-state-cache-size") .value_name("SIZE") - .help("Specifies how many states from the freezer database should cache in memory") + .help("Specifies how many states from the freezer database should be cached in \ + memory") .default_value("1") .action(ArgAction::Set) .display_order(0) ) + .arg( + Arg::new("hdiff-buffer-cache-size") + .long("hdiff-buffer-cache-size") + .value_name("SIZE") + .help("Number of hierarchical diff (hdiff) buffers to cache in memory. Each buffer \ + is around the size of a BeaconState so you should be cautious about setting \ + this value too high. This flag is irrelevant for most nodes, which run with \ + state pruning enabled.") + .default_value("16") + .action(ArgAction::Set) + .display_order(0) + ) .arg( Arg::new("state-cache-size") .long("state-cache-size") @@ -988,7 +1032,6 @@ pub fn cli_app() -> Command { .default_value("0") .display_order(0) ) - /* * Misc. 
*/ @@ -1445,6 +1488,7 @@ pub fn cli_app() -> Command { Useful if you intend to run a non-validating beacon node.") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) + .conflicts_with("staking") .display_order(0) ) .arg( diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index ecadee5f47..8d8a44a6fd 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -121,7 +121,6 @@ pub fn get_config( if cli_args.get_flag("staking") { client_config.http_api.enabled = true; - client_config.sync_eth1_chain = true; } /* @@ -192,6 +191,15 @@ pub fn get_config( client_config.chain.enable_sampling = true; } + if let Some(batches) = clap_utils::parse_optional(cli_args, "blob-publication-batches")? { + client_config.chain.blob_publication_batches = batches; + } + + if let Some(interval) = clap_utils::parse_optional(cli_args, "blob-publication-batch-interval")? + { + client_config.chain.blob_publication_batch_interval = Duration::from_millis(interval); + } + /* * Prometheus metrics HTTP server */ @@ -254,18 +262,12 @@ pub fn get_config( * Eth1 */ - // When present, use an eth1 backend that generates deterministic junk. - // - // Useful for running testnets without the overhead of a deposit contract. if cli_args.get_flag("dummy-eth1") { - client_config.dummy_eth1_backend = true; + warn!(log, "The --dummy-eth1 flag is deprecated"); } - // When present, attempt to sync to an eth1 node. - // - // Required for block production. if cli_args.get_flag("eth1") { - client_config.sync_eth1_chain = true; + warn!(log, "The --eth1 flag is deprecated"); } if let Some(val) = cli_args.get_one::("eth1-blocks-per-log-query") { @@ -288,17 +290,6 @@ pub fn get_config( let endpoints: String = clap_utils::parse_required(cli_args, "execution-endpoint")?; let mut el_config = execution_layer::Config::default(); - // Always follow the deposit contract when there is an execution endpoint. 
- // - // This is wasteful for non-staking nodes as they have no need to process deposit contract - // logs and build an "eth1" cache. The alternative is to explicitly require the `--eth1` or - // `--staking` flags, however that poses a risk to stakers since they cannot produce blocks - // without "eth1". - // - // The waste for non-staking nodes is relatively small so we err on the side of safety for - // stakers. The merge is already complicated enough. - client_config.sync_eth1_chain = true; - // Parse a single execution endpoint, logging warnings if multiple endpoints are supplied. let execution_endpoint = parse_only_one_value( endpoints.as_str(), @@ -402,13 +393,6 @@ pub fn get_config( client_config.blobs_db_path = Some(PathBuf::from(blobs_db_dir)); } - let (sprp, sprp_explicit) = get_slots_per_restore_point::(clap_utils::parse_optional( - cli_args, - "slots-per-restore-point", - )?)?; - client_config.store.slots_per_restore_point = sprp; - client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit; - if let Some(block_cache_size) = cli_args.get_one::("block-cache-size") { client_config.store.block_cache_size = block_cache_size .parse() @@ -421,11 +405,16 @@ pub fn get_config( .map_err(|_| "state-cache-size is not a valid integer".to_string())?; } - if let Some(historic_state_cache_size) = cli_args.get_one::("historic-state-cache-size") + if let Some(historic_state_cache_size) = + clap_utils::parse_optional(cli_args, "historic-state-cache-size")? { - client_config.store.historic_state_cache_size = historic_state_cache_size - .parse() - .map_err(|_| "historic-state-cache-size is not a valid integer".to_string())?; + client_config.store.historic_state_cache_size = historic_state_cache_size; + } + + if let Some(hdiff_buffer_cache_size) = + clap_utils::parse_optional(cli_args, "hdiff-buffer-cache-size")? 
+ { + client_config.store.hdiff_buffer_cache_size = hdiff_buffer_cache_size; } client_config.store.compact_on_init = cli_args.get_flag("compact-db"); @@ -439,6 +428,14 @@ pub fn get_config( client_config.store.prune_payloads = prune_payloads; } + if clap_utils::parse_optional::(cli_args, "slots-per-restore-point")?.is_some() { + warn!(log, "The slots-per-restore-point flag is deprecated"); + } + + if let Some(hierarchy_config) = clap_utils::parse_optional(cli_args, "hierarchy-exponents")? { + client_config.store.hierarchy_config = hierarchy_config; + } + if let Some(epochs_per_migration) = clap_utils::parse_optional(cli_args, "epochs-per-migration")? { @@ -1486,23 +1483,6 @@ pub fn get_data_dir(cli_args: &ArgMatches) -> PathBuf { .unwrap_or_else(|| PathBuf::from(".")) } -/// Get the `slots_per_restore_point` value to use for the database. -/// -/// Return `(sprp, set_explicitly)` where `set_explicitly` is `true` if the user provided the value. -pub fn get_slots_per_restore_point( - slots_per_restore_point: Option, -) -> Result<(u64, bool), String> { - if let Some(slots_per_restore_point) = slots_per_restore_point { - Ok((slots_per_restore_point, true)) - } else { - let default = std::cmp::min( - E::slots_per_historical_root() as u64, - store::config::DEFAULT_SLOTS_PER_RESTORE_POINT, - ); - Ok((default, false)) - } -} - /// Parses the `cli_value` as a comma-separated string of values to be parsed with `parser`. /// /// If there is more than one value, log a warning. If there are no values, return an error. 
diff --git a/beacon_node/src/lib.rs b/beacon_node/src/lib.rs index 5bc0f9dc6a..cca617d8c6 100644 --- a/beacon_node/src/lib.rs +++ b/beacon_node/src/lib.rs @@ -9,7 +9,7 @@ use beacon_chain::{ use clap::ArgMatches; pub use cli::cli_app; pub use client::{Client, ClientBuilder, ClientConfig, ClientGenesis}; -pub use config::{get_config, get_data_dir, get_slots_per_restore_point, set_network_config}; +pub use config::{get_config, get_data_dir, set_network_config}; use environment::RuntimeContext; pub use eth2_config::Eth2Config; use slasher::{DatabaseBackendOverride, Slasher}; @@ -140,7 +140,7 @@ impl ProductionBeaconNode { let builder = builder .beacon_chain_builder(client_genesis, client_config.clone()) .await?; - let builder = if client_config.sync_eth1_chain && !client_config.dummy_eth1_backend { + let builder = if client_config.sync_eth1_chain { info!( log, "Block production enabled"; @@ -150,13 +150,6 @@ impl ProductionBeaconNode { builder .caching_eth1_backend(client_config.eth1.clone()) .await? - } else if client_config.dummy_eth1_backend { - warn!( - log, - "Block production impaired"; - "reason" => "dummy eth1 backend is enabled" - ); - builder.dummy_eth1_backend()? 
} else { info!( log, diff --git a/beacon_node/store/Cargo.toml b/beacon_node/store/Cargo.toml index aac1ee26e1..7cee16c353 100644 --- a/beacon_node/store/Cargo.toml +++ b/beacon_node/store/Cargo.toml @@ -7,6 +7,8 @@ edition = { workspace = true } [dev-dependencies] tempfile = { workspace = true } beacon_chain = { workspace = true } +criterion = { workspace = true } +rand = { workspace = true, features = ["small_rng"] } [dependencies] db-key = "0.0.5" @@ -15,6 +17,7 @@ parking_lot = { workspace = true } itertools = { workspace = true } ethereum_ssz = { workspace = true } ethereum_ssz_derive = { workspace = true } +superstruct = { workspace = true } types = { workspace = true } safe_arith = { workspace = true } state_processing = { workspace = true } @@ -25,3 +28,12 @@ lru = { workspace = true } sloggers = { workspace = true } directory = { workspace = true } strum = { workspace = true } +xdelta3 = { workspace = true } +zstd = { workspace = true } +bls = { workspace = true } +smallvec = { workspace = true } +logging = { workspace = true } + +[[bench]] +name = "hdiff" +harness = false diff --git a/beacon_node/store/benches/hdiff.rs b/beacon_node/store/benches/hdiff.rs new file mode 100644 index 0000000000..2577f03f66 --- /dev/null +++ b/beacon_node/store/benches/hdiff.rs @@ -0,0 +1,116 @@ +use bls::PublicKeyBytes; +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::Rng; +use ssz::Decode; +use store::{ + hdiff::{HDiff, HDiffBuffer}, + StoreConfig, +}; +use types::{BeaconState, Epoch, Eth1Data, EthSpec, MainnetEthSpec as E, Validator}; + +pub fn all_benches(c: &mut Criterion) { + let spec = E::default_spec(); + let genesis_time = 0; + let eth1_data = Eth1Data::default(); + let mut rng = rand::thread_rng(); + let validator_mutations = 1000; + let validator_additions = 100; + + for n in [1_000_000, 1_500_000, 2_000_000] { + let mut source_state = BeaconState::::new(genesis_time, eth1_data.clone(), &spec); + + for _ in 0..n { + append_validator(&mut 
source_state, &mut rng); + } + + let mut target_state = source_state.clone(); + // Change all balances + for i in 0..n { + let balance = target_state.balances_mut().get_mut(i).unwrap(); + *balance += rng.gen_range(1..=1_000_000); + } + // And some validator records + for _ in 0..validator_mutations { + let index = rng.gen_range(1..n); + // TODO: Only change a few things, and not the pubkey + *target_state.validators_mut().get_mut(index).unwrap() = rand_validator(&mut rng); + } + for _ in 0..validator_additions { + append_validator(&mut target_state, &mut rng); + } + + bench_against_states( + c, + source_state, + target_state, + &format!("n={n} v_mut={validator_mutations} v_add={validator_additions}"), + ); + } +} + +fn bench_against_states( + c: &mut Criterion, + source_state: BeaconState, + target_state: BeaconState, + id: &str, +) { + let slot_diff = target_state.slot() - source_state.slot(); + let config = StoreConfig::default(); + let source = HDiffBuffer::from_state(source_state); + let target = HDiffBuffer::from_state(target_state); + let diff = HDiff::compute(&source, &target, &config).unwrap(); + println!( + "state slot diff {slot_diff} - diff size {id} {}", + diff.size() + ); + + c.bench_function(&format!("compute hdiff {id}"), |b| { + b.iter(|| { + HDiff::compute(&source, &target, &config).unwrap(); + }) + }); + c.bench_function(&format!("apply hdiff {id}"), |b| { + b.iter(|| { + let mut source = source.clone(); + diff.apply(&mut source, &config).unwrap(); + }) + }); +} + +fn rand_validator(mut rng: impl Rng) -> Validator { + let mut pubkey = [0u8; 48]; + rng.fill_bytes(&mut pubkey); + let withdrawal_credentials: [u8; 32] = rng.gen(); + + Validator { + pubkey: PublicKeyBytes::from_ssz_bytes(&pubkey).unwrap(), + withdrawal_credentials: withdrawal_credentials.into(), + slashed: false, + effective_balance: 32_000_000_000, + activation_eligibility_epoch: Epoch::max_value(), + activation_epoch: Epoch::max_value(), + exit_epoch: Epoch::max_value(), + 
withdrawable_epoch: Epoch::max_value(), + } +} + +fn append_validator(state: &mut BeaconState, mut rng: impl Rng) { + state + .balances_mut() + .push(32_000_000_000 + rng.gen_range(1..=1_000_000_000)) + .unwrap(); + if let Ok(inactivity_scores) = state.inactivity_scores_mut() { + inactivity_scores.push(0).unwrap(); + } + state + .validators_mut() + .push(rand_validator(&mut rng)) + .unwrap(); +} + +criterion_group! { + name = benches; + config = Criterion::default().sample_size(10); + targets = all_benches +} +criterion_main!(benches); diff --git a/beacon_node/store/src/chunk_writer.rs b/beacon_node/store/src/chunk_writer.rs deleted file mode 100644 index 059b812e74..0000000000 --- a/beacon_node/store/src/chunk_writer.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::chunked_vector::{chunk_key, Chunk, ChunkError, Field}; -use crate::{Error, KeyValueStore, KeyValueStoreOp}; -use types::EthSpec; - -/// Buffered writer for chunked vectors (block roots mainly). -pub struct ChunkWriter<'a, F, E, S> -where - F: Field, - E: EthSpec, - S: KeyValueStore, -{ - /// Buffered chunk awaiting writing to disk (always dirty). - chunk: Chunk, - /// Chunk index of `chunk`. - index: usize, - store: &'a S, -} - -impl<'a, F, E, S> ChunkWriter<'a, F, E, S> -where - F: Field, - E: EthSpec, - S: KeyValueStore, -{ - pub fn new(store: &'a S, vindex: usize) -> Result { - let chunk_index = F::chunk_index(vindex); - let chunk = Chunk::load(store, F::column(), &chunk_key(chunk_index))? - .unwrap_or_else(|| Chunk::new(vec![F::Value::default(); F::chunk_size()])); - - Ok(Self { - chunk, - index: chunk_index, - store, - }) - } - - /// Set the value at a given vector index, writing the current chunk and moving on if necessary. - pub fn set( - &mut self, - vindex: usize, - value: F::Value, - batch: &mut Vec, - ) -> Result<(), Error> { - let chunk_index = F::chunk_index(vindex); - - // Advance to the next chunk. 
- if chunk_index != self.index { - self.write(batch)?; - *self = Self::new(self.store, vindex)?; - } - - let i = vindex % F::chunk_size(); - let existing_value = &self.chunk.values[i]; - - if existing_value == &value || existing_value == &F::Value::default() { - self.chunk.values[i] = value; - Ok(()) - } else { - Err(ChunkError::Inconsistent { - field: F::column(), - chunk_index, - existing_value: format!("{:?}", existing_value), - new_value: format!("{:?}", value), - } - .into()) - } - } - - /// Write the current chunk to disk. - /// - /// Should be called before the writer is dropped, in order to write the final chunk to disk. - pub fn write(&self, batch: &mut Vec) -> Result<(), Error> { - self.chunk.store(F::column(), &chunk_key(self.index), batch) - } -} diff --git a/beacon_node/store/src/chunked_vector.rs b/beacon_node/store/src/chunked_vector.rs index 4450989d59..83b8da2a18 100644 --- a/beacon_node/store/src/chunked_vector.rs +++ b/beacon_node/store/src/chunked_vector.rs @@ -322,11 +322,11 @@ macro_rules! 
field { } field!( - BlockRoots, + BlockRootsChunked, FixedLengthField, Hash256, E::SlotsPerHistoricalRoot, - DBColumn::BeaconBlockRoots, + DBColumn::BeaconBlockRootsChunked, |_| OncePerNSlots { n: 1, activation_slot: Some(Slot::new(0)), @@ -336,11 +336,11 @@ field!( ); field!( - StateRoots, + StateRootsChunked, FixedLengthField, Hash256, E::SlotsPerHistoricalRoot, - DBColumn::BeaconStateRoots, + DBColumn::BeaconStateRootsChunked, |_| OncePerNSlots { n: 1, activation_slot: Some(Slot::new(0)), @@ -859,8 +859,8 @@ mod test { fn test_fixed_length>(_: F, expected: bool) { assert_eq!(F::is_fixed_length(), expected); } - test_fixed_length(BlockRoots, true); - test_fixed_length(StateRoots, true); + test_fixed_length(BlockRootsChunked, true); + test_fixed_length(StateRootsChunked, true); test_fixed_length(HistoricalRoots, false); test_fixed_length(RandaoMixes, true); } @@ -880,12 +880,12 @@ mod test { #[test] fn needs_genesis_value_block_roots() { - needs_genesis_value_once_per_slot(BlockRoots); + needs_genesis_value_once_per_slot(BlockRootsChunked); } #[test] fn needs_genesis_value_state_roots() { - needs_genesis_value_once_per_slot(StateRoots); + needs_genesis_value_once_per_slot(StateRootsChunked); } #[test] diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index d43999d822..4f67530570 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -1,38 +1,47 @@ -use crate::{DBColumn, Error, StoreItem}; +use crate::hdiff::HierarchyConfig; +use crate::{AnchorInfo, DBColumn, Error, Split, StoreItem}; use serde::{Deserialize, Serialize}; use ssz::{Decode, Encode}; use ssz_derive::{Decode, Encode}; +use std::io::Write; use std::num::NonZeroUsize; +use superstruct::superstruct; use types::non_zero_usize::new_non_zero_usize; -use types::{EthSpec, MinimalEthSpec}; +use types::EthSpec; +use zstd::Encoder; -pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048; -pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192; -pub 
const DEFAULT_BLOCK_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(5); +// Only used in tests. Mainnet sets a higher default on the CLI. +pub const DEFAULT_EPOCHS_PER_STATE_DIFF: u64 = 8; +pub const DEFAULT_BLOCK_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(64); pub const DEFAULT_STATE_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(128); +pub const DEFAULT_COMPRESSION_LEVEL: i32 = 1; pub const DEFAULT_HISTORIC_STATE_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(1); +pub const DEFAULT_HDIFF_BUFFER_CACHE_SIZE: NonZeroUsize = new_non_zero_usize(16); +const EST_COMPRESSION_FACTOR: usize = 2; pub const DEFAULT_EPOCHS_PER_BLOB_PRUNE: u64 = 1; pub const DEFAULT_BLOB_PUNE_MARGIN_EPOCHS: u64 = 0; /// Database configuration parameters. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct StoreConfig { - /// Number of slots to wait between storing restore points in the freezer database. - pub slots_per_restore_point: u64, - /// Flag indicating whether the `slots_per_restore_point` was set explicitly by the user. - pub slots_per_restore_point_set_explicitly: bool, /// Maximum number of blocks to store in the in-memory block cache. pub block_cache_size: NonZeroUsize, /// Maximum number of states to store in the in-memory state cache. pub state_cache_size: NonZeroUsize, - /// Maximum number of states from freezer database to store in the in-memory state cache. + /// Compression level for blocks, state diffs and other compressed values. + pub compression_level: i32, + /// Maximum number of historic states to store in the in-memory historic state cache. pub historic_state_cache_size: NonZeroUsize, + /// Maximum number of `HDiffBuffer`s to store in memory. + pub hdiff_buffer_cache_size: NonZeroUsize, /// Whether to compact the database on initialization. pub compact_on_init: bool, /// Whether to compact the database during database pruning. pub compact_on_prune: bool, /// Whether to prune payloads on initialization and finalization. 
pub prune_payloads: bool, + /// State diff hierarchy. + pub hierarchy_config: HierarchyConfig, /// Whether to prune blobs older than the blob data availability boundary. pub prune_blobs: bool, /// Frequency of blob pruning in epochs. Default: 1 (every epoch). @@ -43,28 +52,59 @@ pub struct StoreConfig { } /// Variant of `StoreConfig` that gets written to disk. Contains immutable configuration params. -#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)] +#[superstruct( + variants(V1, V22), + variant_attributes(derive(Debug, Clone, PartialEq, Eq, Encode, Decode)) +)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct OnDiskStoreConfig { + #[superstruct(only(V1))] pub slots_per_restore_point: u64, + /// Prefix byte to future-proof versions of the `OnDiskStoreConfig` post V1 + #[superstruct(only(V22))] + version_byte: u8, + #[superstruct(only(V22))] + pub hierarchy_config: HierarchyConfig, +} + +impl OnDiskStoreConfigV22 { + fn new(hierarchy_config: HierarchyConfig) -> Self { + Self { + version_byte: 22, + hierarchy_config, + } + } } #[derive(Debug, Clone)] pub enum StoreConfigError { - MismatchedSlotsPerRestorePoint { config: u64, on_disk: u64 }, + MismatchedSlotsPerRestorePoint { + config: u64, + on_disk: u64, + }, + InvalidCompressionLevel { + level: i32, + }, + IncompatibleStoreConfig { + config: OnDiskStoreConfig, + on_disk: OnDiskStoreConfig, + }, + ZeroEpochsPerBlobPrune, + InvalidVersionByte(Option), } impl Default for StoreConfig { fn default() -> Self { Self { - // Safe default for tests, shouldn't ever be read by a CLI node. 
- slots_per_restore_point: MinimalEthSpec::slots_per_historical_root() as u64, - slots_per_restore_point_set_explicitly: false, block_cache_size: DEFAULT_BLOCK_CACHE_SIZE, state_cache_size: DEFAULT_STATE_CACHE_SIZE, historic_state_cache_size: DEFAULT_HISTORIC_STATE_CACHE_SIZE, + hdiff_buffer_cache_size: DEFAULT_HDIFF_BUFFER_CACHE_SIZE, + compression_level: DEFAULT_COMPRESSION_LEVEL, compact_on_init: false, compact_on_prune: true, prune_payloads: true, + hierarchy_config: HierarchyConfig::default(), prune_blobs: true, epochs_per_blob_prune: DEFAULT_EPOCHS_PER_BLOB_PRUNE, blob_prune_margin_epochs: DEFAULT_BLOB_PUNE_MARGIN_EPOCHS, @@ -74,22 +114,90 @@ impl Default for StoreConfig { impl StoreConfig { pub fn as_disk_config(&self) -> OnDiskStoreConfig { - OnDiskStoreConfig { - slots_per_restore_point: self.slots_per_restore_point, - } + OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(self.hierarchy_config.clone())) } pub fn check_compatibility( &self, on_disk_config: &OnDiskStoreConfig, + split: &Split, + anchor: &AnchorInfo, ) -> Result<(), StoreConfigError> { - if self.slots_per_restore_point != on_disk_config.slots_per_restore_point { - return Err(StoreConfigError::MismatchedSlotsPerRestorePoint { - config: self.slots_per_restore_point, - on_disk: on_disk_config.slots_per_restore_point, - }); + // Allow changing the hierarchy exponents if no historic states are stored. + let no_historic_states_stored = anchor.no_historic_states_stored(split.slot); + let hierarchy_config_changed = + if let Ok(on_disk_hierarchy_config) = on_disk_config.hierarchy_config() { + *on_disk_hierarchy_config != self.hierarchy_config + } else { + false + }; + + if hierarchy_config_changed && !no_historic_states_stored { + Err(StoreConfigError::IncompatibleStoreConfig { + config: self.as_disk_config(), + on_disk: on_disk_config.clone(), + }) + } else { + Ok(()) } - Ok(()) + } + + /// Check that the configuration is valid. 
+ pub fn verify(&self) -> Result<(), StoreConfigError> { + self.verify_compression_level()?; + self.verify_epochs_per_blob_prune() + } + + /// Check that the compression level is valid. + fn verify_compression_level(&self) -> Result<(), StoreConfigError> { + if zstd::compression_level_range().contains(&self.compression_level) { + Ok(()) + } else { + Err(StoreConfigError::InvalidCompressionLevel { + level: self.compression_level, + }) + } + } + + /// Check that epochs_per_blob_prune is at least 1 epoch to avoid attempting to prune the same + /// epochs over and over again. + fn verify_epochs_per_blob_prune(&self) -> Result<(), StoreConfigError> { + if self.epochs_per_blob_prune > 0 { + Ok(()) + } else { + Err(StoreConfigError::ZeroEpochsPerBlobPrune) + } + } + + /// Estimate the size of `len` bytes after compression at the current compression level. + pub fn estimate_compressed_size(&self, len: usize) -> usize { + // This is a rough estimate, but for our data it seems that all non-zero compression levels + // provide a similar compression ratio. + if self.compression_level == 0 { + len + } else { + len / EST_COMPRESSION_FACTOR + } + } + + /// Estimate the size of `len` compressed bytes after decompression at the current compression + /// level. 
+ pub fn estimate_decompressed_size(&self, len: usize) -> usize { + if self.compression_level == 0 { + len + } else { + len * EST_COMPRESSION_FACTOR + } + } + + pub fn compress_bytes(&self, ssz_bytes: &[u8]) -> Result, Error> { + let mut compressed_value = + Vec::with_capacity(self.estimate_compressed_size(ssz_bytes.len())); + let mut encoder = Encoder::new(&mut compressed_value, self.compression_level) + .map_err(Error::Compression)?; + encoder.write_all(ssz_bytes).map_err(Error::Compression)?; + encoder.finish().map_err(Error::Compression)?; + Ok(compressed_value) } } @@ -99,10 +207,136 @@ impl StoreItem for OnDiskStoreConfig { } fn as_store_bytes(&self) -> Vec { - self.as_ssz_bytes() + match self { + OnDiskStoreConfig::V1(value) => value.as_ssz_bytes(), + OnDiskStoreConfig::V22(value) => value.as_ssz_bytes(), + } } fn from_store_bytes(bytes: &[u8]) -> Result { - Ok(Self::from_ssz_bytes(bytes)?) + // NOTE: V22 config can never be deserialized as a V1 because the minimum length of its + // serialization is: 1 prefix byte + 1 offset (OnDiskStoreConfigV1 container) + + // 1 offset (HierarchyConfig container) = 9. 
+ if let Ok(value) = OnDiskStoreConfigV1::from_ssz_bytes(bytes) { + return Ok(Self::V1(value)); + } + + Ok(Self::V22(OnDiskStoreConfigV22::from_ssz_bytes(bytes)?)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + metadata::{ANCHOR_FOR_ARCHIVE_NODE, ANCHOR_UNINITIALIZED, STATE_UPPER_LIMIT_NO_RETAIN}, + AnchorInfo, Split, + }; + use ssz::DecodeError; + use types::{Hash256, Slot}; + + #[test] + fn check_compatibility_ok() { + let store_config = StoreConfig { + ..Default::default() + }; + let on_disk_config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new( + store_config.hierarchy_config.clone(), + )); + let split = Split::default(); + assert!(store_config + .check_compatibility(&on_disk_config, &split, &ANCHOR_UNINITIALIZED) + .is_ok()); + } + + #[test] + fn check_compatibility_after_migration() { + let store_config = StoreConfig { + ..Default::default() + }; + let on_disk_config = OnDiskStoreConfig::V1(OnDiskStoreConfigV1 { + slots_per_restore_point: 8192, + }); + let split = Split::default(); + assert!(store_config + .check_compatibility(&on_disk_config, &split, &ANCHOR_UNINITIALIZED) + .is_ok()); + } + + #[test] + fn check_compatibility_hierarchy_config_incompatible() { + let store_config = StoreConfig::default(); + let on_disk_config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(HierarchyConfig { + exponents: vec![5, 8, 11, 13, 16, 18, 21], + })); + let split = Split { + slot: Slot::new(32), + ..Default::default() + }; + assert!(store_config + .check_compatibility(&on_disk_config, &split, &ANCHOR_FOR_ARCHIVE_NODE) + .is_err()); + } + + #[test] + fn check_compatibility_hierarchy_config_update() { + let store_config = StoreConfig { + ..Default::default() + }; + let on_disk_config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(HierarchyConfig { + exponents: vec![5, 8, 11, 13, 16, 18, 21], + })); + let split = Split::default(); + let anchor = AnchorInfo { + anchor_slot: Slot::new(0), + oldest_block_slot: Slot::new(0), + 
oldest_block_parent: Hash256::ZERO, + state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, + state_lower_limit: Slot::new(0), + }; + assert!(store_config + .check_compatibility(&on_disk_config, &split, &anchor) + .is_ok()); + } + + #[test] + fn serde_on_disk_config_v0_from_v1_default() { + let config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(<_>::default())); + let config_bytes = config.as_store_bytes(); + // On a downgrade, the previous version of lighthouse will attempt to deserialize the + // prefixed V22 as just the V1 version. + assert_eq!( + OnDiskStoreConfigV1::from_ssz_bytes(&config_bytes).unwrap_err(), + DecodeError::InvalidByteLength { + len: 16, + expected: 8 + }, + ); + } + + #[test] + fn serde_on_disk_config_v0_from_v1_empty() { + let config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(HierarchyConfig { + exponents: vec![], + })); + let config_bytes = config.as_store_bytes(); + // On a downgrade, the previous version of lighthouse will attempt to deserialize the + // prefixed V22 as just the V1 version. 
+ assert_eq!( + OnDiskStoreConfigV1::from_ssz_bytes(&config_bytes).unwrap_err(), + DecodeError::InvalidByteLength { + len: 9, + expected: 8 + }, + ); + } + + #[test] + fn serde_on_disk_config_v1_roundtrip() { + let config = OnDiskStoreConfig::V22(OnDiskStoreConfigV22::new(<_>::default())); + let bytes = config.as_store_bytes(); + assert_eq!(bytes[0], 22); + let config_out = OnDiskStoreConfig::from_store_bytes(&bytes).unwrap(); + assert_eq!(config_out, config); } } diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index c543a9c4e4..6bb4edee6b 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,9 +1,10 @@ use crate::chunked_vector::ChunkError; use crate::config::StoreConfigError; use crate::hot_cold_store::HotColdDBError; +use crate::{hdiff, DBColumn}; use ssz::DecodeError; use state_processing::BlockReplayError; -use types::{BeaconStateError, EpochCacheError, Hash256, InconsistentFork, Slot}; +use types::{milhouse, BeaconStateError, EpochCacheError, Hash256, InconsistentFork, Slot}; pub type Result = std::result::Result; @@ -38,27 +39,35 @@ pub enum Error { /// State reconstruction failed because it didn't reach the upper limit slot. /// /// This should never happen (it's a logic error). 
- StateReconstructionDidNotComplete, + StateReconstructionLogicError, StateReconstructionRootMismatch { slot: Slot, expected: Hash256, computed: Hash256, }, + MissingGenesisState, + MissingSnapshot(Slot), BlockReplayError(BlockReplayError), - AddPayloadLogicError, - SlotClockUnavailableForMigration, - InvalidKey, - InvalidBytes, - UnableToDowngrade, - InconsistentFork(InconsistentFork), - CacheBuildError(EpochCacheError), - RandaoMixOutOfBounds, + MilhouseError(milhouse::Error), + Compression(std::io::Error), FinalizedStateDecreasingSlot, FinalizedStateUnaligned, StateForCacheHasPendingUpdates { state_root: Hash256, slot: Slot, }, + AddPayloadLogicError, + InvalidKey, + InvalidBytes, + InconsistentFork(InconsistentFork), + Hdiff(hdiff::Error), + CacheBuildError(EpochCacheError), + ForwardsIterInvalidColumn(DBColumn), + ForwardsIterGap(DBColumn, Slot, Slot), + StateShouldNotBeRequired(Slot), + MissingBlock(Hash256), + RandaoMixOutOfBounds, + GenesisStateUnknown, ArithError(safe_arith::ArithError), } @@ -112,6 +121,18 @@ impl From for Error { } } +impl From for Error { + fn from(e: milhouse::Error) -> Self { + Self::MilhouseError(e) + } +} + +impl From for Error { + fn from(e: hdiff::Error) -> Self { + Self::Hdiff(e) + } +} + impl From for Error { fn from(e: BlockReplayError) -> Error { Error::BlockReplayError(e) diff --git a/beacon_node/store/src/forwards_iter.rs b/beacon_node/store/src/forwards_iter.rs index 1ccf1da1b7..e0f44f3aff 100644 --- a/beacon_node/store/src/forwards_iter.rs +++ b/beacon_node/store/src/forwards_iter.rs @@ -1,37 +1,34 @@ -use crate::chunked_iter::ChunkedVectorIter; -use crate::chunked_vector::{BlockRoots, Field, StateRoots}; use crate::errors::{Error, Result}; use crate::iter::{BlockRootsIterator, StateRootsIterator}; -use crate::{HotColdDB, ItemStore}; +use crate::{ColumnIter, DBColumn, HotColdDB, ItemStore}; use itertools::process_results; -use types::{BeaconState, ChainSpec, EthSpec, Hash256, Slot}; +use std::marker::PhantomData; +use 
types::{BeaconState, EthSpec, Hash256, Slot}; pub type HybridForwardsBlockRootsIterator<'a, E, Hot, Cold> = - HybridForwardsIterator<'a, E, BlockRoots, Hot, Cold>; + HybridForwardsIterator<'a, E, Hot, Cold>; pub type HybridForwardsStateRootsIterator<'a, E, Hot, Cold> = - HybridForwardsIterator<'a, E, StateRoots, Hot, Cold>; + HybridForwardsIterator<'a, E, Hot, Cold>; -/// Trait unifying `BlockRoots` and `StateRoots` for forward iteration. -pub trait Root: Field { - fn simple_forwards_iterator, Cold: ItemStore>( - store: &HotColdDB, +impl, Cold: ItemStore> HotColdDB { + fn simple_forwards_iterator( + &self, + column: DBColumn, start_slot: Slot, end_state: BeaconState, end_root: Hash256, - ) -> Result; + ) -> Result { + if column == DBColumn::BeaconBlockRoots { + self.forwards_iter_block_roots_using_state(start_slot, end_state, end_root) + } else if column == DBColumn::BeaconStateRoots { + self.forwards_iter_state_roots_using_state(start_slot, end_state, end_root) + } else { + Err(Error::ForwardsIterInvalidColumn(column)) + } + } - /// The first slot for which this field is *no longer* stored in the freezer database. - /// - /// If `None`, then this field is not stored in the freezer database at all due to pruning - /// configuration. - fn freezer_upper_limit, Cold: ItemStore>( - store: &HotColdDB, - ) -> Option; -} - -impl Root for BlockRoots { - fn simple_forwards_iterator, Cold: ItemStore>( - store: &HotColdDB, + fn forwards_iter_block_roots_using_state( + &self, start_slot: Slot, end_state: BeaconState, end_block_root: Hash256, @@ -39,7 +36,7 @@ impl Root for BlockRoots { // Iterate backwards from the end state, stopping at the start slot. 
let values = process_results( std::iter::once(Ok((end_block_root, end_state.slot()))) - .chain(BlockRootsIterator::owned(store, end_state)), + .chain(BlockRootsIterator::owned(self, end_state)), |iter| { iter.take_while(|(_, slot)| *slot >= start_slot) .collect::>() @@ -48,17 +45,8 @@ impl Root for BlockRoots { Ok(SimpleForwardsIterator { values }) } - fn freezer_upper_limit, Cold: ItemStore>( - store: &HotColdDB, - ) -> Option { - // Block roots are stored for all slots up to the split slot (exclusive). - Some(store.get_split_slot()) - } -} - -impl Root for StateRoots { - fn simple_forwards_iterator, Cold: ItemStore>( - store: &HotColdDB, + fn forwards_iter_state_roots_using_state( + &self, start_slot: Slot, end_state: BeaconState, end_state_root: Hash256, @@ -66,7 +54,7 @@ impl Root for StateRoots { // Iterate backwards from the end state, stopping at the start slot. let values = process_results( std::iter::once(Ok((end_state_root, end_state.slot()))) - .chain(StateRootsIterator::owned(store, end_state)), + .chain(StateRootsIterator::owned(self, end_state)), |iter| { iter.take_while(|(_, slot)| *slot >= start_slot) .collect::>() @@ -75,51 +63,123 @@ impl Root for StateRoots { Ok(SimpleForwardsIterator { values }) } - fn freezer_upper_limit, Cold: ItemStore>( - store: &HotColdDB, - ) -> Option { - // State roots are stored for all slots up to the latest restore point (exclusive). - // There may not be a latest restore point if state pruning is enabled, in which - // case this function will return `None`. - store.get_latest_restore_point_slot() - } -} - -/// Forwards root iterator that makes use of a flat field table in the freezer DB. 
-pub struct FrozenForwardsIterator<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> -{ - inner: ChunkedVectorIter<'a, F, E, Hot, Cold>, -} - -impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> - FrozenForwardsIterator<'a, E, F, Hot, Cold> -{ - pub fn new( - store: &'a HotColdDB, + /// Values in `column` are available in the range `start_slot..upper_bound`. + /// + /// If `None` is returned then no values are available from `start_slot` due to pruning or + /// incomplete backfill. + pub fn freezer_upper_bound_for_column( + &self, + column: DBColumn, start_slot: Slot, - last_restore_point_slot: Slot, - spec: &ChainSpec, - ) -> Self { - Self { - inner: ChunkedVectorIter::new( - store, - start_slot.as_usize(), - last_restore_point_slot, - spec, - ), + ) -> Result> { + if column == DBColumn::BeaconBlockRoots { + Ok(self.freezer_upper_bound_for_block_roots(start_slot)) + } else if column == DBColumn::BeaconStateRoots { + Ok(self.freezer_upper_bound_for_state_roots(start_slot)) + } else { + Err(Error::ForwardsIterInvalidColumn(column)) + } + } + + fn freezer_upper_bound_for_block_roots(&self, start_slot: Slot) -> Option { + let oldest_block_slot = self.get_oldest_block_slot(); + if start_slot < oldest_block_slot { + if start_slot == 0 { + // Slot 0 block root is always available. + Some(Slot::new(1)) + // Non-zero block roots are not available prior to the `oldest_block_slot`. + } else { + None + } + } else { + // Block roots are stored for all slots up to the split slot (exclusive). + Some(self.get_split_slot()) + } + } + + fn freezer_upper_bound_for_state_roots(&self, start_slot: Slot) -> Option { + let split_slot = self.get_split_slot(); + let anchor = self.get_anchor_info(); + + if start_slot >= anchor.state_upper_limit { + // Starting slot is after the upper limit, so the split is the upper limit. + // The split state's root is not available in the freezer so this is exclusive. 
+ Some(split_slot) + } else if start_slot <= anchor.state_lower_limit { + // Starting slot is prior to lower limit, so that's the upper limit. We can't + // iterate past the lower limit into the gap. The +1 accounts for exclusivity. + Some(anchor.state_lower_limit + 1) + } else { + // In the gap, nothing is available. + None } } } -impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> Iterator - for FrozenForwardsIterator<'a, E, F, Hot, Cold> +/// Forwards root iterator that makes use of a slot -> root mapping in the freezer DB. +pub struct FrozenForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { + inner: ColumnIter<'a, Vec>, + column: DBColumn, + next_slot: Slot, + end_slot: Slot, + _phantom: PhantomData<(E, Hot, Cold)>, +} + +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> + FrozenForwardsIterator<'a, E, Hot, Cold> { - type Item = (Hash256, Slot); + /// `end_slot` is EXCLUSIVE here. + pub fn new( + store: &'a HotColdDB, + column: DBColumn, + start_slot: Slot, + end_slot: Slot, + ) -> Result { + if column != DBColumn::BeaconBlockRoots && column != DBColumn::BeaconStateRoots { + return Err(Error::ForwardsIterInvalidColumn(column)); + } + let start = start_slot.as_u64().to_be_bytes(); + Ok(Self { + inner: store.cold_db.iter_column_from(column, &start), + column, + next_slot: start_slot, + end_slot, + _phantom: PhantomData, + }) + } +} + +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> Iterator + for FrozenForwardsIterator<'a, E, Hot, Cold> +{ + type Item = Result<(Hash256, Slot)>; fn next(&mut self) -> Option { + if self.next_slot == self.end_slot { + return None; + } self.inner - .next() - .map(|(slot, root)| (root, Slot::from(slot))) + .next()? 
+ .and_then(|(slot_bytes, root_bytes)| { + let slot = slot_bytes + .clone() + .try_into() + .map(u64::from_be_bytes) + .map(Slot::new) + .map_err(|_| Error::InvalidBytes)?; + if root_bytes.len() != std::mem::size_of::() { + return Err(Error::InvalidBytes); + } + let root = Hash256::from_slice(&root_bytes); + + if slot != self.next_slot { + return Err(Error::ForwardsIterGap(self.column, slot, self.next_slot)); + } + self.next_slot += 1; + + Ok(Some((root, slot))) + }) + .transpose() } } @@ -139,10 +199,12 @@ impl Iterator for SimpleForwardsIterator { } /// Fusion of the above two approaches to forwards iteration. Fast and efficient. -pub enum HybridForwardsIterator<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> { +pub enum HybridForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { PreFinalization { - iter: Box>, + iter: Box>, + store: &'a HotColdDB, end_slot: Option, + column: DBColumn, /// Data required by the `PostFinalization` iterator when we get to it. continuation_data: Option, Hash256)>>, }, @@ -150,6 +212,7 @@ pub enum HybridForwardsIterator<'a, E: EthSpec, F: Root, Hot: ItemStore, C continuation_data: Option, Hash256)>>, store: &'a HotColdDB, start_slot: Slot, + column: DBColumn, }, PostFinalization { iter: SimpleForwardsIterator, @@ -157,8 +220,8 @@ pub enum HybridForwardsIterator<'a, E: EthSpec, F: Root, Hot: ItemStore, C Finished, } -impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> - HybridForwardsIterator<'a, E, F, Hot, Cold> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> + HybridForwardsIterator<'a, E, Hot, Cold> { /// Construct a new hybrid iterator. /// @@ -174,48 +237,54 @@ impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> /// function may block for some time while `get_state` runs. 
pub fn new( store: &'a HotColdDB, + column: DBColumn, start_slot: Slot, end_slot: Option, get_state: impl FnOnce() -> Result<(BeaconState, Hash256)>, - spec: &ChainSpec, ) -> Result { use HybridForwardsIterator::*; // First slot at which this field is *not* available in the freezer. i.e. all slots less // than this slot have their data available in the freezer. - let freezer_upper_limit = F::freezer_upper_limit(store).unwrap_or(Slot::new(0)); + let opt_freezer_upper_bound = store.freezer_upper_bound_for_column(column, start_slot)?; - let result = if start_slot < freezer_upper_limit { - let iter = Box::new(FrozenForwardsIterator::new( - store, - start_slot, - freezer_upper_limit, - spec, - )); + match opt_freezer_upper_bound { + Some(freezer_upper_bound) if start_slot < freezer_upper_bound => { + // EXCLUSIVE end slot for the frozen portion of the iterator. + let frozen_end_slot = end_slot.map_or(freezer_upper_bound, |end_slot| { + std::cmp::min(end_slot + 1, freezer_upper_bound) + }); + let iter = Box::new(FrozenForwardsIterator::new( + store, + column, + start_slot, + frozen_end_slot, + )?); - // No continuation data is needed if the forwards iterator plans to halt before - // `end_slot`. If it tries to continue further a `NoContinuationData` error will be - // returned. - let continuation_data = - if end_slot.map_or(false, |end_slot| end_slot < freezer_upper_limit) { - None - } else { - Some(Box::new(get_state()?)) - }; - PreFinalization { - iter, - end_slot, - continuation_data, + // No continuation data is needed if the forwards iterator plans to halt before + // `end_slot`. If it tries to continue further a `NoContinuationData` error will be + // returned. 
+ let continuation_data = + if end_slot.map_or(false, |end_slot| end_slot < freezer_upper_bound) { + None + } else { + Some(Box::new(get_state()?)) + }; + Ok(PreFinalization { + iter, + store, + end_slot, + column, + continuation_data, + }) } - } else { - PostFinalizationLazy { + _ => Ok(PostFinalizationLazy { continuation_data: Some(Box::new(get_state()?)), store, start_slot, - } - }; - - Ok(result) + column, + }), + } } fn do_next(&mut self) -> Result> { @@ -225,29 +294,31 @@ impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> PreFinalization { iter, end_slot, + store, continuation_data, + column, } => { match iter.next() { - Some(x) => Ok(Some(x)), + Some(x) => x.map(Some), // Once the pre-finalization iterator is consumed, transition // to a post-finalization iterator beginning from the last slot // of the pre iterator. None => { // If the iterator has an end slot (inclusive) which has already been // covered by the (exclusive) frozen forwards iterator, then we're done! - let iter_end_slot = Slot::from(iter.inner.end_vindex); - if end_slot.map_or(false, |end_slot| iter_end_slot == end_slot + 1) { + if end_slot.map_or(false, |end_slot| iter.end_slot == end_slot + 1) { *self = Finished; return Ok(None); } let continuation_data = continuation_data.take(); - let store = iter.inner.store; - let start_slot = iter_end_slot; + let start_slot = iter.end_slot; + *self = PostFinalizationLazy { continuation_data, store, start_slot, + column: *column, }; self.do_next() @@ -258,11 +329,17 @@ impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> continuation_data, store, start_slot, + column, } => { let (end_state, end_root) = *continuation_data.take().ok_or(Error::NoContinuationData)?; *self = PostFinalization { - iter: F::simple_forwards_iterator(store, *start_slot, end_state, end_root)?, + iter: store.simple_forwards_iterator( + *column, + *start_slot, + end_state, + end_root, + )?, }; self.do_next() } @@ -272,8 +349,8 @@ impl<'a, E: EthSpec, F: Root, 
Hot: ItemStore, Cold: ItemStore> } } -impl<'a, E: EthSpec, F: Root, Hot: ItemStore, Cold: ItemStore> Iterator - for HybridForwardsIterator<'a, E, F, Hot, Cold> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> Iterator + for HybridForwardsIterator<'a, E, Hot, Cold> { type Item = Result<(Hash256, Slot)>; diff --git a/beacon_node/store/src/hdiff.rs b/beacon_node/store/src/hdiff.rs new file mode 100644 index 0000000000..a29e680eb5 --- /dev/null +++ b/beacon_node/store/src/hdiff.rs @@ -0,0 +1,914 @@ +//! Hierarchical diff implementation. +use crate::{metrics, DBColumn, StoreConfig, StoreItem}; +use bls::PublicKeyBytes; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use ssz::{Decode, Encode}; +use ssz_derive::{Decode, Encode}; +use std::cmp::Ordering; +use std::io::{Read, Write}; +use std::ops::RangeInclusive; +use std::str::FromStr; +use std::sync::LazyLock; +use superstruct::superstruct; +use types::historical_summary::HistoricalSummary; +use types::{BeaconState, ChainSpec, Epoch, EthSpec, Hash256, List, Slot, Validator}; +use zstd::{Decoder, Encoder}; + +static EMPTY_PUBKEY: LazyLock = LazyLock::new(PublicKeyBytes::empty); + +#[derive(Debug)] +pub enum Error { + InvalidHierarchy, + DiffDeletionsNotSupported, + UnableToComputeDiff, + UnableToApplyDiff, + BalancesIncompleteChunk, + Compression(std::io::Error), + InvalidSszState(ssz::DecodeError), + InvalidBalancesLength, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Encode, Decode)] +pub struct HierarchyConfig { + /// A sequence of powers of two to define how frequently to store each layer of state diffs. + /// The last value always represents the frequency of full state snapshots. Adding more + /// exponents increases the number of diff layers. This value allows to customize the trade-off + /// between reconstruction speed and disk space. + /// + /// Consider an example `exponents value of `[5,13,21]`. 
This means we have 3 layers: + /// - Full state stored every 2^21 slots (2097152 slots or 291 days) + /// - First diff layer stored every 2^13 slots (8192 slots or 2.3 hours) + /// - Second diff layer stored every 2^5 slots (32 slots or 1 epoch) + /// + /// To reconstruct a state at slot 3,000,003 we load each closest layer + /// - Layer 0: 3000003 - (3000003 mod 2^21) = 2097152 + /// - Layer 1: 3000003 - (3000003 mod 2^13) = 2998272 + /// - Layer 2: 3000003 - (3000003 mod 2^5) = 3000000 + /// + /// Layer 0 is full state snapshot, apply layer 1 diff, then apply layer 2 diff and then replay + /// blocks 3,000,001 to 3,000,003. + pub exponents: Vec, +} + +impl FromStr for HierarchyConfig { + type Err = String; + + fn from_str(s: &str) -> Result { + let exponents = s + .split(',') + .map(|s| { + s.parse() + .map_err(|e| format!("invalid hierarchy-exponents: {e:?}")) + }) + .collect::, _>>()?; + + if exponents.windows(2).any(|w| w[0] >= w[1]) { + return Err("hierarchy-exponents must be in ascending order".to_string()); + } + + Ok(HierarchyConfig { exponents }) + } +} + +impl std::fmt::Display for HierarchyConfig { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.exponents.iter().join(",")) + } +} + +#[derive(Debug)] +pub struct HierarchyModuli { + moduli: Vec, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum StorageStrategy { + ReplayFrom(Slot), + DiffFrom(Slot), + Snapshot, +} + +/// Hierarchical diff output and working buffer. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct HDiffBuffer { + state: Vec, + balances: Vec, + inactivity_scores: Vec, + validators: Vec, + historical_roots: Vec, + historical_summaries: Vec, +} + +/// Hierarchical state diff. +/// +/// Splits the diff into two data sections: +/// +/// - **balances**: The balance of each active validator is almost certain to change every epoch. +/// So this is the field in the state with most entropy. However the balance changes are small. 
+/// We can optimize the diff significantly by computing the balance difference first and then +/// compressing the result to squash those leading zero bytes. +/// +/// - **everything else**: Instead of trying to apply heuristics and be clever on each field, +/// running a generic binary diff algorithm on the rest of fields yields very good results. With +/// this strategy the HDiff code is easily maintainable across forks, as new fields are covered +/// automatically. xdelta3 algorithm showed diff compute and apply times of ~200 ms on a mainnet +/// state from Apr 2023 (570k indexes), and a 92kB diff size. +#[superstruct( + variants(V0), + variant_attributes(derive(Debug, PartialEq, Encode, Decode)) )] +#[derive(Debug, PartialEq, Encode, Decode)] +#[ssz(enum_behaviour = "union")] +pub struct HDiff { + state_diff: BytesDiff, + balances_diff: CompressedU64Diff, + /// inactivity_scores are small integers that change slowly epoch to epoch. And are 0 for all + /// participants unless there's non-finality. Computing the diff and compressing the result is + /// much faster than running them through a binary patch algorithm. In the default case where + /// all values are 0 it should also result in a tiny output. + inactivity_scores_diff: CompressedU64Diff, + /// The validators array represents the vast majority of data in a BeaconState. Due to its big + /// size we have seen the performance of xdelta3 degrade. Comparing each entry of the + /// validators array manually significantly speeds up the computation of the diff (+10x faster) + /// and results in the same minimal diff. As the `Validator` record is unlikely to change, + /// maintaining this extra complexity should be okay. + validators_diff: ValidatorsDiff, + /// `historical_roots` is an unbounded forever growing (after Capella it's + /// historical_summaries) list of unique roots. This data is pure entropy so there's no point + /// in compressing it.
As it's an append only list, the optimal diff + compression is just the + /// list of new entries. The size of `historical_roots` and `historical_summaries` in + /// non-trivial ~10 MB so throwing it to xdelta3 adds CPU cycles. With a bit of extra complexity + /// we can save those completely. + historical_roots: AppendOnlyDiff, + /// See historical_roots + historical_summaries: AppendOnlyDiff, +} + +#[derive(Debug, PartialEq, Encode, Decode)] +pub struct BytesDiff { + bytes: Vec, +} + +#[derive(Debug, PartialEq, Encode, Decode)] +pub struct CompressedU64Diff { + bytes: Vec, +} + +#[derive(Debug, PartialEq, Encode, Decode)] +pub struct ValidatorsDiff { + bytes: Vec, +} + +#[derive(Debug, PartialEq, Encode, Decode)] +pub struct AppendOnlyDiff { + values: Vec, +} + +impl HDiffBuffer { + pub fn from_state(mut beacon_state: BeaconState) -> Self { + let _t = metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_FROM_STATE_TIME); + // Set state.balances to empty list, and then serialize state as ssz + let balances_list = std::mem::take(beacon_state.balances_mut()); + let inactivity_scores = if let Ok(inactivity_scores) = beacon_state.inactivity_scores_mut() + { + std::mem::take(inactivity_scores).to_vec() + } else { + // If this state is pre-altair consider the list empty. If the target state + // is post altair, all its items will show up in the diff as is. + vec![] + }; + let validators = std::mem::take(beacon_state.validators_mut()).to_vec(); + let historical_roots = std::mem::take(beacon_state.historical_roots_mut()).to_vec(); + let historical_summaries = + if let Ok(historical_summaries) = beacon_state.historical_summaries_mut() { + std::mem::take(historical_summaries).to_vec() + } else { + // If this state is pre-capella consider the list empty. The diff will + // include all items in the target state. If both states are + // pre-capella the diff will be empty. 
+ vec![] + }; + + let state = beacon_state.as_ssz_bytes(); + let balances = balances_list.to_vec(); + + HDiffBuffer { + state, + balances, + inactivity_scores, + validators, + historical_roots, + historical_summaries, + } + } + + pub fn as_state(&self, spec: &ChainSpec) -> Result, Error> { + let _t = metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_INTO_STATE_TIME); + let mut state = + BeaconState::from_ssz_bytes(&self.state, spec).map_err(Error::InvalidSszState)?; + + *state.balances_mut() = List::try_from_iter(self.balances.iter().copied()) + .map_err(|_| Error::InvalidBalancesLength)?; + + if let Ok(inactivity_scores) = state.inactivity_scores_mut() { + *inactivity_scores = List::try_from_iter(self.inactivity_scores.iter().copied()) + .map_err(|_| Error::InvalidBalancesLength)?; + } + + *state.validators_mut() = List::try_from_iter(self.validators.iter().cloned()) + .map_err(|_| Error::InvalidBalancesLength)?; + + *state.historical_roots_mut() = List::try_from_iter(self.historical_roots.iter().copied()) + .map_err(|_| Error::InvalidBalancesLength)?; + + if let Ok(historical_summaries) = state.historical_summaries_mut() { + *historical_summaries = List::try_from_iter(self.historical_summaries.iter().copied()) + .map_err(|_| Error::InvalidBalancesLength)?; + } + + Ok(state) + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.state.len() + + self.balances.len() * std::mem::size_of::() + + self.inactivity_scores.len() * std::mem::size_of::() + + self.validators.len() * std::mem::size_of::() + + self.historical_roots.len() * std::mem::size_of::() + + self.historical_summaries.len() * std::mem::size_of::() + } +} + +impl HDiff { + pub fn compute( + source: &HDiffBuffer, + target: &HDiffBuffer, + config: &StoreConfig, + ) -> Result { + let state_diff = BytesDiff::compute(&source.state, &target.state)?; + let balances_diff = CompressedU64Diff::compute(&source.balances, &target.balances, config)?; + let inactivity_scores_diff = 
CompressedU64Diff::compute( + &source.inactivity_scores, + &target.inactivity_scores, + config, + )?; + let validators_diff = + ValidatorsDiff::compute(&source.validators, &target.validators, config)?; + let historical_roots = + AppendOnlyDiff::compute(&source.historical_roots, &target.historical_roots)?; + let historical_summaries = + AppendOnlyDiff::compute(&source.historical_summaries, &target.historical_summaries)?; + + Ok(HDiff::V0(HDiffV0 { + state_diff, + balances_diff, + inactivity_scores_diff, + validators_diff, + historical_roots, + historical_summaries, + })) + } + + pub fn apply(&self, source: &mut HDiffBuffer, config: &StoreConfig) -> Result<(), Error> { + let source_state = std::mem::take(&mut source.state); + self.state_diff().apply(&source_state, &mut source.state)?; + self.balances_diff().apply(&mut source.balances, config)?; + self.inactivity_scores_diff() + .apply(&mut source.inactivity_scores, config)?; + self.validators_diff() + .apply(&mut source.validators, config)?; + self.historical_roots().apply(&mut source.historical_roots); + self.historical_summaries() + .apply(&mut source.historical_summaries); + + Ok(()) + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.sizes().iter().sum() + } + + pub fn sizes(&self) -> Vec { + vec![ + self.state_diff().size(), + self.balances_diff().size(), + self.inactivity_scores_diff().size(), + self.validators_diff().size(), + self.historical_roots().size(), + self.historical_summaries().size(), + ] + } +} + +impl StoreItem for HDiff { + fn db_column() -> DBColumn { + DBColumn::BeaconStateDiff + } + + fn as_store_bytes(&self) -> Vec { + self.as_ssz_bytes() + } + + fn from_store_bytes(bytes: &[u8]) -> Result { + Ok(Self::from_ssz_bytes(bytes)?) 
+ } +} + +impl BytesDiff { + pub fn compute(source: &[u8], target: &[u8]) -> Result { + Self::compute_xdelta(source, target) + } + + pub fn compute_xdelta(source_bytes: &[u8], target_bytes: &[u8]) -> Result { + let bytes = xdelta3::encode(target_bytes, source_bytes) + .ok_or(Error::UnableToComputeDiff) + .unwrap(); + Ok(Self { bytes }) + } + + pub fn apply(&self, source: &[u8], target: &mut Vec) -> Result<(), Error> { + self.apply_xdelta(source, target) + } + + pub fn apply_xdelta(&self, source: &[u8], target: &mut Vec) -> Result<(), Error> { + *target = xdelta3::decode(&self.bytes, source).ok_or(Error::UnableToApplyDiff)?; + Ok(()) + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.bytes.len() + } +} + +impl CompressedU64Diff { + pub fn compute(xs: &[u64], ys: &[u64], config: &StoreConfig) -> Result { + if xs.len() > ys.len() { + return Err(Error::DiffDeletionsNotSupported); + } + + let uncompressed_bytes: Vec = ys + .iter() + .enumerate() + .flat_map(|(i, y)| { + // Diff from 0 if the entry is new. + let x = xs.get(i).copied().unwrap_or(0); + y.wrapping_sub(x).to_be_bytes() + }) + .collect(); + + Ok(CompressedU64Diff { + bytes: compress_bytes(&uncompressed_bytes, config)?, + }) + } + + pub fn apply(&self, xs: &mut Vec, config: &StoreConfig) -> Result<(), Error> { + // Decompress balances diff. 
+ let balances_diff_bytes = uncompress_bytes(&self.bytes, config)?; + + for (i, diff_bytes) in balances_diff_bytes + .chunks(u64::BITS as usize / 8) + .enumerate() + { + let diff = diff_bytes + .try_into() + .map(u64::from_be_bytes) + .map_err(|_| Error::BalancesIncompleteChunk)?; + + if let Some(x) = xs.get_mut(i) { + *x = x.wrapping_add(diff); + } else { + xs.push(diff); + } + } + + Ok(()) + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.bytes.len() + } +} + +fn compress_bytes(input: &[u8], config: &StoreConfig) -> Result, Error> { + let compression_level = config.compression_level; + let mut out = Vec::with_capacity(config.estimate_compressed_size(input.len())); + let mut encoder = Encoder::new(&mut out, compression_level).map_err(Error::Compression)?; + encoder.write_all(input).map_err(Error::Compression)?; + encoder.finish().map_err(Error::Compression)?; + Ok(out) +} + +fn uncompress_bytes(input: &[u8], config: &StoreConfig) -> Result, Error> { + let mut out = Vec::with_capacity(config.estimate_decompressed_size(input.len())); + let mut decoder = Decoder::new(input).map_err(Error::Compression)?; + decoder.read_to_end(&mut out).map_err(Error::Compression)?; + Ok(out) +} + +impl ValidatorsDiff { + pub fn compute( + xs: &[Validator], + ys: &[Validator], + config: &StoreConfig, + ) -> Result { + if xs.len() > ys.len() { + return Err(Error::DiffDeletionsNotSupported); + } + + let uncompressed_bytes = ys + .iter() + .enumerate() + .filter_map(|(i, y)| { + let validator_diff = if let Some(x) = xs.get(i) { + if y == x { + return None; + } else { + let pubkey_changed = y.pubkey != x.pubkey; + // Note: If researchers attempt to change the Validator container, go quickly to + // All Core Devs and push hard to add another List in the BeaconState instead. 
+ Validator { + // The pubkey can be changed on index re-use + pubkey: if pubkey_changed { + y.pubkey + } else { + PublicKeyBytes::empty() + }, + // withdrawal_credentials can be set to zero initially but can never be + // changed INTO zero. On index re-use it can be set to zero, but in that + // case the pubkey will also change. + withdrawal_credentials: if pubkey_changed + || y.withdrawal_credentials != x.withdrawal_credentials + { + y.withdrawal_credentials + } else { + Hash256::ZERO + }, + // effective_balance can increase and decrease + effective_balance: y.effective_balance - x.effective_balance, + // slashed can only change from false into true. In an index re-use it can + // switch back to false, but in that case the pubkey will also change. + slashed: y.slashed, + // activation_eligibility_epoch can never be zero under any case. It's + // set to either FAR_FUTURE_EPOCH or get_current_epoch(state) + 1 + activation_eligibility_epoch: if y.activation_eligibility_epoch + != x.activation_eligibility_epoch + { + y.activation_eligibility_epoch + } else { + Epoch::new(0) + }, + // activation_epoch can never be zero under any case. It's + // set to either FAR_FUTURE_EPOCH or epoch + 1 + MAX_SEED_LOOKAHEAD + activation_epoch: if y.activation_epoch != x.activation_epoch { + y.activation_epoch + } else { + Epoch::new(0) + }, + // exit_epoch can never be zero under any case. It's set to either + // FAR_FUTURE_EPOCH or > epoch + 1 + MAX_SEED_LOOKAHEAD + exit_epoch: if y.exit_epoch != x.exit_epoch { + y.exit_epoch + } else { + Epoch::new(0) + }, + // withdrawable_epoch can never be zero under any case. 
It's set to + // either FAR_FUTURE_EPOCH or > epoch + 1 + MAX_SEED_LOOKAHEAD + withdrawable_epoch: if y.withdrawable_epoch != x.withdrawable_epoch { + y.withdrawable_epoch + } else { + Epoch::new(0) + }, + } + } + } else { + y.clone() + }; + + Some(ValidatorDiffEntry { + index: i as u64, + validator_diff, + }) + }) + .flat_map(|v_diff| v_diff.as_ssz_bytes()) + .collect::>(); + + Ok(Self { + bytes: compress_bytes(&uncompressed_bytes, config)?, + }) + } + + pub fn apply(&self, xs: &mut Vec, config: &StoreConfig) -> Result<(), Error> { + let validator_diff_bytes = uncompress_bytes(&self.bytes, config)?; + + for diff_bytes in + validator_diff_bytes.chunks(::ssz_fixed_len()) + { + let ValidatorDiffEntry { + index, + validator_diff: diff, + } = ValidatorDiffEntry::from_ssz_bytes(diff_bytes) + .map_err(|_| Error::BalancesIncompleteChunk)?; + + if let Some(x) = xs.get_mut(index as usize) { + // Note: a pubkey change implies index re-use. In that case over-write + // withdrawal_credentials and slashed inconditionally as their default values + // are valid values. 
+ let pubkey_changed = diff.pubkey != *EMPTY_PUBKEY; + if pubkey_changed { + x.pubkey = diff.pubkey; + } + if pubkey_changed || diff.withdrawal_credentials != Hash256::ZERO { + x.withdrawal_credentials = diff.withdrawal_credentials; + } + if diff.effective_balance != 0 { + x.effective_balance = x.effective_balance.wrapping_add(diff.effective_balance); + } + if pubkey_changed || diff.slashed { + x.slashed = diff.slashed; + } + if diff.activation_eligibility_epoch != Epoch::new(0) { + x.activation_eligibility_epoch = diff.activation_eligibility_epoch; + } + if diff.activation_epoch != Epoch::new(0) { + x.activation_epoch = diff.activation_epoch; + } + if diff.exit_epoch != Epoch::new(0) { + x.exit_epoch = diff.exit_epoch; + } + if diff.withdrawable_epoch != Epoch::new(0) { + x.withdrawable_epoch = diff.withdrawable_epoch; + } + } else { + xs.push(diff) + } + } + + Ok(()) + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.bytes.len() + } +} + +#[derive(Debug, Encode, Decode)] +struct ValidatorDiffEntry { + index: u64, + validator_diff: Validator, +} + +impl AppendOnlyDiff { + pub fn compute(xs: &[T], ys: &[T]) -> Result { + match xs.len().cmp(&ys.len()) { + Ordering::Less => Ok(Self { + values: ys.iter().skip(xs.len()).copied().collect(), + }), + // Don't even create an iterator for this common case + Ordering::Equal => Ok(Self { values: vec![] }), + Ordering::Greater => Err(Error::DiffDeletionsNotSupported), + } + } + + pub fn apply(&self, xs: &mut Vec) { + xs.extend(self.values.iter().copied()); + } + + /// Byte size of this instance + pub fn size(&self) -> usize { + self.values.len() * size_of::() + } +} + +impl Default for HierarchyConfig { + fn default() -> Self { + HierarchyConfig { + exponents: vec![5, 9, 11, 13, 16, 18, 21], + } + } +} + +impl HierarchyConfig { + pub fn to_moduli(&self) -> Result { + self.validate()?; + let moduli = self.exponents.iter().map(|n| 1 << n).collect(); + Ok(HierarchyModuli { moduli }) + } + + pub fn 
validate(&self) -> Result<(), Error> { + if !self.exponents.is_empty() + && self + .exponents + .iter() + .tuple_windows() + .all(|(small, big)| small < big && *big < u64::BITS as u8) + { + Ok(()) + } else { + Err(Error::InvalidHierarchy) + } + } +} + +impl HierarchyModuli { + pub fn storage_strategy(&self, slot: Slot) -> Result { + // last = full snapshot interval + let last = self.moduli.last().copied().ok_or(Error::InvalidHierarchy)?; + // first = most frequent diff layer, need to replay blocks from this layer + let first = self + .moduli + .first() + .copied() + .ok_or(Error::InvalidHierarchy)?; + + if slot % last == 0 { + return Ok(StorageStrategy::Snapshot); + } + + Ok(self + .moduli + .iter() + .rev() + .tuple_windows() + .find_map(|(&n_big, &n_small)| { + if slot % n_small == 0 { + // Diff from the previous layer. + Some(StorageStrategy::DiffFrom(slot / n_big * n_big)) + } else { + // Keep trying with next layer + None + } + }) + // Exhausted layers, need to replay from most frequent layer + .unwrap_or(StorageStrategy::ReplayFrom(slot / first * first))) + } + + /// Return the smallest slot greater than or equal to `slot` at which a full snapshot should + /// be stored. + pub fn next_snapshot_slot(&self, slot: Slot) -> Result { + let last = self.moduli.last().copied().ok_or(Error::InvalidHierarchy)?; + if slot % last == 0 { + Ok(slot) + } else { + Ok((slot / last + 1) * last) + } + } + + /// Return `true` if the database ops for this slot should be committed immediately. + /// + /// This is the case for all diffs aside from the ones in the leaf layer. To store a diff + /// might require loading the state at the previous layer, in which case the diff for that + /// layer must already have been stored. + /// + /// In future we may be able to handle this differently (with proper transaction semantics + /// rather than LevelDB's "write batches"). 
+ pub fn should_commit_immediately(&self, slot: Slot) -> Result { + // If there's only 1 layer of snapshots, then commit only when writing a snapshot. + self.moduli.get(1).map_or_else( + || Ok(slot == self.next_snapshot_slot(slot)?), + |second_layer_moduli| Ok(slot % *second_layer_moduli == 0), + ) + } +} + +impl StorageStrategy { + /// For the state stored with this `StorageStrategy` at `slot`, return the range of slots which + /// should be checked for ancestor states in the historic state cache. + /// + /// The idea is that for states which need to be built by replaying blocks we should scan + /// for any viable ancestor state between their `from` slot and `slot`. If we find such a + /// state it will save us from the slow reconstruction of the `from` state using diffs. + /// + /// Similarly for `DiffFrom` and `Snapshot` states, loading the prior state and replaying 1 + /// block is often going to be faster than loading and applying diffs/snapshots, so we may as + /// well check the cache for that 1 slot prior (in case the caller is iterating sequentially). + pub fn replay_from_range( + &self, + slot: Slot, + ) -> std::iter::Map, fn(u64) -> Slot> { + match self { + Self::ReplayFrom(from) => from.as_u64()..=slot.as_u64(), + Self::Snapshot | Self::DiffFrom(_) => { + if slot > 0 { + (slot - 1).as_u64()..=slot.as_u64() + } else { + slot.as_u64()..=slot.as_u64() + } + } + } + .map(Slot::from) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::{rngs::SmallRng, thread_rng, Rng, SeedableRng}; + + #[test] + fn default_storage_strategy() { + let config = HierarchyConfig::default(); + config.validate().unwrap(); + + let moduli = config.to_moduli().unwrap(); + + // Full snapshots at multiples of 2^21. 
+ let snapshot_freq = Slot::new(1 << 21); + assert_eq!( + moduli.storage_strategy(Slot::new(0)).unwrap(), + StorageStrategy::Snapshot + ); + assert_eq!( + moduli.storage_strategy(snapshot_freq).unwrap(), + StorageStrategy::Snapshot + ); + assert_eq!( + moduli.storage_strategy(snapshot_freq * 3).unwrap(), + StorageStrategy::Snapshot + ); + + // Diffs should be from the previous layer (the snapshot in this case), and not the previous diff in the same layer. + let first_layer = Slot::new(1 << 18); + assert_eq!( + moduli.storage_strategy(first_layer * 2).unwrap(), + StorageStrategy::DiffFrom(Slot::new(0)) + ); + + let replay_strategy_slot = first_layer + 1; + assert_eq!( + moduli.storage_strategy(replay_strategy_slot).unwrap(), + StorageStrategy::ReplayFrom(first_layer) + ); + } + + #[test] + fn next_snapshot_slot() { + let config = HierarchyConfig::default(); + config.validate().unwrap(); + + let moduli = config.to_moduli().unwrap(); + let snapshot_freq = Slot::new(1 << 21); + + assert_eq!( + moduli.next_snapshot_slot(snapshot_freq).unwrap(), + snapshot_freq + ); + assert_eq!( + moduli.next_snapshot_slot(snapshot_freq + 1).unwrap(), + snapshot_freq * 2 + ); + assert_eq!( + moduli.next_snapshot_slot(snapshot_freq * 2 - 1).unwrap(), + snapshot_freq * 2 + ); + assert_eq!( + moduli.next_snapshot_slot(snapshot_freq * 2).unwrap(), + snapshot_freq * 2 + ); + assert_eq!( + moduli.next_snapshot_slot(snapshot_freq * 100).unwrap(), + snapshot_freq * 100 + ); + } + + #[test] + fn compressed_u64_vs_bytes_diff() { + let x_values = vec![99u64, 55, 123, 6834857, 0, 12]; + let y_values = vec![98u64, 55, 312, 1, 1, 2, 4, 5]; + let config = &StoreConfig::default(); + + let to_bytes = + |nums: &[u64]| -> Vec { nums.iter().flat_map(|x| x.to_be_bytes()).collect() }; + + let x_bytes = to_bytes(&x_values); + let y_bytes = to_bytes(&y_values); + + let u64_diff = CompressedU64Diff::compute(&x_values, &y_values, config).unwrap(); + + let mut y_from_u64_diff = x_values; + u64_diff.apply(&mut 
y_from_u64_diff, config).unwrap(); + + assert_eq!(y_values, y_from_u64_diff); + + let bytes_diff = BytesDiff::compute(&x_bytes, &y_bytes).unwrap(); + + let mut y_from_bytes = vec![]; + bytes_diff.apply(&x_bytes, &mut y_from_bytes).unwrap(); + + assert_eq!(y_bytes, y_from_bytes); + + // U64 diff wins by more than a factor of 3 + assert!(u64_diff.bytes.len() < 3 * bytes_diff.bytes.len()); + } + + #[test] + fn compressed_validators_diff() { + assert_eq!(::ssz_fixed_len(), 129); + + let mut rng = thread_rng(); + let config = &StoreConfig::default(); + let xs = (0..10) + .map(|_| rand_validator(&mut rng)) + .collect::>(); + let mut ys = xs.clone(); + ys[5] = rand_validator(&mut rng); + ys.push(rand_validator(&mut rng)); + let diff = ValidatorsDiff::compute(&xs, &ys, config).unwrap(); + + let mut xs_out = xs.clone(); + diff.apply(&mut xs_out, config).unwrap(); + assert_eq!(xs_out, ys); + } + + fn rand_validator(mut rng: impl Rng) -> Validator { + let mut pubkey = [0u8; 48]; + rng.fill_bytes(&mut pubkey); + let withdrawal_credentials: [u8; 32] = rng.gen(); + + Validator { + pubkey: PublicKeyBytes::from_ssz_bytes(&pubkey).unwrap(), + withdrawal_credentials: withdrawal_credentials.into(), + slashed: false, + effective_balance: 32_000_000_000, + activation_eligibility_epoch: Epoch::max_value(), + activation_epoch: Epoch::max_value(), + exit_epoch: Epoch::max_value(), + withdrawable_epoch: Epoch::max_value(), + } + } + + // This test checks that the hdiff algorithm doesn't accidentally change between releases. + // If it does, we need to ensure appropriate backwards compatibility measures are implemented + // before this test is updated. 
+ #[test] + fn hdiff_version_stability() { + let mut rng = SmallRng::seed_from_u64(0xffeeccdd00aa); + + let pre_balances = vec![32_000_000_000, 16_000_000_000, 0]; + let post_balances = vec![31_000_000_000, 17_000_000, 0, 0]; + + let pre_inactivity_scores = vec![1, 1, 1]; + let post_inactivity_scores = vec![0, 0, 0, 1]; + + let pre_validators = (0..3).map(|_| rand_validator(&mut rng)).collect::>(); + let post_validators = pre_validators.clone(); + + let pre_historical_roots = vec![Hash256::repeat_byte(0xff)]; + let post_historical_roots = vec![Hash256::repeat_byte(0xff), Hash256::repeat_byte(0xee)]; + + let pre_historical_summaries = vec![HistoricalSummary::default()]; + let post_historical_summaries = pre_historical_summaries.clone(); + + let pre_buffer = HDiffBuffer { + state: vec![0, 1, 2, 3, 3, 2, 1, 0], + balances: pre_balances, + inactivity_scores: pre_inactivity_scores, + validators: pre_validators, + historical_roots: pre_historical_roots, + historical_summaries: pre_historical_summaries, + }; + let post_buffer = HDiffBuffer { + state: vec![0, 1, 3, 2, 2, 3, 1, 1], + balances: post_balances, + inactivity_scores: post_inactivity_scores, + validators: post_validators, + historical_roots: post_historical_roots, + historical_summaries: post_historical_summaries, + }; + + let config = StoreConfig::default(); + let hdiff = HDiff::compute(&pre_buffer, &post_buffer, &config).unwrap(); + let hdiff_ssz = hdiff.as_ssz_bytes(); + + // First byte should match enum version. + assert_eq!(hdiff_ssz[0], 0); + + // Should roundtrip. + assert_eq!(HDiff::from_ssz_bytes(&hdiff_ssz).unwrap(), hdiff); + + // Should roundtrip as V0 with enum selector stripped. 
+ assert_eq!( + HDiff::V0(HDiffV0::from_ssz_bytes(&hdiff_ssz[1..]).unwrap()), + hdiff + ); + + assert_eq!( + hdiff_ssz, + vec![ + 0u8, 24, 0, 0, 0, 49, 0, 0, 0, 85, 0, 0, 0, 114, 0, 0, 0, 127, 0, 0, 0, 163, 0, 0, + 0, 4, 0, 0, 0, 214, 195, 196, 0, 0, 0, 14, 8, 0, 8, 1, 0, 0, 1, 3, 2, 2, 3, 1, 1, + 9, 4, 0, 0, 0, 40, 181, 47, 253, 0, 72, 189, 0, 0, 136, 255, 255, 255, 255, 196, + 101, 54, 0, 255, 255, 255, 252, 71, 86, 198, 64, 0, 1, 0, 59, 176, 4, 4, 0, 0, 0, + 40, 181, 47, 253, 0, 72, 133, 0, 0, 80, 255, 255, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 10, + 192, 2, 4, 0, 0, 0, 40, 181, 47, 253, 32, 0, 1, 0, 0, 4, 0, 0, 0, 238, 238, 238, + 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, + 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 4, 0, 0, 0 + ] + ); + } +} diff --git a/beacon_node/store/src/historic_state_cache.rs b/beacon_node/store/src/historic_state_cache.rs new file mode 100644 index 0000000000..c0e8f8346c --- /dev/null +++ b/beacon_node/store/src/historic_state_cache.rs @@ -0,0 +1,92 @@ +use crate::hdiff::{Error, HDiffBuffer}; +use crate::metrics; +use lru::LruCache; +use std::num::NonZeroUsize; +use types::{BeaconState, ChainSpec, EthSpec, Slot}; + +/// Holds a combination of finalized states in two formats: +/// - `hdiff_buffers`: Format close to an SSZ serialized state for rapid application of diffs on top +/// of it +/// - `states`: Deserialized states for direct use or for rapid application of blocks (replay) +/// +/// An example use: when requesting state data for consecutive slots, this cache allows the node to +/// apply diffs once on the first request, and latter just apply blocks one at a time. 
+#[derive(Debug)] +pub struct HistoricStateCache { + hdiff_buffers: LruCache, + states: LruCache>, +} + +#[derive(Debug, Default)] +pub struct Metrics { + pub num_hdiff: usize, + pub num_state: usize, + pub hdiff_byte_size: usize, +} + +impl HistoricStateCache { + pub fn new(hdiff_buffer_cache_size: NonZeroUsize, state_cache_size: NonZeroUsize) -> Self { + Self { + hdiff_buffers: LruCache::new(hdiff_buffer_cache_size), + states: LruCache::new(state_cache_size), + } + } + + pub fn get_hdiff_buffer(&mut self, slot: Slot) -> Option { + if let Some(buffer_ref) = self.hdiff_buffers.get(&slot) { + let _timer = metrics::start_timer(&metrics::BEACON_HDIFF_BUFFER_CLONE_TIMES); + Some(buffer_ref.clone()) + } else if let Some(state) = self.states.get(&slot) { + let buffer = HDiffBuffer::from_state(state.clone()); + let _timer = metrics::start_timer(&metrics::BEACON_HDIFF_BUFFER_CLONE_TIMES); + let cloned = buffer.clone(); + drop(_timer); + self.hdiff_buffers.put(slot, cloned); + Some(buffer) + } else { + None + } + } + + pub fn get_state( + &mut self, + slot: Slot, + spec: &ChainSpec, + ) -> Result>, Error> { + if let Some(state) = self.states.get(&slot) { + Ok(Some(state.clone())) + } else if let Some(buffer) = self.hdiff_buffers.get(&slot) { + let state = buffer.as_state(spec)?; + self.states.put(slot, state.clone()); + Ok(Some(state)) + } else { + Ok(None) + } + } + + pub fn put_state(&mut self, slot: Slot, state: BeaconState) { + self.states.put(slot, state); + } + + pub fn put_hdiff_buffer(&mut self, slot: Slot, buffer: HDiffBuffer) { + self.hdiff_buffers.put(slot, buffer); + } + + pub fn put_both(&mut self, slot: Slot, state: BeaconState, buffer: HDiffBuffer) { + self.put_state(slot, state); + self.put_hdiff_buffer(slot, buffer); + } + + pub fn metrics(&self) -> Metrics { + let hdiff_byte_size = self + .hdiff_buffers + .iter() + .map(|(_, buffer)| buffer.size()) + .sum::(); + Metrics { + num_hdiff: self.hdiff_buffers.len(), + num_state: self.states.len(), + 
hdiff_byte_size, + } + } +} diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 5483c490dc..4942b14881 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1,29 +1,24 @@ -use crate::chunked_vector::{ - store_updated_vector, BlockRoots, HistoricalRoots, HistoricalSummaries, RandaoMixes, StateRoots, -}; -use crate::config::{ - OnDiskStoreConfig, StoreConfig, DEFAULT_SLOTS_PER_RESTORE_POINT, - PREV_DEFAULT_SLOTS_PER_RESTORE_POINT, -}; +use crate::config::{OnDiskStoreConfig, StoreConfig}; use crate::forwards_iter::{HybridForwardsBlockRootsIterator, HybridForwardsStateRootsIterator}; +use crate::hdiff::{HDiff, HDiffBuffer, HierarchyModuli, StorageStrategy}; +use crate::historic_state_cache::HistoricStateCache; use crate::impls::beacon_state::{get_full_state, store_full_state}; use crate::iter::{BlockRootsIterator, ParentRootBlockIterator, RootsIterator}; -use crate::leveldb_store::BytesKey; -use crate::leveldb_store::LevelDB; +use crate::leveldb_store::{BytesKey, LevelDB}; use crate::memory_store::MemoryStore; use crate::metadata::{ AnchorInfo, BlobInfo, CompactionTimestamp, DataColumnInfo, PruningCheckpoint, SchemaVersion, - ANCHOR_INFO_KEY, BLOB_INFO_KEY, COMPACTION_TIMESTAMP_KEY, CONFIG_KEY, CURRENT_SCHEMA_VERSION, - DATA_COLUMN_INFO_KEY, PRUNING_CHECKPOINT_KEY, SCHEMA_VERSION_KEY, SPLIT_KEY, - STATE_UPPER_LIMIT_NO_RETAIN, + ANCHOR_FOR_ARCHIVE_NODE, ANCHOR_INFO_KEY, ANCHOR_UNINITIALIZED, BLOB_INFO_KEY, + COMPACTION_TIMESTAMP_KEY, CONFIG_KEY, CURRENT_SCHEMA_VERSION, DATA_COLUMN_INFO_KEY, + PRUNING_CHECKPOINT_KEY, SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, }; use crate::state_cache::{PutStateOutcome, StateCache}; use crate::{ - get_data_column_key, get_key_for_col, ChunkWriter, DBColumn, DatabaseBlock, Error, ItemStore, - KeyValueStoreOp, PartialBeaconState, StoreItem, StoreOp, + get_data_column_key, get_key_for_col, DBColumn, DatabaseBlock, Error, 
ItemStore, + KeyValueStoreOp, StoreItem, StoreOp, }; use crate::{metrics, parse_data_column_key}; -use itertools::process_results; +use itertools::{process_results, Itertools}; use leveldb::iterator::LevelDBIterator; use lru::LruCache; use parking_lot::{Mutex, RwLock}; @@ -38,6 +33,7 @@ use state_processing::{ }; use std::cmp::min; use std::collections::{HashMap, HashSet}; +use std::io::{Read, Write}; use std::marker::PhantomData; use std::num::NonZeroUsize; use std::path::Path; @@ -45,6 +41,7 @@ use std::sync::Arc; use std::time::Duration; use types::data_column_sidecar::{ColumnIndex, DataColumnSidecar, DataColumnSidecarList}; use types::*; +use zstd::{Decoder, Encoder}; /// On-disk database that stores finalized states efficiently. /// @@ -58,12 +55,13 @@ pub struct HotColdDB, Cold: ItemStore> { /// greater than or equal are in the hot DB. pub(crate) split: RwLock, /// The starting slots for the range of blocks & states stored in the database. - anchor_info: RwLock>, + anchor_info: RwLock, /// The starting slots for the range of blobs stored in the database. blob_info: RwLock, /// The starting slots for the range of data columns stored in the database. data_column_info: RwLock, pub(crate) config: StoreConfig, + pub(crate) hierarchy: HierarchyModuli, /// Cold database containing compact historical data. pub cold_db: Cold, /// Database containing blobs. If None, store falls back to use `cold_db`. @@ -78,8 +76,11 @@ pub struct HotColdDB, Cold: ItemStore> { /// /// LOCK ORDERING: this lock must always be locked *after* the `split` if both are required. state_cache: Mutex>, - /// LRU cache of replayed states. - historic_state_cache: Mutex>>, + /// Cache of historic states and hierarchical diff buffers. + /// + /// This cache is never pruned. It is only populated in response to historical queries from the + /// HTTP API. + historic_state_cache: Mutex>, /// Chain spec. pub(crate) spec: Arc, /// Logger. 
@@ -155,22 +156,27 @@ pub enum HotColdDBError { proposed_split_slot: Slot, }, MissingStateToFreeze(Hash256), - MissingRestorePointHash(u64), + MissingRestorePointState(Slot), MissingRestorePoint(Hash256), MissingColdStateSummary(Hash256), MissingHotStateSummary(Hash256), MissingEpochBoundaryState(Hash256), + MissingPrevState(Hash256), MissingSplitState(Hash256, Slot), + MissingStateDiff(Hash256), + MissingHDiff(Slot), MissingExecutionPayload(Hash256), MissingFullBlockExecutionPayloadPruned(Hash256, Slot), MissingAnchorInfo, + MissingFrozenBlockSlot(Hash256), + MissingFrozenBlock(Slot), + MissingPathToBlobsDatabase, BlobsPreviouslyInDefaultStore, HotStateSummaryError(BeaconStateError), RestorePointDecodeError(ssz::DecodeError), BlockReplayBeaconError(BeaconStateError), BlockReplaySlotError(SlotProcessingError), BlockReplayBlockError(BlockProcessingError), - MissingLowerLimitState(Slot), InvalidSlotsPerRestorePoint { slots_per_restore_point: u64, slots_per_historical_root: u64, @@ -196,11 +202,13 @@ impl HotColdDB, MemoryStore> { spec: Arc, log: Logger, ) -> Result, MemoryStore>, Error> { - Self::verify_config(&config)?; + config.verify::()?; + + let hierarchy = config.hierarchy_config.to_moduli()?; let db = HotColdDB { split: RwLock::new(Split::default()), - anchor_info: RwLock::new(None), + anchor_info: RwLock::new(ANCHOR_UNINITIALIZED), blob_info: RwLock::new(BlobInfo::default()), data_column_info: RwLock::new(DataColumnInfo::default()), cold_db: MemoryStore::open(), @@ -208,8 +216,12 @@ impl HotColdDB, MemoryStore> { hot_db: MemoryStore::open(), block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), state_cache: Mutex::new(StateCache::new(config.state_cache_size)), - historic_state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)), + historic_state_cache: Mutex::new(HistoricStateCache::new( + config.hdiff_buffer_cache_size, + config.historic_state_cache_size, + )), config, + hierarchy, spec, log, _phantom: PhantomData, @@ -233,51 
+245,43 @@ impl HotColdDB, LevelDB> { spec: Arc, log: Logger, ) -> Result, Error> { - Self::verify_slots_per_restore_point(config.slots_per_restore_point)?; + config.verify::()?; - let mut db = HotColdDB { + let hierarchy = config.hierarchy_config.to_moduli()?; + + let hot_db = LevelDB::open(hot_path)?; + let anchor_info = RwLock::new(Self::load_anchor_info(&hot_db)?); + + let db = HotColdDB { split: RwLock::new(Split::default()), - anchor_info: RwLock::new(None), + anchor_info, blob_info: RwLock::new(BlobInfo::default()), data_column_info: RwLock::new(DataColumnInfo::default()), cold_db: LevelDB::open(cold_path)?, blobs_db: LevelDB::open(blobs_db_path)?, - hot_db: LevelDB::open(hot_path)?, + hot_db, block_cache: Mutex::new(BlockCache::new(config.block_cache_size)), state_cache: Mutex::new(StateCache::new(config.state_cache_size)), - historic_state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)), + historic_state_cache: Mutex::new(HistoricStateCache::new( + config.hdiff_buffer_cache_size, + config.historic_state_cache_size, + )), config, + hierarchy, spec, log, _phantom: PhantomData, }; - // Allow the slots-per-restore-point value to stay at the previous default if the config - // uses the new default. Don't error on a failed read because the config itself may need - // migrating. - if let Ok(Some(disk_config)) = db.load_config() { - if !db.config.slots_per_restore_point_set_explicitly - && disk_config.slots_per_restore_point == PREV_DEFAULT_SLOTS_PER_RESTORE_POINT - && db.config.slots_per_restore_point == DEFAULT_SLOTS_PER_RESTORE_POINT - { - debug!( - db.log, - "Ignoring slots-per-restore-point config in favour of on-disk value"; - "config" => db.config.slots_per_restore_point, - "on_disk" => disk_config.slots_per_restore_point, - ); - - // Mutate the in-memory config so that it's compatible. 
- db.config.slots_per_restore_point = PREV_DEFAULT_SLOTS_PER_RESTORE_POINT; - } - } + // Load the config from disk but don't error on a failed read because the config itself may + // need migrating. + let _ = db.load_config(); // Load the previous split slot from the database (if any). This ensures we can // stop and restart correctly. This needs to occur *before* running any migrations // because some migrations load states and depend on the split. if let Some(split) = db.load_split()? { *db.split.write() = split; - *db.anchor_info.write() = db.load_anchor_info()?; info!( db.log, @@ -370,7 +374,22 @@ impl HotColdDB, LevelDB> { // Ensure that any on-disk config is compatible with the supplied config. if let Some(disk_config) = db.load_config()? { - db.config.check_compatibility(&disk_config)?; + let split = db.get_split_info(); + let anchor = db.get_anchor_info(); + db.config + .check_compatibility(&disk_config, &split, &anchor)?; + + // Inform user if hierarchy config is changing. + if let Ok(hierarchy_config) = disk_config.hierarchy_config() { + if &db.config.hierarchy_config != hierarchy_config { + info!( + db.log, + "Updating historic state config"; + "previous_config" => %hierarchy_config, + "new_config" => %db.config.hierarchy_config, + ); + } + } } db.store_config()?; @@ -425,6 +444,49 @@ impl, Cold: ItemStore> HotColdDB self.state_cache.lock().len() } + pub fn register_metrics(&self) { + let hsc_metrics = self.historic_state_cache.lock().metrics(); + + metrics::set_gauge( + &metrics::STORE_BEACON_BLOCK_CACHE_SIZE, + self.block_cache.lock().block_cache.len() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_BLOB_CACHE_SIZE, + self.block_cache.lock().blob_cache.len() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_STATE_CACHE_SIZE, + self.state_cache.lock().len() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_HISTORIC_STATE_CACHE_SIZE, + hsc_metrics.num_state as i64, + ); + metrics::set_gauge( + 
&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_SIZE, + hsc_metrics.num_hdiff as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_BYTE_SIZE, + hsc_metrics.hdiff_byte_size as i64, + ); + + let anchor_info = self.get_anchor_info(); + metrics::set_gauge( + &metrics::STORE_BEACON_ANCHOR_SLOT, + anchor_info.anchor_slot.as_u64() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_OLDEST_BLOCK_SLOT, + anchor_info.oldest_block_slot.as_u64() as i64, + ); + metrics::set_gauge( + &metrics::STORE_BEACON_STATE_LOWER_LIMIT, + anchor_info.state_lower_limit.as_u64() as i64, + ); + } + /// Store a block and update the LRU cache. pub fn put_block( &self, @@ -1002,14 +1064,13 @@ impl, Cold: ItemStore> HotColdDB start_slot: Slot, end_state: BeaconState, end_block_root: Hash256, - spec: &ChainSpec, ) -> Result> + '_, Error> { HybridForwardsBlockRootsIterator::new( self, + DBColumn::BeaconBlockRoots, start_slot, None, || Ok((end_state, end_block_root)), - spec, ) } @@ -1018,9 +1079,14 @@ impl, Cold: ItemStore> HotColdDB start_slot: Slot, end_slot: Slot, get_state: impl FnOnce() -> Result<(BeaconState, Hash256), Error>, - spec: &ChainSpec, ) -> Result, Error> { - HybridForwardsBlockRootsIterator::new(self, start_slot, Some(end_slot), get_state, spec) + HybridForwardsBlockRootsIterator::new( + self, + DBColumn::BeaconBlockRoots, + start_slot, + Some(end_slot), + get_state, + ) } pub fn forwards_state_roots_iterator( @@ -1028,14 +1094,13 @@ impl, Cold: ItemStore> HotColdDB start_slot: Slot, end_state_root: Hash256, end_state: BeaconState, - spec: &ChainSpec, ) -> Result> + '_, Error> { HybridForwardsStateRootsIterator::new( self, + DBColumn::BeaconStateRoots, start_slot, None, || Ok((end_state, end_state_root)), - spec, ) } @@ -1044,9 +1109,14 @@ impl, Cold: ItemStore> HotColdDB start_slot: Slot, end_slot: Slot, get_state: impl FnOnce() -> Result<(BeaconState, Hash256), Error>, - spec: &ChainSpec, ) -> Result, Error> { - 
HybridForwardsStateRootsIterator::new(self, start_slot, Some(end_slot), get_state, spec) + HybridForwardsStateRootsIterator::new( + self, + DBColumn::BeaconStateRoots, + start_slot, + Some(end_slot), + get_state, + ) } /// Load an epoch boundary state by using the hot state summary look-up. @@ -1072,7 +1142,7 @@ impl, Cold: ItemStore> HotColdDB Some(state_slot) => { let epoch_boundary_slot = state_slot / E::slots_per_epoch() * E::slots_per_epoch(); - self.load_cold_state_by_slot(epoch_boundary_slot) + self.load_cold_state_by_slot(epoch_boundary_slot).map(Some) } None => Ok(None), } @@ -1497,7 +1567,6 @@ impl, Cold: ItemStore> HotColdDB state.build_all_caches(&self.spec)?; let latest_block_root = state.get_latest_block_root(state_root); - let state_slot = state.slot(); if let PutStateOutcome::New = self.state_cache .lock() @@ -1507,13 +1576,14 @@ impl, Cold: ItemStore> HotColdDB self.log, "Cached ancestor state"; "state_root" => ?state_root, - "slot" => state_slot, + "slot" => slot, ); } Ok(()) }; let blocks = self.load_blocks_to_replay(boundary_state.slot(), slot, latest_block_root)?; + let _t = metrics::start_timer(&metrics::STORE_BEACON_REPLAY_HOT_BLOCKS_TIME); self.replay_blocks( boundary_state, blocks, @@ -1530,48 +1600,142 @@ impl, Cold: ItemStore> HotColdDB } } + pub fn store_cold_state_summary( + &self, + state_root: &Hash256, + slot: Slot, + ops: &mut Vec, + ) -> Result<(), Error> { + ops.push(ColdStateSummary { slot }.as_kv_store_op(*state_root)); + ops.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col( + DBColumn::BeaconStateRoots.into(), + &slot.as_u64().to_be_bytes(), + ), + state_root.as_slice().to_vec(), + )); + Ok(()) + } + /// Store a pre-finalization state in the freezer database. - /// - /// If the state doesn't lie on a restore point boundary then just its summary will be stored. 
pub fn store_cold_state( &self, state_root: &Hash256, state: &BeaconState, ops: &mut Vec, ) -> Result<(), Error> { - ops.push(ColdStateSummary { slot: state.slot() }.as_kv_store_op(*state_root)); + self.store_cold_state_summary(state_root, state.slot(), ops)?; - if state.slot() % self.config.slots_per_restore_point != 0 { - return Ok(()); + let slot = state.slot(); + match self.hierarchy.storage_strategy(slot)? { + StorageStrategy::ReplayFrom(from) => { + debug!( + self.log, + "Storing cold state"; + "strategy" => "replay", + "from_slot" => from, + "slot" => state.slot(), + ); + // Already have persisted the state summary, don't persist anything else + } + StorageStrategy::Snapshot => { + debug!( + self.log, + "Storing cold state"; + "strategy" => "snapshot", + "slot" => state.slot(), + ); + self.store_cold_state_as_snapshot(state, ops)?; + } + StorageStrategy::DiffFrom(from) => { + debug!( + self.log, + "Storing cold state"; + "strategy" => "diff", + "from_slot" => from, + "slot" => state.slot(), + ); + self.store_cold_state_as_diff(state, from, ops)?; + } } - trace!( - self.log, - "Creating restore point"; - "slot" => state.slot(), - "state_root" => format!("{:?}", state_root) + Ok(()) + } + + pub fn store_cold_state_as_snapshot( + &self, + state: &BeaconState, + ops: &mut Vec, + ) -> Result<(), Error> { + let bytes = state.as_ssz_bytes(); + let compressed_value = { + let _timer = metrics::start_timer(&metrics::STORE_BEACON_STATE_FREEZER_COMPRESS_TIME); + let mut out = Vec::with_capacity(self.config.estimate_compressed_size(bytes.len())); + let mut encoder = Encoder::new(&mut out, self.config.compression_level) + .map_err(Error::Compression)?; + encoder.write_all(&bytes).map_err(Error::Compression)?; + encoder.finish().map_err(Error::Compression)?; + out + }; + + let key = get_key_for_col( + DBColumn::BeaconStateSnapshot.into(), + &state.slot().as_u64().to_be_bytes(), ); + ops.push(KeyValueStoreOp::PutKeyValue(key, compressed_value)); + Ok(()) + } - // 1. 
Convert to PartialBeaconState and store that in the DB. - let partial_state = PartialBeaconState::from_state_forgetful(state); - let op = partial_state.as_kv_store_op(*state_root); - ops.push(op); + fn load_cold_state_bytes_as_snapshot(&self, slot: Slot) -> Result>, Error> { + match self.cold_db.get_bytes( + DBColumn::BeaconStateSnapshot.into(), + &slot.as_u64().to_be_bytes(), + )? { + Some(bytes) => { + let _timer = + metrics::start_timer(&metrics::STORE_BEACON_STATE_FREEZER_DECOMPRESS_TIME); + let mut ssz_bytes = + Vec::with_capacity(self.config.estimate_decompressed_size(bytes.len())); + let mut decoder = Decoder::new(&*bytes).map_err(Error::Compression)?; + decoder + .read_to_end(&mut ssz_bytes) + .map_err(Error::Compression)?; + Ok(Some(ssz_bytes)) + } + None => Ok(None), + } + } - // 2. Store updated vector entries. - // Block roots need to be written here as well as by the `ChunkWriter` in `migrate_db` - // because states may require older block roots, and the writer only stores block roots - // between the previous split point and the new split point. - let db = &self.cold_db; - store_updated_vector(BlockRoots, db, state, &self.spec, ops)?; - store_updated_vector(StateRoots, db, state, &self.spec, ops)?; - store_updated_vector(HistoricalRoots, db, state, &self.spec, ops)?; - store_updated_vector(RandaoMixes, db, state, &self.spec, ops)?; - store_updated_vector(HistoricalSummaries, db, state, &self.spec, ops)?; + fn load_cold_state_as_snapshot(&self, slot: Slot) -> Result>, Error> { + Ok(self + .load_cold_state_bytes_as_snapshot(slot)? + .map(|bytes| BeaconState::from_ssz_bytes(&bytes, &self.spec)) + .transpose()?) + } - // 3. Store restore point. 
- let restore_point_index = state.slot().as_u64() / self.config.slots_per_restore_point; - self.store_restore_point_hash(restore_point_index, *state_root, ops); + pub fn store_cold_state_as_diff( + &self, + state: &BeaconState, + from_slot: Slot, + ops: &mut Vec, + ) -> Result<(), Error> { + // Load diff base state bytes. + let (_, base_buffer) = { + let _t = metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_LOAD_FOR_STORE_TIME); + self.load_hdiff_buffer_for_slot(from_slot)? + }; + let target_buffer = HDiffBuffer::from_state(state.clone()); + let diff = { + let _timer = metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_COMPUTE_TIME); + HDiff::compute(&base_buffer, &target_buffer, &self.config)? + }; + let diff_bytes = diff.as_ssz_bytes(); + let key = get_key_for_col( + DBColumn::BeaconStateDiff.into(), + &state.slot().as_u64().to_be_bytes(), + ); + ops.push(KeyValueStoreOp::PutKeyValue(key, diff_bytes)); Ok(()) } @@ -1580,7 +1744,7 @@ impl, Cold: ItemStore> HotColdDB /// Return `None` if no state with `state_root` lies in the freezer. pub fn load_cold_state(&self, state_root: &Hash256) -> Result>, Error> { match self.load_cold_state_slot(state_root)? { - Some(slot) => self.load_cold_state_by_slot(slot), + Some(slot) => self.load_cold_state_by_slot(slot).map(Some), None => Ok(None), } } @@ -1588,149 +1752,214 @@ impl, Cold: ItemStore> HotColdDB /// Load a pre-finalization state from the freezer database. /// /// Will reconstruct the state if it lies between restore points. - pub fn load_cold_state_by_slot(&self, slot: Slot) -> Result>, Error> { - // Guard against fetching states that do not exist due to gaps in the historic state - // database, which can occur due to checkpoint sync or re-indexing. - // See the comments in `get_historic_state_limits` for more information. 
- let (lower_limit, upper_limit) = self.get_historic_state_limits(); + pub fn load_cold_state_by_slot(&self, slot: Slot) -> Result, Error> { + let storage_strategy = self.hierarchy.storage_strategy(slot)?; - if slot <= lower_limit || slot >= upper_limit { - if slot % self.config.slots_per_restore_point == 0 { - let restore_point_idx = slot.as_u64() / self.config.slots_per_restore_point; - self.load_restore_point_by_index(restore_point_idx) - } else { - self.load_cold_intermediate_state(slot) + // Search for a state from this slot or a recent prior slot in the historic state cache. + let mut historic_state_cache = self.historic_state_cache.lock(); + + let cached_state = itertools::process_results( + storage_strategy + .replay_from_range(slot) + .rev() + .map(|prior_slot| historic_state_cache.get_state(prior_slot, &self.spec)), + |mut iter| iter.find_map(|cached_state| cached_state), + )?; + drop(historic_state_cache); + + if let Some(cached_state) = cached_state { + if cached_state.slot() == slot { + metrics::inc_counter(&metrics::STORE_BEACON_HISTORIC_STATE_CACHE_HIT); + return Ok(cached_state); } - .map(Some) - } else { - Ok(None) - } - } + metrics::inc_counter(&metrics::STORE_BEACON_HISTORIC_STATE_CACHE_MISS); - /// Load a restore point state by its `state_root`. - fn load_restore_point(&self, state_root: &Hash256) -> Result, Error> { - let partial_state_bytes = self - .cold_db - .get_bytes(DBColumn::BeaconState.into(), state_root.as_slice())? - .ok_or(HotColdDBError::MissingRestorePoint(*state_root))?; - let mut partial_state: PartialBeaconState = - PartialBeaconState::from_ssz_bytes(&partial_state_bytes, &self.spec)?; - - // Fill in the fields of the partial state. 
- partial_state.load_block_roots(&self.cold_db, &self.spec)?; - partial_state.load_state_roots(&self.cold_db, &self.spec)?; - partial_state.load_historical_roots(&self.cold_db, &self.spec)?; - partial_state.load_randao_mixes(&self.cold_db, &self.spec)?; - partial_state.load_historical_summaries(&self.cold_db, &self.spec)?; - - let mut state: BeaconState = partial_state.try_into()?; - state.apply_pending_mutations()?; - Ok(state) - } - - /// Load a restore point state by its `restore_point_index`. - fn load_restore_point_by_index( - &self, - restore_point_index: u64, - ) -> Result, Error> { - let state_root = self.load_restore_point_hash(restore_point_index)?; - self.load_restore_point(&state_root) - } - - /// Load a frozen state that lies between restore points. - fn load_cold_intermediate_state(&self, slot: Slot) -> Result, Error> { - if let Some(state) = self.historic_state_cache.lock().get(&slot) { - return Ok(state.clone()); + return self.load_cold_state_by_slot_using_replay(cached_state, slot); } - // 1. Load the restore points either side of the intermediate state. - let low_restore_point_idx = slot.as_u64() / self.config.slots_per_restore_point; - let high_restore_point_idx = low_restore_point_idx + 1; + metrics::inc_counter(&metrics::STORE_BEACON_HISTORIC_STATE_CACHE_MISS); - // Use low restore point as the base state. - let mut low_slot: Slot = - Slot::new(low_restore_point_idx * self.config.slots_per_restore_point); - let mut low_state: Option> = None; + // Load using the diff hierarchy. For states that require replay we recurse into this + // function so that we can try to get their pre-state *as a state* rather than an hdiff + // buffer. + match self.hierarchy.storage_strategy(slot)? 
{ + StorageStrategy::Snapshot | StorageStrategy::DiffFrom(_) => { + let buffer_timer = + metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_LOAD_TIME); + let (_, buffer) = self.load_hdiff_buffer_for_slot(slot)?; + drop(buffer_timer); + let state = buffer.as_state(&self.spec)?; - // Try to get a more recent state from the cache to avoid massive blocks replay. - for (s, state) in self.historic_state_cache.lock().iter() { - if s.as_u64() / self.config.slots_per_restore_point == low_restore_point_idx - && *s < slot - && low_slot < *s - { - low_slot = *s; - low_state = Some(state.clone()); + self.historic_state_cache + .lock() + .put_both(slot, state.clone(), buffer); + Ok(state) + } + StorageStrategy::ReplayFrom(from) => { + // No prior state found in cache (above), need to load by diffing and then + // replaying. + let base_state = self.load_cold_state_by_slot(from)?; + self.load_cold_state_by_slot_using_replay(base_state, slot) } } - - // If low_state is still None, use load_restore_point_by_index to load the state. - let low_state = match low_state { - Some(state) => state, - None => self.load_restore_point_by_index(low_restore_point_idx)?, - }; - - // Acquire the read lock, so that the split can't change while this is happening. - let split = self.split.read_recursive(); - - let high_restore_point = self.get_restore_point(high_restore_point_idx, &split)?; - - // 2. Load the blocks from the high restore point back to the low point. - let blocks = self.load_blocks_to_replay( - low_slot, - slot, - self.get_high_restore_point_block_root(&high_restore_point, slot)?, - )?; - - // 3. Replay the blocks on top of the low point. - // Use a forwards state root iterator to avoid doing any tree hashing. - // The state root of the high restore point should never be used, so is safely set to 0. 
- let state_root_iter = self.forwards_state_roots_iterator_until( - low_slot, - slot, - || Ok((high_restore_point, Hash256::zero())), - &self.spec, - )?; - - let mut state = self.replay_blocks(low_state, blocks, slot, Some(state_root_iter), None)?; - state.apply_pending_mutations()?; - - // If state is not error, put it in the cache. - self.historic_state_cache.lock().put(slot, state.clone()); - - Ok(state) } - /// Get the restore point with the given index, or if it is out of bounds, the split state. - pub(crate) fn get_restore_point( + fn load_cold_state_by_slot_using_replay( &self, - restore_point_idx: u64, - split: &Split, - ) -> Result, Error> { - if restore_point_idx * self.config.slots_per_restore_point >= split.slot.as_u64() { - self.get_state(&split.state_root, Some(split.slot))? - .ok_or(HotColdDBError::MissingSplitState( - split.state_root, - split.slot, - )) - .map_err(Into::into) - } else { - self.load_restore_point_by_index(restore_point_idx) - } - } - - /// Get a suitable block root for backtracking from `high_restore_point` to the state at `slot`. - /// - /// Defaults to the block root for `slot`, which *should* be in range. - fn get_high_restore_point_block_root( - &self, - high_restore_point: &BeaconState, + mut base_state: BeaconState, slot: Slot, - ) -> Result { - high_restore_point - .get_block_root(slot) - .or_else(|_| high_restore_point.get_oldest_block_root()) - .copied() - .map_err(HotColdDBError::RestorePointBlockHashError) + ) -> Result, Error> { + if !base_state.all_caches_built() { + // Build all caches and update the historic state cache so that these caches may be used + // at future slots. We do this lazily here rather than when populating the cache in + // order to speed up queries at snapshot/diff slots, which are already slow. 
+ let cache_timer = + metrics::start_timer(&metrics::STORE_BEACON_COLD_BUILD_BEACON_CACHES_TIME); + base_state.build_all_caches(&self.spec)?; + debug!( + self.log, + "Built caches for historic state"; + "target_slot" => slot, + "build_time_ms" => metrics::stop_timer_with_duration(cache_timer).as_millis() + ); + self.historic_state_cache + .lock() + .put_state(base_state.slot(), base_state.clone()); + } + + if base_state.slot() == slot { + return Ok(base_state); + } + + let blocks = self.load_cold_blocks(base_state.slot() + 1, slot)?; + + // Include state root for base state as it is required by block processing to not + // have to hash the state. + let replay_timer = metrics::start_timer(&metrics::STORE_BEACON_REPLAY_COLD_BLOCKS_TIME); + let state_root_iter = + self.forwards_state_roots_iterator_until(base_state.slot(), slot, || { + Err(Error::StateShouldNotBeRequired(slot)) + })?; + let state = self.replay_blocks(base_state, blocks, slot, Some(state_root_iter), None)?; + debug!( + self.log, + "Replayed blocks for historic state"; + "target_slot" => slot, + "replay_time_ms" => metrics::stop_timer_with_duration(replay_timer).as_millis() + ); + + self.historic_state_cache + .lock() + .put_state(slot, state.clone()); + Ok(state) + } + + fn load_hdiff_for_slot(&self, slot: Slot) -> Result { + let bytes = { + let _t = metrics::start_timer(&metrics::BEACON_HDIFF_READ_TIMES); + self.cold_db + .get_bytes( + DBColumn::BeaconStateDiff.into(), + &slot.as_u64().to_be_bytes(), + )? + .ok_or(HotColdDBError::MissingHDiff(slot))? + }; + let hdiff = { + let _t = metrics::start_timer(&metrics::BEACON_HDIFF_DECODE_TIMES); + HDiff::from_ssz_bytes(&bytes)? + }; + Ok(hdiff) + } + + /// Returns `HDiffBuffer` for the specified slot, or `HDiffBuffer` for the `ReplayFrom` slot if + /// the diff for the specified slot is not stored. 
+ fn load_hdiff_buffer_for_slot(&self, slot: Slot) -> Result<(Slot, HDiffBuffer), Error> { + if let Some(buffer) = self.historic_state_cache.lock().get_hdiff_buffer(slot) { + debug!( + self.log, + "Hit hdiff buffer cache"; + "slot" => slot + ); + metrics::inc_counter(&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_HIT); + return Ok((slot, buffer)); + } + metrics::inc_counter(&metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_MISS); + + // Load buffer for the previous state. + // This amount of recursion (<10 levels) should be OK. + let t = std::time::Instant::now(); + match self.hierarchy.storage_strategy(slot)? { + // Base case. + StorageStrategy::Snapshot => { + let state = self + .load_cold_state_as_snapshot(slot)? + .ok_or(Error::MissingSnapshot(slot))?; + let buffer = HDiffBuffer::from_state(state.clone()); + + self.historic_state_cache + .lock() + .put_both(slot, state, buffer.clone()); + + let load_time_ms = t.elapsed().as_millis(); + debug!( + self.log, + "Cached state and hdiff buffer"; + "load_time_ms" => load_time_ms, + "slot" => slot + ); + + Ok((slot, buffer)) + } + // Recursive case. + StorageStrategy::DiffFrom(from) => { + let (_buffer_slot, mut buffer) = self.load_hdiff_buffer_for_slot(from)?; + + // Load diff and apply it to buffer. + let diff = self.load_hdiff_for_slot(slot)?; + { + let _timer = + metrics::start_timer(&metrics::STORE_BEACON_HDIFF_BUFFER_APPLY_TIME); + diff.apply(&mut buffer, &self.config)?; + } + + self.historic_state_cache + .lock() + .put_hdiff_buffer(slot, buffer.clone()); + + let load_time_ms = t.elapsed().as_millis(); + debug!( + self.log, + "Cached hdiff buffer"; + "load_time_ms" => load_time_ms, + "slot" => slot + ); + + Ok((slot, buffer)) + } + StorageStrategy::ReplayFrom(from) => self.load_hdiff_buffer_for_slot(from), + } + } + + /// Load cold blocks between `start_slot` and `end_slot` inclusive. 
+ pub fn load_cold_blocks( + &self, + start_slot: Slot, + end_slot: Slot, + ) -> Result>, Error> { + let _t = metrics::start_timer(&metrics::STORE_BEACON_LOAD_COLD_BLOCKS_TIME); + let block_root_iter = + self.forwards_block_roots_iterator_until(start_slot, end_slot, || { + Err(Error::StateShouldNotBeRequired(end_slot)) + })?; + process_results(block_root_iter, |iter| { + iter.map(|(block_root, _slot)| block_root) + .dedup() + .map(|block_root| { + self.get_blinded_block(&block_root)? + .ok_or(Error::MissingBlock(block_root)) + }) + .collect() + })? } /// Load the blocks between `start_slot` and `end_slot` by backtracking from `end_block_hash`. @@ -1743,6 +1972,7 @@ impl, Cold: ItemStore> HotColdDB end_slot: Slot, end_block_hash: Hash256, ) -> Result>>, Error> { + let _t = metrics::start_timer(&metrics::STORE_BEACON_LOAD_HOT_BLOCKS_TIME); let mut blocks = ParentRootBlockIterator::new(self, end_block_hash) .map(|result| result.map(|(_, block)| block)) // Include the block at the end slot (if any), it needs to be @@ -1785,6 +2015,8 @@ impl, Cold: ItemStore> HotColdDB state_root_iter: Option>>, pre_slot_hook: Option>, ) -> Result, Error> { + metrics::inc_counter_by(&metrics::STORE_BEACON_REPLAYED_BLOCKS, blocks.len() as u64); + let mut block_replayer = BlockReplayer::new(state, &self.spec) .no_signature_verification() .minimal_block_root_verification(); @@ -1902,30 +2134,6 @@ impl, Cold: ItemStore> HotColdDB }; } - /// Fetch the slot of the most recently stored restore point (if any). - pub fn get_latest_restore_point_slot(&self) -> Option { - let split_slot = self.get_split_slot(); - let anchor = self.get_anchor_info(); - - // There are no restore points stored if the state upper limit lies in the hot database, - // and the lower limit is zero. It hasn't been reached yet, and may never be. 
- if anchor.as_ref().map_or(false, |a| { - a.state_upper_limit >= split_slot && a.state_lower_limit == 0 - }) { - None - } else if let Some(lower_limit) = anchor - .map(|a| a.state_lower_limit) - .filter(|limit| *limit > 0) - { - Some(lower_limit) - } else { - Some( - (split_slot - 1) / self.config.slots_per_restore_point - * self.config.slots_per_restore_point, - ) - } - } - /// Load the database schema version from disk. fn load_schema_version(&self) -> Result, Error> { self.hot_db.get(&SCHEMA_VERSION_KEY) @@ -1958,36 +2166,33 @@ impl, Cold: ItemStore> HotColdDB retain_historic_states: bool, ) -> Result { let anchor_slot = block.slot(); - let slots_per_restore_point = self.config.slots_per_restore_point; + // Set the `state_upper_limit` to the slot of the *next* checkpoint. + let next_snapshot_slot = self.hierarchy.next_snapshot_slot(anchor_slot)?; let state_upper_limit = if !retain_historic_states { STATE_UPPER_LIMIT_NO_RETAIN - } else if anchor_slot % slots_per_restore_point == 0 { - anchor_slot } else { - // Set the `state_upper_limit` to the slot of the *next* restore point. - // See `get_state_upper_limit` for rationale. - (anchor_slot / slots_per_restore_point + 1) * slots_per_restore_point + next_snapshot_slot }; let anchor_info = if state_upper_limit == 0 && anchor_slot == 0 { // Genesis archive node: no anchor because we *will* store all states. - None + ANCHOR_FOR_ARCHIVE_NODE } else { - Some(AnchorInfo { + AnchorInfo { anchor_slot, oldest_block_slot: anchor_slot, oldest_block_parent: block.parent_root(), state_upper_limit, state_lower_limit: self.spec.genesis_slot, - }) + } }; - self.compare_and_set_anchor_info(None, anchor_info) + self.compare_and_set_anchor_info(ANCHOR_UNINITIALIZED, anchor_info) } /// Get a clone of the store's anchor info. /// /// To do mutations, use `compare_and_set_anchor_info`. 
- pub fn get_anchor_info(&self) -> Option { + pub fn get_anchor_info(&self) -> AnchorInfo { self.anchor_info.read_recursive().clone() } @@ -2000,8 +2205,8 @@ impl, Cold: ItemStore> HotColdDB /// is not correct. pub fn compare_and_set_anchor_info( &self, - prev_value: Option, - new_value: Option, + prev_value: AnchorInfo, + new_value: AnchorInfo, ) -> Result { let mut anchor_info = self.anchor_info.write(); if *anchor_info == prev_value { @@ -2016,39 +2221,26 @@ impl, Cold: ItemStore> HotColdDB /// As for `compare_and_set_anchor_info`, but also writes the anchor to disk immediately. pub fn compare_and_set_anchor_info_with_write( &self, - prev_value: Option, - new_value: Option, + prev_value: AnchorInfo, + new_value: AnchorInfo, ) -> Result<(), Error> { let kv_store_op = self.compare_and_set_anchor_info(prev_value, new_value)?; self.hot_db.do_atomically(vec![kv_store_op]) } - /// Load the anchor info from disk, but do not set `self.anchor_info`. - fn load_anchor_info(&self) -> Result, Error> { - self.hot_db.get(&ANCHOR_INFO_KEY) + /// Load the anchor info from disk. + fn load_anchor_info(hot_db: &Hot) -> Result { + Ok(hot_db + .get(&ANCHOR_INFO_KEY)? + .unwrap_or(ANCHOR_UNINITIALIZED)) } /// Store the given `anchor_info` to disk. /// /// The argument is intended to be `self.anchor_info`, but is passed manually to avoid issues /// with recursive locking. - fn store_anchor_info_in_batch(&self, anchor_info: &Option) -> KeyValueStoreOp { - if let Some(ref anchor_info) = anchor_info { - anchor_info.as_kv_store_op(ANCHOR_INFO_KEY) - } else { - KeyValueStoreOp::DeleteKey(get_key_for_col( - DBColumn::BeaconMeta.into(), - ANCHOR_INFO_KEY.as_slice(), - )) - } - } - - /// If an anchor exists, return its `anchor_slot` field. 
- pub fn get_anchor_slot(&self) -> Option { - self.anchor_info - .read_recursive() - .as_ref() - .map(|a| a.anchor_slot) + fn store_anchor_info_in_batch(&self, anchor_info: &AnchorInfo) -> KeyValueStoreOp { + anchor_info.as_kv_store_op(ANCHOR_INFO_KEY) } /// Initialize the `BlobInfo` when starting from genesis or a checkpoint. @@ -2196,7 +2388,7 @@ impl, Cold: ItemStore> HotColdDB /// instance. pub fn get_historic_state_limits(&self) -> (Slot, Slot) { // If checkpoint sync is used then states in the hot DB will always be available, but may - // become unavailable as finalisation advances due to the lack of a restore point in the + // become unavailable as finalisation advances due to the lack of a snapshot in the // database. For this reason we take the minimum of the split slot and the // restore-point-aligned `state_upper_limit`, which should be set _ahead_ of the checkpoint // slot during initialisation. @@ -2207,20 +2399,16 @@ impl, Cold: ItemStore> HotColdDB // a new restore point will be created at that slot, making all states from 4096 onwards // permanently available. let split_slot = self.get_split_slot(); - self.anchor_info - .read_recursive() - .as_ref() - .map_or((split_slot, self.spec.genesis_slot), |a| { - (a.state_lower_limit, min(a.state_upper_limit, split_slot)) - }) + let anchor = self.anchor_info.read_recursive(); + ( + anchor.state_lower_limit, + min(anchor.state_upper_limit, split_slot), + ) } /// Return the minimum slot such that blocks are available for all subsequent slots. pub fn get_oldest_block_slot(&self) -> Slot { - self.anchor_info - .read_recursive() - .as_ref() - .map_or(self.spec.genesis_slot, |anchor| anchor.oldest_block_slot) + self.anchor_info.read_recursive().oldest_block_slot } /// Return the in-memory configuration used by the database. @@ -2263,32 +2451,6 @@ impl, Cold: ItemStore> HotColdDB self.split.read_recursive().as_kv_store_op(SPLIT_KEY) } - /// Load the state root of a restore point. 
- fn load_restore_point_hash(&self, restore_point_index: u64) -> Result { - let key = Self::restore_point_key(restore_point_index); - self.cold_db - .get(&key)? - .map(|r: RestorePointHash| r.state_root) - .ok_or_else(|| HotColdDBError::MissingRestorePointHash(restore_point_index).into()) - } - - /// Store the state root of a restore point. - fn store_restore_point_hash( - &self, - restore_point_index: u64, - state_root: Hash256, - ops: &mut Vec, - ) { - let value = &RestorePointHash { state_root }; - let op = value.as_kv_store_op(Self::restore_point_key(restore_point_index)); - ops.push(op); - } - - /// Convert a `restore_point_index` into a database key. - fn restore_point_key(restore_point_index: u64) -> Hash256 { - Hash256::from_low_u64_be(restore_point_index) - } - /// Load a frozen state's slot, given its root. pub fn load_cold_state_slot(&self, state_root: &Hash256) -> Result, Error> { Ok(self @@ -2316,52 +2478,6 @@ impl, Cold: ItemStore> HotColdDB self.hot_db.get(state_root) } - /// Verify that a parsed config is valid. - fn verify_config(config: &StoreConfig) -> Result<(), HotColdDBError> { - Self::verify_slots_per_restore_point(config.slots_per_restore_point)?; - Self::verify_epochs_per_blob_prune(config.epochs_per_blob_prune) - } - - /// Check that the restore point frequency is valid. - /// - /// Specifically, check that it is: - /// (1) A divisor of the number of slots per historical root, and - /// (2) Divisible by the number of slots per epoch - /// - /// - /// (1) ensures that we have at least one restore point within range of our state - /// root history when iterating backwards (and allows for more frequent restore points if - /// desired). - /// - /// (2) ensures that restore points align with hot state summaries, making it - /// quick to migrate hot to cold. 
- fn verify_slots_per_restore_point(slots_per_restore_point: u64) -> Result<(), HotColdDBError> { - let slots_per_historical_root = E::SlotsPerHistoricalRoot::to_u64(); - let slots_per_epoch = E::slots_per_epoch(); - if slots_per_restore_point > 0 - && slots_per_historical_root % slots_per_restore_point == 0 - && slots_per_restore_point % slots_per_epoch == 0 - { - Ok(()) - } else { - Err(HotColdDBError::InvalidSlotsPerRestorePoint { - slots_per_restore_point, - slots_per_historical_root, - slots_per_epoch, - }) - } - } - - // Check that epochs_per_blob_prune is at least 1 epoch to avoid attempting to prune the same - // epochs over and over again. - fn verify_epochs_per_blob_prune(epochs_per_blob_prune: u64) -> Result<(), HotColdDBError> { - if epochs_per_blob_prune > 0 { - Ok(()) - } else { - Err(HotColdDBError::ZeroEpochsPerBlobPrune) - } - } - /// Run a compaction pass to free up space used by deleted states. pub fn compact(&self) -> Result<(), Error> { self.hot_db.compact()?; @@ -2418,12 +2534,12 @@ impl, Cold: ItemStore> HotColdDB block_root: Hash256, ) -> Result, Error> { let mut ops = vec![]; - let mut block_root_writer = - ChunkWriter::::new(&self.cold_db, start_slot.as_usize())?; - for slot in start_slot.as_usize()..end_slot.as_usize() { - block_root_writer.set(slot, block_root, &mut ops)?; + for slot in start_slot.as_u64()..end_slot.as_u64() { + ops.push(KeyValueStoreOp::PutKeyValue( + get_key_for_col(DBColumn::BeaconBlockRoots.into(), &slot.to_be_bytes()), + block_root.as_slice().to_vec(), + )); } - block_root_writer.write(&mut ops)?; Ok(ops) } @@ -2474,7 +2590,7 @@ impl, Cold: ItemStore> HotColdDB "Pruning finalized payloads"; "info" => "you may notice degraded I/O performance while this runs" ); - let anchor_slot = self.get_anchor_info().map(|info| info.anchor_slot); + let anchor_slot = self.get_anchor_info().anchor_slot; let mut ops = vec![]; let mut last_pruned_block_root = None; @@ -2515,7 +2631,7 @@ impl, Cold: ItemStore> HotColdDB 
ops.push(StoreOp::DeleteExecutionPayload(block_root)); } - if Some(slot) == anchor_slot { + if slot == anchor_slot { info!( self.log, "Payload pruning reached anchor state"; @@ -2622,16 +2738,15 @@ impl, Cold: ItemStore> HotColdDB } // Sanity checks. - if let Some(anchor) = self.get_anchor_info() { - if oldest_blob_slot < anchor.oldest_block_slot { - error!( - self.log, - "Oldest blob is older than oldest block"; - "oldest_blob_slot" => oldest_blob_slot, - "oldest_block_slot" => anchor.oldest_block_slot - ); - return Err(HotColdDBError::BlobPruneLogicError.into()); - } + let anchor = self.get_anchor_info(); + if oldest_blob_slot < anchor.oldest_block_slot { + error!( + self.log, + "Oldest blob is older than oldest block"; + "oldest_blob_slot" => oldest_blob_slot, + "oldest_block_slot" => anchor.oldest_block_slot + ); + return Err(HotColdDBError::BlobPruneLogicError.into()); } // Iterate block roots forwards from the oldest blob slot. @@ -2646,21 +2761,16 @@ impl, Cold: ItemStore> HotColdDB let mut ops = vec![]; let mut last_pruned_block_root = None; - for res in self.forwards_block_roots_iterator_until( - oldest_blob_slot, - end_slot, - || { - let (_, split_state) = self - .get_advanced_hot_state(split.block_root, split.slot, split.state_root)? - .ok_or(HotColdDBError::MissingSplitState( - split.state_root, - split.slot, - ))?; + for res in self.forwards_block_roots_iterator_until(oldest_blob_slot, end_slot, || { + let (_, split_state) = self + .get_advanced_hot_state(split.block_root, split.slot, split.state_root)? + .ok_or(HotColdDBError::MissingSplitState( + split.state_root, + split.slot, + ))?; - Ok((split_state, split.block_root)) - }, - &self.spec, - )? { + Ok((split_state, split.block_root)) + })? { let (block_root, slot) = match res { Ok(tuple) => tuple, Err(e) => { @@ -2724,84 +2834,6 @@ impl, Cold: ItemStore> HotColdDB Ok(()) } - /// This function fills in missing block roots between last restore point slot and split - /// slot, if any. 
- pub fn heal_freezer_block_roots_at_split(&self) -> Result<(), Error> { - let split = self.get_split_info(); - let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point - * self.config.slots_per_restore_point; - - // Load split state (which has access to block roots). - let (_, split_state) = self - .get_advanced_hot_state(split.block_root, split.slot, split.state_root)? - .ok_or(HotColdDBError::MissingSplitState( - split.state_root, - split.slot, - ))?; - - let mut batch = vec![]; - let mut chunk_writer = ChunkWriter::::new( - &self.cold_db, - last_restore_point_slot.as_usize(), - )?; - - for slot in (last_restore_point_slot.as_u64()..split.slot.as_u64()).map(Slot::new) { - let block_root = *split_state.get_block_root(slot)?; - chunk_writer.set(slot.as_usize(), block_root, &mut batch)?; - } - chunk_writer.write(&mut batch)?; - self.cold_db.do_atomically(batch)?; - - Ok(()) - } - - pub fn heal_freezer_block_roots_at_genesis(&self) -> Result<(), Error> { - let oldest_block_slot = self.get_oldest_block_slot(); - let split_slot = self.get_split_slot(); - - // Check if backfill has been completed AND the freezer db has data in it - if oldest_block_slot != 0 || split_slot == 0 { - return Ok(()); - } - - let mut block_root_iter = self.forwards_block_roots_iterator_until( - Slot::new(0), - split_slot - 1, - || { - Err(Error::DBError { - message: "Should not require end state".to_string(), - }) - }, - &self.spec, - )?; - - let (genesis_block_root, _) = block_root_iter.next().ok_or_else(|| Error::DBError { - message: "Genesis block root missing".to_string(), - })??; - - let slots_to_fix = itertools::process_results(block_root_iter, |iter| { - iter.take_while(|(block_root, _)| block_root.is_zero()) - .map(|(_, slot)| slot) - .collect::>() - })?; - - let Some(first_slot) = slots_to_fix.first() else { - return Ok(()); - }; - - let mut chunk_writer = - ChunkWriter::::new(&self.cold_db, first_slot.as_usize())?; - let mut ops = vec![]; - for slot in 
slots_to_fix { - chunk_writer.set(slot.as_usize(), genesis_block_root, &mut ops)?; - } - - chunk_writer.write(&mut ops)?; - self.cold_db.do_atomically(ops)?; - - Ok(()) - } - /// Delete *all* states from the freezer database and update the anchor accordingly. /// /// WARNING: this method deletes the genesis state and replaces it with the provided @@ -2813,46 +2845,48 @@ impl, Cold: ItemStore> HotColdDB genesis_state_root: Hash256, genesis_state: &BeaconState, ) -> Result<(), Error> { - // Make sure there is no missing block roots before pruning - self.heal_freezer_block_roots_at_split()?; - // Update the anchor to use the dummy state upper limit and disable historic state storage. let old_anchor = self.get_anchor_info(); - let new_anchor = if let Some(old_anchor) = old_anchor.clone() { - AnchorInfo { - state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, - state_lower_limit: Slot::new(0), - ..old_anchor.clone() - } - } else { - AnchorInfo { - anchor_slot: Slot::new(0), - oldest_block_slot: Slot::new(0), - oldest_block_parent: Hash256::zero(), - state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, - state_lower_limit: Slot::new(0), - } + let new_anchor = AnchorInfo { + state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, + state_lower_limit: Slot::new(0), + ..old_anchor.clone() }; // Commit the anchor change immediately: if the cold database ops fail they can always be // retried, and we can't do them atomically with this change anyway. - self.compare_and_set_anchor_info_with_write(old_anchor, Some(new_anchor))?; + self.compare_and_set_anchor_info_with_write(old_anchor, new_anchor)?; // Stage freezer data for deletion. Do not bother loading and deserializing values as this // wastes time and is less schema-agnostic. My hope is that this method will be useful for // migrating to the tree-states schema (delete everything in the freezer then start afresh). 
let mut cold_ops = vec![]; - let columns = [ - DBColumn::BeaconState, - DBColumn::BeaconStateSummary, - DBColumn::BeaconRestorePoint, + let current_schema_columns = vec![ + DBColumn::BeaconColdStateSummary, + DBColumn::BeaconStateSnapshot, + DBColumn::BeaconStateDiff, DBColumn::BeaconStateRoots, + ]; + + // This function is intended to be able to clean up leftover V21 freezer database stuff in + // the case where the V22 schema upgrade failed *after* committing the version increment but + // *before* cleaning up the freezer DB. + // + // We can remove this once schema V21 has been gone for a while. + let previous_schema_columns = vec![ + DBColumn::BeaconStateSummary, + DBColumn::BeaconBlockRootsChunked, + DBColumn::BeaconStateRootsChunked, + DBColumn::BeaconRestorePoint, DBColumn::BeaconHistoricalRoots, DBColumn::BeaconRandaoMixes, DBColumn::BeaconHistoricalSummaries, ]; + let mut columns = current_schema_columns; + columns.extend(previous_schema_columns); + for column in columns { for res in self.cold_db.iter_column_keys::>(column) { let key = res?; @@ -2862,20 +2896,9 @@ impl, Cold: ItemStore> HotColdDB ))); } } + let delete_ops = cold_ops.len(); - // XXX: We need to commit the mass deletion here *before* re-storing the genesis state, as - // the current schema performs reads as part of `store_cold_state`. This can be deleted - // once the target schema is tree-states. If the process is killed before the genesis state - // is written this can be fixed by re-running. - info!( - self.log, - "Deleting historic states"; - "num_kv" => cold_ops.len(), - ); - self.cold_db.do_atomically(std::mem::take(&mut cold_ops))?; - - // If we just deleted the the genesis state, re-store it using the *current* schema, which - // may be different from the schema of the genesis state we just deleted. + // If we just deleted the genesis state, re-store it using the *current* schema. 
if self.get_split_slot() > 0 { info!( self.log, @@ -2883,9 +2906,15 @@ impl, Cold: ItemStore> HotColdDB "state_root" => ?genesis_state_root, ); self.store_cold_state(&genesis_state_root, genesis_state, &mut cold_ops)?; - self.cold_db.do_atomically(cold_ops)?; } + info!( + self.log, + "Deleting historic states"; + "delete_ops" => delete_ops, + ); + self.cold_db.do_atomically(cold_ops)?; + // In order to reclaim space, we need to compact the freezer DB as well. self.cold_db.compact()?; @@ -2962,7 +2991,6 @@ pub fn migrate_database, Cold: ItemStore>( // boundary (in order for the hot state summary scheme to work). let current_split_slot = store.split.read_recursive().slot; let anchor_info = store.anchor_info.read_recursive().clone(); - let anchor_slot = anchor_info.as_ref().map(|a| a.anchor_slot); if finalized_state.slot() < current_split_slot { return Err(HotColdDBError::FreezeSlotError { @@ -2979,28 +3007,20 @@ pub fn migrate_database, Cold: ItemStore>( } let mut hot_db_ops = vec![]; - let mut cold_db_ops = vec![]; + let mut cold_db_block_ops = vec![]; let mut epoch_boundary_blocks = HashSet::new(); let mut non_checkpoint_block_roots = HashSet::new(); - // Chunk writer for the linear block roots in the freezer DB. - // Start at the new upper limit because we iterate backwards. - let new_frozen_block_root_upper_limit = finalized_state.slot().as_usize().saturating_sub(1); - let mut block_root_writer = - ChunkWriter::::new(&store.cold_db, new_frozen_block_root_upper_limit)?; - - // 1. Copy all of the states between the new finalized state and the split slot, from the hot DB - // to the cold DB. Delete the execution payloads of these now-finalized blocks. 
- let state_root_iter = RootsIterator::new(&store, finalized_state); - for maybe_tuple in state_root_iter.take_while(|result| match result { - Ok((_, _, slot)) => { - slot >= &current_split_slot - && anchor_slot.map_or(true, |anchor_slot| slot >= &anchor_slot) - } - Err(_) => true, - }) { - let (block_root, state_root, slot) = maybe_tuple?; + // Iterate in descending order until the current split slot + let state_roots = RootsIterator::new(&store, finalized_state) + .take_while(|result| match result { + Ok((_, _, slot)) => *slot >= current_split_slot, + Err(_) => true, + }) + .collect::, _>>()?; + // Then, iterate states in slot ascending order, as they are stored wrt previous states. + for (block_root, state_root, slot) in state_roots.into_iter().rev() { // Delete the execution payload if payload pruning is enabled. At a skipped slot we may // delete the payload for the finalized block itself, but that's OK as we only guarantee // that payloads are present for slots >= the split slot. The payload fetching code is also 
- block_root_writer.set(slot.as_usize(), block_root, &mut cold_db_ops)?; - // Do not try to store states if a restore point is yet to be stored, or will never be // stored (see `STATE_UPPER_LIMIT_NO_RETAIN`). Make an exception for the genesis state // which always needs to be copied from the hot DB to the freezer and should not be deleted. - if slot != 0 - && anchor_info - .as_ref() - .map_or(false, |anchor| slot < anchor.state_upper_limit) - { + if slot != 0 && slot < anchor_info.state_upper_limit { debug!(store.log, "Pruning finalized state"; "slot" => slot); - continue; } - // Store a pointer from this state root to its slot, so we can later reconstruct states - // from their state root alone. - let cold_state_summary = ColdStateSummary { slot }; - let op = cold_state_summary.as_kv_store_op(state_root); - cold_db_ops.push(op); + let mut cold_db_ops = vec![]; - if slot % store.config.slots_per_restore_point == 0 { - let state: BeaconState = get_full_state(&store.hot_db, &state_root, &store.spec)? + // Only store the cold state if it's on a diff boundary. + // Calling `store_cold_state_summary` instead of `store_cold_state` for those allows us + // to skip loading many hot states. + if matches!( + store.hierarchy.storage_strategy(slot)?, + StorageStrategy::ReplayFrom(..) + ) { + // Store slot -> state_root and state_root -> slot mappings. + store.store_cold_state_summary(&state_root, slot, &mut cold_db_ops)?; + } else { + let state: BeaconState = store + .get_hot_state(&state_root)? .ok_or(HotColdDBError::MissingStateToFreeze(state_root))?; store.store_cold_state(&state_root, &state, &mut cold_db_ops)?; - - // Commit the batch of cold DB ops whenever a full state is written. Each state stored - // may read the linear fields of previous states stored. - store - .cold_db - .do_atomically(std::mem::take(&mut cold_db_ops))?; } + + // Cold states are diffed with respect to each other, so we need to finish writing previous + // states before storing new ones. 
+ store.cold_db.do_atomically(cold_db_ops)?; } // Prune sync committee branch data for all non checkpoint block roots. @@ -3077,10 +3102,6 @@ pub fn migrate_database, Cold: ItemStore>( hot_db_ops.push(StoreOp::DeleteSyncCommitteeBranch(block_root)); }); - // Finish writing the block roots and commit the remaining cold DB ops. - block_root_writer.write(&mut cold_db_ops)?; - store.cold_db.do_atomically(cold_db_ops)?; - // Warning: Critical section. We have to take care not to put any of the two databases in an // inconsistent state if the OS process dies at any point during the freezing // procedure. @@ -3090,8 +3111,7 @@ pub fn migrate_database, Cold: ItemStore>( // at any point below but it may happen that some states won't be deleted from the hot database // and will remain there forever. Since dying in these particular few lines should be an // exceedingly rare event, this should be an acceptable tradeoff. - - // Flush to disk all the states that have just been migrated to the cold store. + store.cold_db.do_atomically(cold_db_block_ops)?; store.cold_db.sync()?; { let mut split_guard = store.split.write(); @@ -3237,27 +3257,7 @@ pub(crate) struct ColdStateSummary { impl StoreItem for ColdStateSummary { fn db_column() -> DBColumn { - DBColumn::BeaconStateSummary - } - - fn as_store_bytes(&self) -> Vec { - self.as_ssz_bytes() - } - - fn from_store_bytes(bytes: &[u8]) -> Result { - Ok(Self::from_ssz_bytes(bytes)?) - } -} - -/// Struct for storing the state root of a restore point in the database. 
-#[derive(Debug, Clone, Copy, Default, Encode, Decode)] -struct RestorePointHash { - state_root: Hash256, -} - -impl StoreItem for RestorePointHash { - fn db_column() -> DBColumn { - DBColumn::BeaconRestorePoint + DBColumn::BeaconColdStateSummary } fn as_store_bytes(&self) -> Vec { diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 1d02bfbb3c..0498c7c1e2 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -7,7 +7,6 @@ //! //! Provides a simple API for storing/retrieving all types that sometimes needs type-hints. See //! tests for implementation examples. -mod chunk_writer; pub mod chunked_iter; pub mod chunked_vector; pub mod config; @@ -15,25 +14,25 @@ pub mod consensus_context; pub mod errors; mod forwards_iter; mod garbage_collection; +pub mod hdiff; +pub mod historic_state_cache; pub mod hot_cold_store; mod impls; mod leveldb_store; mod memory_store; pub mod metadata; pub mod metrics; -mod partial_beacon_state; +pub mod partial_beacon_state; pub mod reconstruct; pub mod state_cache; pub mod iter; -pub use self::chunk_writer::ChunkWriter; pub use self::config::StoreConfig; pub use self::consensus_context::OnDiskConsensusContext; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::leveldb_store::LevelDB; pub use self::memory_store::MemoryStore; -pub use self::partial_beacon_state::PartialBeaconState; pub use crate::metadata::BlobInfo; pub use errors::Error; pub use impls::beacon_state::StorageContainer as BeaconStateStorageContainer; @@ -251,6 +250,11 @@ pub enum DBColumn { /// For data related to the database itself. #[strum(serialize = "bma")] BeaconMeta, + /// Data related to blocks. + /// + /// - Key: `Hash256` block root. + /// - Value in hot DB: SSZ-encoded blinded block. + /// - Value in cold DB: 8-byte slot of block. 
#[strum(serialize = "blk")] BeaconBlock, #[strum(serialize = "blb")] @@ -260,9 +264,21 @@ pub enum DBColumn { /// For full `BeaconState`s in the hot database (finalized or fork-boundary states). #[strum(serialize = "ste")] BeaconState, - /// For the mapping from state roots to their slots or summaries. + /// For beacon state snapshots in the freezer DB. + #[strum(serialize = "bsn")] + BeaconStateSnapshot, + /// For compact `BeaconStateDiff`s in the freezer DB. + #[strum(serialize = "bsd")] + BeaconStateDiff, + /// Mapping from state root to `HotStateSummary` in the hot DB. + /// + /// Previously this column also served a role in the freezer DB, mapping state roots to + /// `ColdStateSummary`. However that role is now filled by `BeaconColdStateSummary`. #[strum(serialize = "bss")] BeaconStateSummary, + /// Mapping from state root to `ColdStateSummary` in the cold DB. + #[strum(serialize = "bcs")] + BeaconColdStateSummary, /// For the list of temporary states stored during block import, /// and then made non-temporary by the deletion of their state root from this column. #[strum(serialize = "bst")] @@ -281,15 +297,37 @@ pub enum DBColumn { ForkChoice, #[strum(serialize = "pkc")] PubkeyCache, - /// For the table mapping restore point numbers to state roots. + /// For the legacy table mapping restore point numbers to state roots. + /// + /// DEPRECATED. Can be removed once schema v22 is buried by a hard fork. #[strum(serialize = "brp")] BeaconRestorePoint, - #[strum(serialize = "bbr")] - BeaconBlockRoots, - #[strum(serialize = "bsr")] + /// Mapping from slot to beacon state root in the freezer DB. + /// + /// This new column was created to replace the previous `bsr` column. The replacement was + /// necessary to guarantee atomicity of the upgrade migration. + #[strum(serialize = "bsx")] BeaconStateRoots, + /// DEPRECATED. This is the previous column for beacon state roots stored by "chunk index". + /// + /// Can be removed once schema v22 is buried by a hard fork. 
+ #[strum(serialize = "bsr")] + BeaconStateRootsChunked, + /// Mapping from slot to beacon block root in the freezer DB. + /// + /// This new column was created to replace the previous `bbr` column. The replacement was + /// necessary to guarantee atomicity of the upgrade migration. + #[strum(serialize = "bbx")] + BeaconBlockRoots, + /// DEPRECATED. This is the previous column for beacon block roots stored by "chunk index". + /// + /// Can be removed once schema v22 is buried by a hard fork. + #[strum(serialize = "bbr")] + BeaconBlockRootsChunked, + /// DEPRECATED. Can be removed once schema v22 is buried by a hard fork. #[strum(serialize = "bhr")] BeaconHistoricalRoots, + /// DEPRECATED. Can be removed once schema v22 is buried by a hard fork. #[strum(serialize = "brm")] BeaconRandaoMixes, #[strum(serialize = "dht")] @@ -297,6 +335,7 @@ pub enum DBColumn { /// For Optimistically Imported Merge Transition Blocks #[strum(serialize = "otb")] OptimisticTransitionBlock, + /// DEPRECATED. Can be removed once schema v22 is buried by a hard fork. 
#[strum(serialize = "bhs")] BeaconHistoricalSummaries, #[strum(serialize = "olc")] @@ -338,6 +377,7 @@ impl DBColumn { | Self::BeaconState | Self::BeaconBlob | Self::BeaconStateSummary + | Self::BeaconColdStateSummary | Self::BeaconStateTemporary | Self::ExecPayload | Self::BeaconChain @@ -349,10 +389,14 @@ impl DBColumn { | Self::DhtEnrs | Self::OptimisticTransitionBlock => 32, Self::BeaconBlockRoots + | Self::BeaconBlockRootsChunked | Self::BeaconStateRoots + | Self::BeaconStateRootsChunked | Self::BeaconHistoricalRoots | Self::BeaconHistoricalSummaries | Self::BeaconRandaoMixes + | Self::BeaconStateSnapshot + | Self::BeaconStateDiff | Self::SyncCommittee | Self::SyncCommitteeBranch | Self::LightClientUpdate => 8, diff --git a/beacon_node/store/src/metadata.rs b/beacon_node/store/src/metadata.rs index 0c93251fe2..3f076a767a 100644 --- a/beacon_node/store/src/metadata.rs +++ b/beacon_node/store/src/metadata.rs @@ -4,7 +4,7 @@ use ssz::{Decode, Encode}; use ssz_derive::{Decode, Encode}; use types::{Checkpoint, Hash256, Slot}; -pub const CURRENT_SCHEMA_VERSION: SchemaVersion = SchemaVersion(21); +pub const CURRENT_SCHEMA_VERSION: SchemaVersion = SchemaVersion(22); // All the keys that get stored under the `BeaconMeta` column. // @@ -21,6 +21,27 @@ pub const DATA_COLUMN_INFO_KEY: Hash256 = Hash256::repeat_byte(7); /// State upper limit value used to indicate that a node is not storing historic states. pub const STATE_UPPER_LIMIT_NO_RETAIN: Slot = Slot::new(u64::MAX); +/// The `AnchorInfo` encoding full availability of all historic blocks & states. +pub const ANCHOR_FOR_ARCHIVE_NODE: AnchorInfo = AnchorInfo { + anchor_slot: Slot::new(0), + oldest_block_slot: Slot::new(0), + oldest_block_parent: Hash256::ZERO, + state_upper_limit: Slot::new(0), + state_lower_limit: Slot::new(0), +}; + +/// The `AnchorInfo` encoding an uninitialized anchor. 
+/// +/// This value should never exist except on initial start-up prior to the anchor being initialised +/// by `init_anchor_info`. +pub const ANCHOR_UNINITIALIZED: AnchorInfo = AnchorInfo { + anchor_slot: Slot::new(u64::MAX), + oldest_block_slot: Slot::new(u64::MAX), + oldest_block_parent: Hash256::ZERO, + state_upper_limit: Slot::new(u64::MAX), + state_lower_limit: Slot::new(0), +}; + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct SchemaVersion(pub u64); @@ -88,17 +109,47 @@ impl StoreItem for CompactionTimestamp { /// Database parameters relevant to weak subjectivity sync. #[derive(Debug, PartialEq, Eq, Clone, Encode, Decode, Serialize, Deserialize)] pub struct AnchorInfo { - /// The slot at which the anchor state is present and which we cannot revert. + /// The slot at which the anchor state is present and which we cannot revert. Values on start: + /// - Genesis start: 0 + /// - Checkpoint sync: Slot of the finalized checkpoint block + /// + /// Immutable pub anchor_slot: Slot, - /// The slot from which historical blocks are available (>=). + /// All blocks with slots greater than or equal to this value are available in the database. + /// Additionally, the genesis block is always available. + /// + /// Values on start: + /// - Genesis start: 0 + /// - Checkpoint sync: Slot of the finalized checkpoint block + /// + /// Progressively decreases during backfill sync until reaching 0. pub oldest_block_slot: Slot, /// The block root of the next block that needs to be added to fill in the history. /// /// Zero if we know all blocks back to genesis. pub oldest_block_parent: Hash256, - /// The slot from which historical states are available (>=). + /// All states with slots _greater than or equal to_ `min(split.slot, state_upper_limit)` are + /// available in the database. If `state_upper_limit` is higher than `split.slot`, states are + /// not being written to the freezer database. 
+ /// + /// Values on start if state reconstruction is enabled: + /// - Genesis start: 0 + /// - Checkpoint sync: Slot of the next scheduled snapshot + /// + /// Value on start if state reconstruction is disabled: + /// - 2^64 - 1 representing no historic state storage. + /// + /// Immutable until state reconstruction completes. pub state_upper_limit: Slot, - /// The slot before which historical states are available (<=). + /// All states with slots _less than or equal to_ this value are available in the database. + /// The minimum value is 0, indicating that the genesis state is always available. + /// + /// Values on start: + /// - Genesis start: 0 + /// - Checkpoint sync: 0 + /// + /// When full block backfill completes (`oldest_block_slot == 0`) state reconstruction starts and + /// this value will progressively increase until reaching `state_upper_limit`. pub state_lower_limit: Slot, } @@ -109,6 +160,21 @@ impl AnchorInfo { pub fn block_backfill_complete(&self, target_slot: Slot) -> bool { self.oldest_block_slot <= target_slot } + + /// Return true if all historic states are stored, i.e. if state reconstruction is complete. + pub fn all_historic_states_stored(&self) -> bool { + self.state_lower_limit == self.state_upper_limit + } + + /// Return true if no historic states other than genesis are stored in the database. + pub fn no_historic_states_stored(&self, split_slot: Slot) -> bool { + self.state_lower_limit == 0 && self.state_upper_limit >= split_slot + } + + /// Return true if no historic states other than genesis *will ever be stored*. 
+ pub fn full_state_pruning_enabled(&self) -> bool { + self.state_lower_limit == 0 && self.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN + } } impl StoreItem for AnchorInfo { diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index 1921b9b327..f0dd061790 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -73,6 +73,27 @@ pub static DISK_DB_DELETE_COUNT: LazyLock> = LazyLock::new &["col"], ) }); +/* + * Anchor Info + */ +pub static STORE_BEACON_ANCHOR_SLOT: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_anchor_slot", + "Current anchor info anchor_slot value", + ) +}); +pub static STORE_BEACON_OLDEST_BLOCK_SLOT: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_oldest_block_slot", + "Current anchor info oldest_block_slot value", + ) +}); +pub static STORE_BEACON_STATE_LOWER_LIMIT: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_state_lower_limit", + "Current anchor info state_lower_limit value", + ) +}); /* * Beacon State */ @@ -130,6 +151,24 @@ pub static BEACON_STATE_WRITE_BYTES: LazyLock> = LazyLock::ne "Total number of beacon state bytes written to the DB", ) }); +pub static BEACON_HDIFF_READ_TIMES: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_hdiff_read_seconds", + "Time required to read the hierarchical diff bytes from the database", + ) +}); +pub static BEACON_HDIFF_DECODE_TIMES: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_hdiff_decode_seconds", + "Time required to decode hierarchical diff bytes", + ) +}); +pub static BEACON_HDIFF_BUFFER_CLONE_TIMES: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_hdiff_buffer_clone_seconds", + "Time required to clone hierarchical diff buffer bytes", + ) +}); /* * Beacon Block */ @@ -145,12 +184,181 @@ pub static BEACON_BLOCK_CACHE_HIT_COUNT: LazyLock> = LazyLock "Number of hits to the store's block cache", ) }); + +/* + * Caches + */ pub 
static BEACON_BLOBS_CACHE_HIT_COUNT: LazyLock> = LazyLock::new(|| { try_create_int_counter( "store_beacon_blobs_cache_hit_total", "Number of hits to the store's blob cache", ) }); +pub static STORE_BEACON_BLOCK_CACHE_SIZE: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_block_cache_size", + "Current count of items in beacon store block cache", + ) +}); +pub static STORE_BEACON_BLOB_CACHE_SIZE: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_blob_cache_size", + "Current count of items in beacon store blob cache", + ) +}); +pub static STORE_BEACON_STATE_CACHE_SIZE: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_state_cache_size", + "Current count of items in beacon store state cache", + ) +}); +pub static STORE_BEACON_HISTORIC_STATE_CACHE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_historic_state_cache_size", + "Current count of states in the historic state cache", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_CACHE_SIZE: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_hdiff_buffer_cache_size", + "Current count of hdiff buffers in the historic state cache", + ) +}); +pub static STORE_BEACON_HDIFF_BUFFER_CACHE_BYTE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_hdiff_buffer_cache_byte_size", + "Memory consumed by hdiff buffers in the historic state cache", + ) + }); +pub static STORE_BEACON_STATE_FREEZER_COMPRESS_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_state_compress_seconds", + "Time taken to compress a state snapshot for the freezer DB", + ) + }); +pub static STORE_BEACON_STATE_FREEZER_DECOMPRESS_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_state_decompress_seconds", + "Time taken to decompress a state snapshot for the freezer DB", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_APPLY_TIME: LazyLock> = + LazyLock::new(|| { + 
try_create_histogram( + "store_beacon_hdiff_buffer_apply_seconds", + "Time taken to apply hdiff buffer to a state buffer", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_COMPUTE_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_hdiff_buffer_compute_seconds", + "Time taken to compute hdiff buffer to a state buffer", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_LOAD_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_beacon_hdiff_buffer_load_seconds", + "Time taken to load an hdiff buffer", + ) +}); +pub static STORE_BEACON_HDIFF_BUFFER_LOAD_FOR_STORE_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_hdiff_buffer_load_for_store_seconds", + "Time taken to load an hdiff buffer to store another hdiff", + ) + }); +pub static STORE_BEACON_HISTORIC_STATE_CACHE_HIT: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "store_beacon_historic_state_cache_hit_total", + "Total count of historic state cache hits for full states", + ) + }); +pub static STORE_BEACON_HISTORIC_STATE_CACHE_MISS: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "store_beacon_historic_state_cache_miss_total", + "Total count of historic state cache misses for full states", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_CACHE_HIT: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "store_beacon_hdiff_buffer_cache_hit_total", + "Total count of hdiff buffer cache hits", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_CACHE_MISS: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "store_beacon_hdiff_buffer_cache_miss_total", + "Total count of hdiff buffer cache miss", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_INTO_STATE_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_hdiff_buffer_into_state_seconds", + "Time taken to recreate a BeaconState from an hdiff buffer", + ) + }); +pub static STORE_BEACON_HDIFF_BUFFER_FROM_STATE_TIME: LazyLock> = + 
LazyLock::new(|| { + try_create_histogram( + "store_beacon_hdiff_buffer_from_state_seconds", + "Time taken to create an hdiff buffer from a BeaconState", + ) + }); +pub static STORE_BEACON_REPLAYED_BLOCKS: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "store_beacon_replayed_blocks_total", + "Total count of replayed blocks", + ) +}); +pub static STORE_BEACON_LOAD_COLD_BLOCKS_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_beacon_load_cold_blocks_time", + "Time spent loading blocks to replay for historic states", + ) +}); +pub static STORE_BEACON_LOAD_HOT_BLOCKS_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_beacon_load_hot_blocks_time", + "Time spent loading blocks to replay for hot states", + ) +}); +pub static STORE_BEACON_REPLAY_COLD_BLOCKS_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_replay_cold_blocks_time", + "Time spent replaying blocks for historic states", + ) + }); +pub static STORE_BEACON_COLD_BUILD_BEACON_CACHES_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "store_beacon_cold_build_beacon_caches_time", + "Time spent building caches on historic states", + ) + }); +pub static STORE_BEACON_REPLAY_HOT_BLOCKS_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_beacon_replay_hot_blocks_time", + "Time spent replaying blocks for hot states", + ) +}); +pub static STORE_BEACON_RECONSTRUCTION_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "store_beacon_reconstruction_time_seconds", + "Time taken to run a reconstruct historic states batch", + ) +}); pub static BEACON_DATA_COLUMNS_CACHE_HIT_COUNT: LazyLock> = LazyLock::new(|| { try_create_int_counter( diff --git a/beacon_node/store/src/partial_beacon_state.rs b/beacon_node/store/src/partial_beacon_state.rs index ea71ace53e..22eecdcc60 100644 --- a/beacon_node/store/src/partial_beacon_state.rs +++ b/beacon_node/store/src/partial_beacon_state.rs @@ -1,18 +1,20 @@ use 
crate::chunked_vector::{ - load_variable_list_from_db, load_vector_from_db, BlockRoots, HistoricalRoots, - HistoricalSummaries, RandaoMixes, StateRoots, + load_variable_list_from_db, load_vector_from_db, BlockRootsChunked, HistoricalRoots, + HistoricalSummaries, RandaoMixes, StateRootsChunked, }; -use crate::{get_key_for_col, DBColumn, Error, KeyValueStore, KeyValueStoreOp}; -use ssz::{Decode, DecodeError, Encode}; +use crate::{Error, KeyValueStore}; +use ssz::{Decode, DecodeError}; use ssz_derive::{Decode, Encode}; use std::sync::Arc; use types::historical_summary::HistoricalSummary; use types::superstruct; use types::*; -/// Lightweight variant of the `BeaconState` that is stored in the database. +/// DEPRECATED Lightweight variant of the `BeaconState` that is stored in the database. /// /// Utilises lazy-loading from separate storage for its vector fields. +/// +/// This can be deleted once schema versions prior to V22 are no longer supported. #[superstruct( variants(Base, Altair, Bellatrix, Capella, Deneb, Electra), variant_attributes(derive(Debug, PartialEq, Clone, Encode, Decode)) @@ -142,163 +144,7 @@ where pub pending_consolidations: List, } -/// Implement the conversion function from BeaconState -> PartialBeaconState. -macro_rules! 
impl_from_state_forgetful { - ($s:ident, $outer:ident, $variant_name:ident, $struct_name:ident, [$($extra_fields:ident),*], [$($extra_fields_opt:ident),*]) => { - PartialBeaconState::$variant_name($struct_name { - // Versioning - genesis_time: $s.genesis_time, - genesis_validators_root: $s.genesis_validators_root, - slot: $s.slot, - fork: $s.fork, - - // History - latest_block_header: $s.latest_block_header.clone(), - block_roots: None, - state_roots: None, - historical_roots: None, - - // Eth1 - eth1_data: $s.eth1_data.clone(), - eth1_data_votes: $s.eth1_data_votes.clone(), - eth1_deposit_index: $s.eth1_deposit_index, - - // Validator registry - validators: $s.validators.clone(), - balances: $s.balances.clone(), - - // Shuffling - latest_randao_value: *$outer - .get_randao_mix($outer.current_epoch()) - .expect("randao at current epoch is OK"), - randao_mixes: None, - - // Slashings - slashings: $s.slashings.clone(), - - // Finality - justification_bits: $s.justification_bits.clone(), - previous_justified_checkpoint: $s.previous_justified_checkpoint, - current_justified_checkpoint: $s.current_justified_checkpoint, - finalized_checkpoint: $s.finalized_checkpoint, - - // Variant-specific fields - $( - $extra_fields: $s.$extra_fields.clone() - ),*, - - // Variant-specific optional - $( - $extra_fields_opt: None - ),* - }) - } -} - impl PartialBeaconState { - /// Convert a `BeaconState` to a `PartialBeaconState`, while dropping the optional fields. 
- pub fn from_state_forgetful(outer: &BeaconState) -> Self { - match outer { - BeaconState::Base(s) => impl_from_state_forgetful!( - s, - outer, - Base, - PartialBeaconStateBase, - [previous_epoch_attestations, current_epoch_attestations], - [] - ), - BeaconState::Altair(s) => impl_from_state_forgetful!( - s, - outer, - Altair, - PartialBeaconStateAltair, - [ - previous_epoch_participation, - current_epoch_participation, - current_sync_committee, - next_sync_committee, - inactivity_scores - ], - [] - ), - BeaconState::Bellatrix(s) => impl_from_state_forgetful!( - s, - outer, - Bellatrix, - PartialBeaconStateBellatrix, - [ - previous_epoch_participation, - current_epoch_participation, - current_sync_committee, - next_sync_committee, - inactivity_scores, - latest_execution_payload_header - ], - [] - ), - BeaconState::Capella(s) => impl_from_state_forgetful!( - s, - outer, - Capella, - PartialBeaconStateCapella, - [ - previous_epoch_participation, - current_epoch_participation, - current_sync_committee, - next_sync_committee, - inactivity_scores, - latest_execution_payload_header, - next_withdrawal_index, - next_withdrawal_validator_index - ], - [historical_summaries] - ), - BeaconState::Deneb(s) => impl_from_state_forgetful!( - s, - outer, - Deneb, - PartialBeaconStateDeneb, - [ - previous_epoch_participation, - current_epoch_participation, - current_sync_committee, - next_sync_committee, - inactivity_scores, - latest_execution_payload_header, - next_withdrawal_index, - next_withdrawal_validator_index - ], - [historical_summaries] - ), - BeaconState::Electra(s) => impl_from_state_forgetful!( - s, - outer, - Electra, - PartialBeaconStateElectra, - [ - previous_epoch_participation, - current_epoch_participation, - current_sync_committee, - next_sync_committee, - inactivity_scores, - latest_execution_payload_header, - next_withdrawal_index, - next_withdrawal_validator_index, - deposit_requests_start_index, - deposit_balance_to_consume, - exit_balance_to_consume, - 
earliest_exit_epoch, - consolidation_balance_to_consume, - earliest_consolidation_epoch, - pending_deposits, - pending_partial_withdrawals, - pending_consolidations - ], - [historical_summaries] - ), - } - } - /// SSZ decode. pub fn from_ssz_bytes(bytes: &[u8], spec: &ChainSpec) -> Result { // Slot is after genesis_time (u64) and genesis_validators_root (Hash256). @@ -321,19 +167,13 @@ impl PartialBeaconState { )) } - /// Prepare the partial state for storage in the KV database. - pub fn as_kv_store_op(&self, state_root: Hash256) -> KeyValueStoreOp { - let db_key = get_key_for_col(DBColumn::BeaconState.into(), state_root.as_slice()); - KeyValueStoreOp::PutKeyValue(db_key, self.as_ssz_bytes()) - } - pub fn load_block_roots>( &mut self, store: &S, spec: &ChainSpec, ) -> Result<(), Error> { if self.block_roots().is_none() { - *self.block_roots_mut() = Some(load_vector_from_db::( + *self.block_roots_mut() = Some(load_vector_from_db::( store, self.slot(), spec, @@ -348,7 +188,7 @@ impl PartialBeaconState { spec: &ChainSpec, ) -> Result<(), Error> { if self.state_roots().is_none() { - *self.state_roots_mut() = Some(load_vector_from_db::( + *self.state_roots_mut() = Some(load_vector_from_db::( store, self.slot(), spec, diff --git a/beacon_node/store/src/reconstruct.rs b/beacon_node/store/src/reconstruct.rs index 8ef4886565..9bec83a35c 100644 --- a/beacon_node/store/src/reconstruct.rs +++ b/beacon_node/store/src/reconstruct.rs @@ -1,14 +1,16 @@ //! Implementation of historic state reconstruction (given complete block history). 
use crate::hot_cold_store::{HotColdDB, HotColdDBError}; +use crate::metadata::ANCHOR_FOR_ARCHIVE_NODE; +use crate::metrics; use crate::{Error, ItemStore}; use itertools::{process_results, Itertools}; -use slog::info; +use slog::{debug, info}; use state_processing::{ per_block_processing, per_slot_processing, BlockSignatureStrategy, ConsensusContext, VerifyBlockRoot, }; use std::sync::Arc; -use types::{EthSpec, Hash256}; +use types::EthSpec; impl HotColdDB where @@ -16,11 +18,16 @@ where Hot: ItemStore, Cold: ItemStore, { - pub fn reconstruct_historic_states(self: &Arc) -> Result<(), Error> { - let Some(mut anchor) = self.get_anchor_info() else { - // Nothing to do, history is complete. + pub fn reconstruct_historic_states( + self: &Arc, + num_blocks: Option, + ) -> Result<(), Error> { + let mut anchor = self.get_anchor_info(); + + // Nothing to do, history is complete. + if anchor.all_historic_states_stored() { return Ok(()); - }; + } // Check that all historic blocks are known. if anchor.oldest_block_slot != 0 { @@ -29,37 +36,30 @@ where }); } - info!( + debug!( self.log, - "Beginning historic state reconstruction"; + "Starting state reconstruction batch"; "start_slot" => anchor.state_lower_limit, ); - let slots_per_restore_point = self.config.slots_per_restore_point; + let _t = metrics::start_timer(&metrics::STORE_BEACON_RECONSTRUCTION_TIME); // Iterate blocks from the state lower limit to the upper limit. - let lower_limit_slot = anchor.state_lower_limit; let split = self.get_split_info(); - let upper_limit_state = self.get_restore_point( - anchor.state_upper_limit.as_u64() / slots_per_restore_point, - &split, - )?; - let upper_limit_slot = upper_limit_state.slot(); + let lower_limit_slot = anchor.state_lower_limit; + let upper_limit_slot = std::cmp::min(split.slot, anchor.state_upper_limit); - // Use a dummy root, as we never read the block for the upper limit state. 
- let upper_limit_block_root = Hash256::repeat_byte(0xff); - - let block_root_iter = self.forwards_block_roots_iterator( - lower_limit_slot, - upper_limit_state, - upper_limit_block_root, - &self.spec, - )?; + // If `num_blocks` is not specified iterate all blocks. Add 1 so that we end on an epoch + // boundary when `num_blocks` is a multiple of an epoch boundary. We want to be *inclusive* + // of the state at slot `lower_limit_slot + num_blocks`. + let block_root_iter = self + .forwards_block_roots_iterator_until(lower_limit_slot, upper_limit_slot - 1, || { + Err(Error::StateShouldNotBeRequired(upper_limit_slot - 1)) + })? + .take(num_blocks.map_or(usize::MAX, |n| n + 1)); // The state to be advanced. - let mut state = self - .load_cold_state_by_slot(lower_limit_slot)? - .ok_or(HotColdDBError::MissingLowerLimitState(lower_limit_slot))?; + let mut state = self.load_cold_state_by_slot(lower_limit_slot)?; state.build_caches(&self.spec)?; @@ -110,8 +110,19 @@ where // Stage state for storage in freezer DB. self.store_cold_state(&state_root, &state, &mut io_batch)?; - // If the slot lies on an epoch boundary, commit the batch and update the anchor. - if slot % slots_per_restore_point == 0 || slot + 1 == upper_limit_slot { + let batch_complete = + num_blocks.map_or(false, |n_blocks| slot == lower_limit_slot + n_blocks as u64); + let reconstruction_complete = slot + 1 == upper_limit_slot; + + // Commit the I/O batch if: + // + // - The diff/snapshot for this slot is required for future slots, or + // - The reconstruction batch is complete (we are about to return), or + // - Reconstruction is complete. + if self.hierarchy.should_commit_immediately(slot)? + || batch_complete + || reconstruction_complete + { info!( self.log, "State reconstruction in progress"; @@ -122,9 +133,9 @@ where self.cold_db.do_atomically(std::mem::take(&mut io_batch))?; // Update anchor. 
- let old_anchor = Some(anchor.clone()); + let old_anchor = anchor.clone(); - if slot + 1 == upper_limit_slot { + if reconstruction_complete { // The two limits have met in the middle! We're done! // Perform one last integrity check on the state reached. let computed_state_root = state.update_tree_hash_cache()?; @@ -136,23 +147,36 @@ where }); } - self.compare_and_set_anchor_info_with_write(old_anchor, None)?; + self.compare_and_set_anchor_info_with_write( + old_anchor, + ANCHOR_FOR_ARCHIVE_NODE, + )?; return Ok(()); } else { // The lower limit has been raised, store it. anchor.state_lower_limit = slot; - self.compare_and_set_anchor_info_with_write( - old_anchor, - Some(anchor.clone()), - )?; + self.compare_and_set_anchor_info_with_write(old_anchor, anchor.clone())?; + } + + // If this is the end of the batch, return Ok. The caller will run another + // batch when there is idle capacity. + if batch_complete { + debug!( + self.log, + "Finished state reconstruction batch"; + "start_slot" => lower_limit_slot, + "end_slot" => slot, + ); + return Ok(()); } } } - // Should always reach the `upper_limit_slot` and return early above. - Err(Error::StateReconstructionDidNotComplete) + // Should always reach the `upper_limit_slot` or the end of the batch and return early + // above. + Err(Error::StateReconstructionLogicError) })??; // Check that the split point wasn't mutated during the state reconstruction process. 
diff --git a/beacon_node/tests/test.rs b/beacon_node/tests/test.rs index 4be6536df9..0738b12ec0 100644 --- a/beacon_node/tests/test.rs +++ b/beacon_node/tests/test.rs @@ -26,7 +26,6 @@ fn build_node(env: &mut Environment) -> LocalBeaconNode { fn http_server_genesis_state() { let mut env = env_builder() .test_logger() - //.async_logger("debug", None) .expect("should build env logger") .multi_threaded_tokio_runtime() .expect("should start tokio runtime") diff --git a/book/src/advanced_database.md b/book/src/advanced_database.md index 345fff6981..d8d6ea61a1 100644 --- a/book/src/advanced_database.md +++ b/book/src/advanced_database.md @@ -7,59 +7,70 @@ the _freezer_ or _cold DB_, and the portion storing recent states as the _hot DB In both the hot and cold DBs, full `BeaconState` data structures are only stored periodically, and intermediate states are reconstructed by quickly replaying blocks on top of the nearest state. For example, to fetch a state at slot 7 the database might fetch a full state from slot 0, and replay -blocks from slots 1-7 while omitting redundant signature checks and Merkle root calculations. The -full states upon which blocks are replayed are referred to as _restore points_ in the case of the +blocks from slots 1-7 while omitting redundant signature checks and Merkle root calculations. In +the freezer DB, Lighthouse also uses hierarchical state diffs to jump larger distances (described in +more detail below). + +The full states upon which blocks are replayed are referred to as _snapshots_ in the case of the freezer DB, and _epoch boundary states_ in the case of the hot DB. The frequency at which the hot database stores full `BeaconState`s is fixed to one-state-per-epoch in order to keep loads of recent states performant. For the freezer DB, the frequency is -configurable via the `--slots-per-restore-point` CLI flag, which is the topic of the next section. 
+configurable via the `--hierarchy-exponents` CLI flag, which is the topic of the next section. -## Freezer DB Space-time Trade-offs +## Hierarchical State Diffs -Frequent restore points use more disk space but accelerate the loading of historical states. -Conversely, infrequent restore points use much less space, but cause the loading of historical -states to slow down dramatically. A lower _slots per restore point_ value (SPRP) corresponds to more -frequent restore points, while a higher SPRP corresponds to less frequent. The table below shows -some example values. +Since v6.0.0, Lighthouse's freezer database uses _hierarchical state diffs_ or _hdiffs_ for short. +These diffs allow Lighthouse to reconstruct any historic state relatively quickly from a very +compact database. The essence of the hdiffs is that full states (snapshots) are stored only around +once per year. To reconstruct a particular state, Lighthouse fetches the last snapshot prior to that +state, and then applies several _layers_ of diffs. For example, to access a state from November +2022, we might fetch the yearly snapshot for the start of 2022, then apply a monthly diff to jump to +November, and then more granular diffs to reach the particular week, day and epoch desired. +Usually for the last stretch between the start of the epoch and the state requested, some blocks +will be _replayed_. -| Use Case | SPRP | Yearly Disk Usage*| Load Historical State | -|----------------------------|------|-------------------|-----------------------| -| Research | 32 | more than 10 TB | 155 ms | -| Enthusiast (prev. default) | 2048 | hundreds of GB | 10.2 s | -| Validator only (default) | 8192 | tens of GB | 41 s | +The following diagram shows part of the layout of diffs in the default configuration. There is a +full snapshot stored every `2^21` slots. In the next layer there are diffs every `2^18` slots which +approximately correspond to "monthly" diffs. 
Following this are more granular diffs every `2^16` +slots, every `2^13` slots, and so on down to the per-epoch diffs every `2^5` slots. -*Last update: Dec 2023. +![Tree diagram displaying hierarchical state diffs](./imgs/db-freezer-layout.png) -As we can see, it's a high-stakes trade-off! The relationships to disk usage and historical state -load time are both linear – doubling SPRP halves disk usage and doubles load time. The minimum SPRP -is 32, and the maximum is 8192. +The number of layers and frequency of diffs is configurable via the `--hierarchy-exponents` flag, +which has a default value of `5,9,11,13,16,18,21`. The hierarchy exponents must be provided in order +from smallest to largest. The smallest exponent determines the frequency of the "closest" layer +of diffs, with the default value of 5 corresponding to a diff every `2^5` slots (every epoch). +The largest number determines the frequency of full snapshots, with the default value of 21 +corresponding to a snapshot every `2^21` slots (every 291 days). -The default value is 8192 for databases synced from scratch using Lighthouse v2.2.0 or later, or -2048 for prior versions. Please see the section on [Defaults](#defaults) below. +The number of possible `--hierarchy-exponents` configurations is extremely large and our exploration +of possible configurations is still in its relative infancy. If you experiment with non-default +values of `--hierarchy-exponents` we would be interested to hear how it goes. A few rules of thumb +that we have observed are: -The values shown in the table are approximate, calculated using a simple heuristic: each -`BeaconState` consumes around 145MB of disk space, and each block replayed takes around 5ms. The -**Yearly Disk Usage** column shows the approximate size of the freezer DB _alone_ (hot DB not included), calculated proportionally using the total freezer database disk usage. 
-The **Load Historical State** time is the worst-case load time for a state in the last slot -before a restore point. +- **More frequent snapshots = more space**. This is quite intuitive - if you store full states more + often then these will take up more space than diffs. However what you lose in space efficiency you + may gain in speed. It would be possible to achieve a configuration similar to Lighthouse's + previous `--slots-per-restore-point 32` using `--hierarchy-exponents 5`, although this would use + _a lot_ of space. It's even possible to push beyond that with `--hierarchy-exponents 0` which + would store a full state every single slot (NOT RECOMMENDED). +- **Fewer diff layers are not necessarily faster**. One might expect that the fewer diff layers there + are, the less work Lighthouse would have to do to reconstruct any particular state. In practice + this seems to be offset by the increased size of diffs in each layer making the diffs take longer + to apply. We observed no significant performance benefit from `--hierarchy-exponents 5,7,11`, and + a substantial increase in space consumed. -To run a full archival node with fast access to beacon states and a SPRP of 32, the disk usage will be more than 10 TB per year, which is impractical for many users. As such, users may consider running the [tree-states](https://github.com/sigp/lighthouse/releases/tag/v5.0.111-exp) release, which only uses less than 200 GB for a full archival node. The caveat is that it is currently experimental and in alpha release (as of Dec 2023), thus not recommended for running mainnet validators. Nevertheless, it is suitable to be used for analysis purposes, and if you encounter any issues in tree-states, we do appreciate any feedback. We plan to have a stable release of tree-states in 1H 2024. - -### Defaults - -As of Lighthouse v2.2.0, the default slots-per-restore-point value has been increased from 2048 -to 8192 in order to conserve disk space. 
Existing nodes will continue to use SPRP=2048 unless -re-synced. Note that it is currently not possible to change the SPRP without re-syncing, although -fast re-syncing may be achieved with [Checkpoint Sync](./checkpoint-sync.md). +If in doubt, we recommend running with the default configuration! It takes a long time to +reconstruct states in any given configuration, so it might be some time before the optimal +configuration is determined. ### CLI Configuration -To configure your Lighthouse node's database with a non-default SPRP, run your Beacon Node with -the `--slots-per-restore-point` flag: +To configure your Lighthouse node's database, run your beacon node with the `--hierarchy-exponents` flag: ```bash -lighthouse beacon_node --slots-per-restore-point 32 +lighthouse beacon_node --hierarchy-exponents "5,7,11" ``` ### Historic state cache @@ -72,17 +83,20 @@ The historical state cache size can be specified with the flag `--historic-state lighthouse beacon_node --historic-state-cache-size 4 ``` -> Note: This feature will cause high memory usage. +> Note: Using a large cache limit can lead to high memory usage. ## Glossary -* _Freezer DB_: part of the database storing finalized states. States are stored in a sparser +- _Freezer DB_: part of the database storing finalized states. States are stored in a sparser format, and usually less frequently than in the hot DB. -* _Cold DB_: see _Freezer DB_. -* _Hot DB_: part of the database storing recent states, all blocks, and other runtime data. Full +- _Cold DB_: see _Freezer DB_. +- _HDiff_: hierarchical state diff. +- _Hierarchy Exponents_: configuration for hierarchical state diffs, which determines the density + of stored diffs and snapshots in the freezer DB. +- _Hot DB_: part of the database storing recent states, all blocks, and other runtime data. Full states are stored every epoch. -* _Restore Point_: a full `BeaconState` stored periodically in the freezer DB. 
-* _Slots Per Restore Point (SPRP)_: the number of slots between restore points in the freezer DB. -* _Split Slot_: the slot at which states are divided between the hot and the cold DBs. All states +- _Snapshot_: a full `BeaconState` stored periodically in the freezer DB. Approximately yearly by + default (every ~291 days). +- _Split Slot_: the slot at which states are divided between the hot and the cold DBs. All states from slots less than the split slot are in the freezer, while all states with slots greater than or equal to the split slot are in the hot DB. diff --git a/book/src/help_bn.md b/book/src/help_bn.md index fa4a473ec0..a4ab44748c 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -166,9 +166,23 @@ Options: --graffiti Specify your custom graffiti to be included in blocks. Defaults to the current version and commit, truncated to fit in 32 bytes. + --hdiff-buffer-cache-size + Number of hierarchical diff (hdiff) buffers to cache in memory. Each + buffer is around the size of a BeaconState so you should be cautious + about setting this value too high. This flag is irrelevant for most + nodes, which run with state pruning enabled. [default: 16] + --hierarchy-exponents + Specifies the frequency for storing full state snapshots and + hierarchical diffs in the freezer DB. Accepts a comma-separated list + of ascending exponents. Each exponent defines an interval for storing + diffs to the layer above. The last exponent defines the interval for + full snapshots. For example, a config of '4,8,12' would store a full + snapshot every 4096 (2^12) slots, first-level diffs every 256 (2^8) + slots, and second-level diffs every 16 (2^4) slots. Cannot be changed + after initialization. [default: 5,9,11,13,16,18,21] --historic-state-cache-size - Specifies how many states from the freezer database should cache in - memory [default: 1] + Specifies how many states from the freezer database should be cached + in memory [default: 1] --http-address
Set the listen address for the RESTful HTTP API server. --http-allow-origin @@ -364,9 +378,7 @@ Options: --slasher-validator-chunk-size Number of validators per chunk stored on disk. --slots-per-restore-point - Specifies how often a freezer DB restore point should be stored. - Cannot be changed after initialization. [default: 8192 (mainnet) or 64 - (minimal)] + DEPRECATED. This flag has no effect. --state-cache-size Specifies the size of the state cache [default: 128] --suggested-fee-recipient @@ -468,9 +480,6 @@ Flags: --disable-upnp Disables UPnP support. Setting this will prevent Lighthouse from attempting to automatically establish external port mappings. - --dummy-eth1 - If present, uses an eth1 backend that generates static dummy - data.Identical to the method used at the 2019 Canada interop. -e, --enr-match Sets the local ENR IP address and port to match those set for lighthouse. Specifically, the IP address will be the value of @@ -478,10 +487,6 @@ Flags: --enable-private-discovery Lighthouse by default does not discover private IP addresses. Set this flag to enable connection attempts to local addresses. - --eth1 - If present the node will connect to an eth1 node. This is required for - block production, you must use this flag if you wish to serve a - validator. --eth1-purge-cache Purges the eth1 block and deposit caches --genesis-backfill @@ -549,8 +554,7 @@ Flags: --staking Standard option for a staking beacon node. This will enable the HTTP server on localhost:5052 and import deposit logs from the execution - node. This is equivalent to `--http` on merge-ready networks, or - `--http --eth1` pre-merge + node. --stdin-inputs If present, read all user inputs from stdin instead of tty. 
--subscribe-all-subnets diff --git a/book/src/imgs/db-freezer-layout.png b/book/src/imgs/db-freezer-layout.png new file mode 100644 index 0000000000..1870eb4267 Binary files /dev/null and b/book/src/imgs/db-freezer-layout.png differ diff --git a/boot_node/src/server.rs b/boot_node/src/server.rs index 00738462e0..96032dddcc 100644 --- a/boot_node/src/server.rs +++ b/boot_node/src/server.rs @@ -136,8 +136,8 @@ pub async fn run( "active_sessions" => metrics.active_sessions, "requests/s" => format_args!("{:.2}", metrics.unsolicited_requests_per_second), "ipv4_nodes" => ipv4_only_reachable, - "ipv6_nodes" => ipv6_only_reachable, - "ipv6_and_ipv4_nodes" => ipv4_ipv6_reachable, + "ipv6_only_nodes" => ipv6_only_reachable, + "dual_stack_nodes" => ipv4_ipv6_reachable, "unreachable_nodes" => unreachable_nodes, ); diff --git a/common/eth2/src/lighthouse.rs b/common/eth2/src/lighthouse.rs index e978d92245..309d8228aa 100644 --- a/common/eth2/src/lighthouse.rs +++ b/common/eth2/src/lighthouse.rs @@ -361,7 +361,7 @@ pub struct DatabaseInfo { pub schema_version: u64, pub config: StoreConfig, pub split: Split, - pub anchor: Option, + pub anchor: AnchorInfo, pub blob_info: BlobInfo, } diff --git a/common/eth2_config/src/lib.rs b/common/eth2_config/src/lib.rs index cd5d7a8bd4..f13e90490e 100644 --- a/common/eth2_config/src/lib.rs +++ b/common/eth2_config/src/lib.rs @@ -32,6 +32,7 @@ const HOLESKY_GENESIS_STATE_SOURCE: GenesisStateSource = GenesisStateSource::Url ], checksum: "0xd750639607c337bbb192b15c27f447732267bf72d1650180a0e44c2d93a80741", genesis_validators_root: "0x9143aa7c615a7f7115e2b6aac319c03529df8242ae705fba9df39b79c59fa8b1", + genesis_state_root: "0x0ea3f6f9515823b59c863454675fefcd1d8b4f2dbe454db166206a41fda060a0", }; const CHIADO_GENESIS_STATE_SOURCE: GenesisStateSource = GenesisStateSource::Url { @@ -39,6 +40,7 @@ const CHIADO_GENESIS_STATE_SOURCE: GenesisStateSource = GenesisStateSource::Url urls: &[], checksum: 
"0xd4a039454c7429f1dfaa7e11e397ef3d0f50d2d5e4c0e4dc04919d153aa13af1", genesis_validators_root: "0x9d642dac73058fbf39c0ae41ab1e34e4d889043cb199851ded7095bc99eb4c1e", + genesis_state_root: "0xa48419160f8f146ecaa53d12a5d6e1e6af414a328afdc56b60d5002bb472a077", }; /// The core configuration of a Lighthouse beacon node. @@ -100,6 +102,10 @@ pub enum GenesisStateSource { /// /// The format should be 0x-prefixed ASCII bytes. genesis_validators_root: &'static str, + /// The genesis state root. + /// + /// The format should be 0x-prefixed ASCII bytes. + genesis_state_root: &'static str, }, } diff --git a/common/eth2_network_config/src/lib.rs b/common/eth2_network_config/src/lib.rs index 3d0ffc5b9e..5d5a50574b 100644 --- a/common/eth2_network_config/src/lib.rs +++ b/common/eth2_network_config/src/lib.rs @@ -154,6 +154,32 @@ impl Eth2NetworkConfig { } } + /// Get the genesis state root for this network. + /// + /// `Ok(None)` will be returned if the genesis state is not known. No network requests will be + /// made by this function. This function will not error unless the genesis state configuration + /// is corrupted. + pub fn genesis_state_root(&self) -> Result, String> { + match self.genesis_state_source { + GenesisStateSource::Unknown => Ok(None), + GenesisStateSource::Url { + genesis_state_root, .. + } => Hash256::from_str(genesis_state_root) + .map(Option::Some) + .map_err(|e| format!("Unable to parse genesis state root: {:?}", e)), + GenesisStateSource::IncludedBytes => { + self.get_genesis_state_from_bytes::() + .and_then(|mut state| { + Ok(Some( + state + .canonical_root() + .map_err(|e| format!("Hashing error: {e:?}"))?, + )) + }) + } + } + } + /// Construct a consolidated `ChainSpec` from the YAML config. pub fn chain_spec(&self) -> Result { ChainSpec::from_config::(&self.config).ok_or_else(|| { @@ -185,6 +211,7 @@ impl Eth2NetworkConfig { urls: built_in_urls, checksum, genesis_validators_root, + .. 
} => { let checksum = Hash256::from_str(checksum).map_err(|e| { format!("Unable to parse genesis state bytes checksum: {:?}", e) @@ -507,6 +534,7 @@ mod tests { urls, checksum, genesis_validators_root, + .. } = net.genesis_state_source { Hash256::from_str(checksum).expect("the checksum must be a valid 32-byte value"); diff --git a/common/metrics/src/lib.rs b/common/metrics/src/lib.rs index 1f2ac71aea..22513af8bc 100644 --- a/common/metrics/src/lib.rs +++ b/common/metrics/src/lib.rs @@ -283,6 +283,14 @@ pub fn stop_timer(timer: Option) { } } +/// Stops a timer created with `start_timer(..)`. +/// +/// Return the duration that the timer was running for, or 0.0 if it was `None` due to incorrect +/// initialisation. +pub fn stop_timer_with_duration(timer: Option) -> Duration { + Duration::from_secs_f64(timer.map_or(0.0, |t| t.stop_and_record())) +} + pub fn observe_vec(vec: &Result, name: &[&str], value: f64) { if let Some(h) = get_histogram(vec, name) { h.observe(value) diff --git a/common/system_health/src/lib.rs b/common/system_health/src/lib.rs index feec08af84..3431189842 100644 --- a/common/system_health/src/lib.rs +++ b/common/system_health/src/lib.rs @@ -198,23 +198,61 @@ pub fn observe_system_health_vc( } } +/// The current state of Lighthouse NAT/connectivity. +#[derive(Serialize, Deserialize)] +pub struct NatState { + /// Contactable on discovery ipv4. + discv5_ipv4: bool, + /// Contactable on discovery ipv6. + discv5_ipv6: bool, + /// Contactable on libp2p ipv4. + libp2p_ipv4: bool, + /// Contactable on libp2p ipv6. + libp2p_ipv6: bool, +} + +impl NatState { + pub fn is_anything_open(&self) -> bool { + self.discv5_ipv4 || self.discv5_ipv6 || self.libp2p_ipv4 || self.libp2p_ipv6 + } +} + /// Observes if NAT traversal is possible. 
-pub fn observe_nat() -> bool { - let discv5_nat = lighthouse_network::metrics::get_int_gauge( +pub fn observe_nat() -> NatState { + let discv5_ipv4 = lighthouse_network::metrics::get_int_gauge( &lighthouse_network::metrics::NAT_OPEN, - &["discv5"], + &["discv5_ipv4"], ) .map(|g| g.get() == 1) .unwrap_or_default(); - let libp2p_nat = lighthouse_network::metrics::get_int_gauge( + let discv5_ipv6 = lighthouse_network::metrics::get_int_gauge( + &lighthouse_network::metrics::NAT_OPEN, + &["discv5_ipv6"], + ) + .map(|g| g.get() == 1) + .unwrap_or_default(); + + let libp2p_ipv4 = lighthouse_network::metrics::get_int_gauge( &lighthouse_network::metrics::NAT_OPEN, &["libp2p"], ) .map(|g| g.get() == 1) .unwrap_or_default(); - discv5_nat || libp2p_nat + let libp2p_ipv6 = lighthouse_network::metrics::get_int_gauge( + &lighthouse_network::metrics::NAT_OPEN, + &["libp2p"], + ) + .map(|g| g.get() == 1) + .unwrap_or_default(); + + NatState { + discv5_ipv4, + discv5_ipv6, + libp2p_ipv4, + libp2p_ipv6, + } } /// Observes the Beacon Node system health. @@ -242,7 +280,7 @@ pub fn observe_system_health_bn( .unwrap_or_else(|| (String::from("None"), 0, 0)); // Determine if the NAT is open or not. - let nat_open = observe_nat(); + let nat_open = observe_nat().is_anything_open(); SystemHealthBN { system_health, diff --git a/consensus/state_processing/src/common/update_progressive_balances_cache.rs b/consensus/state_processing/src/common/update_progressive_balances_cache.rs index 101e861683..1fdfe802c4 100644 --- a/consensus/state_processing/src/common/update_progressive_balances_cache.rs +++ b/consensus/state_processing/src/common/update_progressive_balances_cache.rs @@ -1,6 +1,6 @@ /// A collection of all functions that mutates the `ProgressiveBalancesCache`. 
use crate::metrics::{ - PARTICIPATION_CURR_EPOCH_TARGET_ATTESTING_GWEI_PROGRESSIVE_TOTAL, + self, PARTICIPATION_CURR_EPOCH_TARGET_ATTESTING_GWEI_PROGRESSIVE_TOTAL, PARTICIPATION_PREV_EPOCH_TARGET_ATTESTING_GWEI_PROGRESSIVE_TOTAL, }; use crate::{BlockProcessingError, EpochProcessingError}; @@ -21,6 +21,8 @@ pub fn initialize_progressive_balances_cache( return Ok(()); } + let _timer = metrics::start_timer(&metrics::BUILD_PROGRESSIVE_BALANCES_CACHE_TIME); + // Calculate the total flag balances for previous & current epoch in a single iteration. // This calculates `get_total_balance(unslashed_participating_indices(..))` for each flag in // the current and previous epoch. diff --git a/consensus/state_processing/src/epoch_cache.rs b/consensus/state_processing/src/epoch_cache.rs index 5af5e639fd..dc1d79709e 100644 --- a/consensus/state_processing/src/epoch_cache.rs +++ b/consensus/state_processing/src/epoch_cache.rs @@ -1,6 +1,7 @@ use crate::common::altair::BaseRewardPerIncrement; use crate::common::base::SqrtTotalActiveBalance; use crate::common::{altair, base}; +use crate::metrics; use safe_arith::SafeArith; use types::epoch_cache::{EpochCache, EpochCacheError, EpochCacheKey}; use types::{ @@ -138,6 +139,8 @@ pub fn initialize_epoch_cache( return Ok(()); } + let _timer = metrics::start_timer(&metrics::BUILD_EPOCH_CACHE_TIME); + let current_epoch = state.current_epoch(); let next_epoch = state.next_epoch().map_err(EpochCacheError::BeaconState)?; let decision_block_root = state diff --git a/consensus/state_processing/src/metrics.rs b/consensus/state_processing/src/metrics.rs index b53dee96d9..8772dbd4f8 100644 --- a/consensus/state_processing/src/metrics.rs +++ b/consensus/state_processing/src/metrics.rs @@ -41,6 +41,20 @@ pub static PROCESS_EPOCH_TIME: LazyLock> = LazyLock::new(|| { "Time required for process_epoch", ) }); +pub static BUILD_EPOCH_CACHE_TIME: LazyLock> = LazyLock::new(|| { + try_create_histogram( + "beacon_state_processing_epoch_cache", + "Time required 
to build the epoch cache", + ) +}); +pub static BUILD_PROGRESSIVE_BALANCES_CACHE_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "beacon_state_processing_progressive_balances_cache", + "Time required to build the progressive balances cache", + ) + }); + /* * Participation Metrics (progressive balances) */ diff --git a/consensus/types/src/beacon_block_body.rs b/consensus/types/src/beacon_block_body.rs index c81e7bcde9..1090b2cc03 100644 --- a/consensus/types/src/beacon_block_body.rs +++ b/consensus/types/src/beacon_block_body.rs @@ -147,7 +147,7 @@ impl<'a, E: EthSpec, Payload: AbstractExecPayload> BeaconBlockBodyRef<'a, E, } } - fn body_merkle_leaves(&self) -> Vec { + pub(crate) fn body_merkle_leaves(&self) -> Vec { let mut leaves = vec![]; match self { Self::Base(body) => { @@ -178,57 +178,71 @@ impl<'a, E: EthSpec, Payload: AbstractExecPayload> BeaconBlockBodyRef<'a, E, leaves } - /// Produces the proof of inclusion for a `KzgCommitment` in `self.blob_kzg_commitments` - /// at `index`. + /// Calculate a KZG commitment merkle proof. + /// + /// Prefer to use `complete_kzg_commitment_merkle_proof` with a reused proof for the + /// `blob_kzg_commitments` field. pub fn kzg_commitment_merkle_proof( &self, index: usize, ) -> Result, Error> { - // We compute the branches by generating 2 merkle trees: - // 1. Merkle tree for the `blob_kzg_commitments` List object - // 2. Merkle tree for the `BeaconBlockBody` container - // We then merge the branches for both the trees all the way up to the root. + let kzg_commitments_proof = self.kzg_commitments_merkle_proof()?; + let proof = self.complete_kzg_commitment_merkle_proof(index, &kzg_commitments_proof)?; + Ok(proof) + } - // Part1 (Branches for the subtree rooted at `blob_kzg_commitments`) - // - // Branches for `blob_kzg_commitments` without length mix-in - let blob_leaves = self - .blob_kzg_commitments()? 
- .iter() - .map(|commitment| commitment.tree_hash_root()) - .collect::>(); - let depth = E::max_blob_commitments_per_block() - .next_power_of_two() - .ilog2(); - let tree = MerkleTree::create(&blob_leaves, depth as usize); - let (_, mut proof) = tree - .generate_proof(index, depth as usize) - .map_err(Error::MerkleTreeError)?; + /// Produces the proof of inclusion for a `KzgCommitment` in `self.blob_kzg_commitments` + /// at `index` using an existing proof for the `blob_kzg_commitments` field. + pub fn complete_kzg_commitment_merkle_proof( + &self, + index: usize, + kzg_commitments_proof: &[Hash256], + ) -> Result, Error> { + match self { + Self::Base(_) | Self::Altair(_) | Self::Bellatrix(_) | Self::Capella(_) => { + Err(Error::IncorrectStateVariant) + } + Self::Deneb(_) | Self::Electra(_) => { + // We compute the branches by generating 2 merkle trees: + // 1. Merkle tree for the `blob_kzg_commitments` List object + // 2. Merkle tree for the `BeaconBlockBody` container + // We then merge the branches for both the trees all the way up to the root. - // Add the branch corresponding to the length mix-in. - let length = blob_leaves.len(); - let usize_len = std::mem::size_of::(); - let mut length_bytes = [0; BYTES_PER_CHUNK]; - length_bytes - .get_mut(0..usize_len) - .ok_or(Error::MerkleTreeError(MerkleTreeError::PleaseNotifyTheDevs))? - .copy_from_slice(&length.to_le_bytes()); - let length_root = Hash256::from_slice(length_bytes.as_slice()); - proof.push(length_root); + // Part1 (Branches for the subtree rooted at `blob_kzg_commitments`) + // + // Branches for `blob_kzg_commitments` without length mix-in + let blob_leaves = self + .blob_kzg_commitments()? 
+ .iter() + .map(|commitment| commitment.tree_hash_root()) + .collect::>(); + let depth = E::max_blob_commitments_per_block() + .next_power_of_two() + .ilog2(); + let tree = MerkleTree::create(&blob_leaves, depth as usize); + let (_, mut proof) = tree + .generate_proof(index, depth as usize) + .map_err(Error::MerkleTreeError)?; - // Part 2 - // Branches for `BeaconBlockBody` container - let body_leaves = self.body_merkle_leaves(); - let beacon_block_body_depth = body_leaves.len().next_power_of_two().ilog2() as usize; - let tree = MerkleTree::create(&body_leaves, beacon_block_body_depth); - let (_, mut proof_body) = tree - .generate_proof(BLOB_KZG_COMMITMENTS_INDEX, beacon_block_body_depth) - .map_err(Error::MerkleTreeError)?; - // Join the proofs for the subtree and the main tree - proof.append(&mut proof_body); - debug_assert_eq!(proof.len(), E::kzg_proof_inclusion_proof_depth()); + // Add the branch corresponding to the length mix-in. + let length = blob_leaves.len(); + let usize_len = std::mem::size_of::(); + let mut length_bytes = [0; BYTES_PER_CHUNK]; + length_bytes + .get_mut(0..usize_len) + .ok_or(Error::MerkleTreeError(MerkleTreeError::PleaseNotifyTheDevs))? + .copy_from_slice(&length.to_le_bytes()); + let length_root = Hash256::from_slice(length_bytes.as_slice()); + proof.push(length_root); - Ok(proof.into()) + // Part 2 + // Branches for `BeaconBlockBody` container + // Join the proofs for the subtree and the main tree + proof.extend_from_slice(kzg_commitments_proof); + + Ok(FixedVector::new(proof)?) + } + } } /// Produces the proof of inclusion for `self.blob_kzg_commitments`. @@ -241,7 +255,7 @@ impl<'a, E: EthSpec, Payload: AbstractExecPayload> BeaconBlockBodyRef<'a, E, let (_, proof) = tree .generate_proof(BLOB_KZG_COMMITMENTS_INDEX, beacon_block_body_depth) .map_err(Error::MerkleTreeError)?; - Ok(proof.into()) + Ok(FixedVector::new(proof)?) 
} pub fn block_body_merkle_proof(&self, generalized_index: usize) -> Result, Error> { diff --git a/consensus/types/src/beacon_state.rs b/consensus/types/src/beacon_state.rs index 9ea0ed94c5..a405714368 100644 --- a/consensus/types/src/beacon_state.rs +++ b/consensus/types/src/beacon_state.rs @@ -155,7 +155,6 @@ pub enum Error { current_fork: ForkName, }, TotalActiveBalanceDiffUninitialized, - MissingImmutableValidator(usize), IndexNotSupported(usize), InvalidFlagIndex(usize), MerkleTreeError(merkle_proof::MerkleTreeError), diff --git a/consensus/types/src/blob_sidecar.rs b/consensus/types/src/blob_sidecar.rs index 0f7dbb2673..5a330388cc 100644 --- a/consensus/types/src/blob_sidecar.rs +++ b/consensus/types/src/blob_sidecar.rs @@ -150,6 +150,37 @@ impl BlobSidecar { }) } + pub fn new_with_existing_proof( + index: usize, + blob: Blob, + signed_block: &SignedBeaconBlock, + signed_block_header: SignedBeaconBlockHeader, + kzg_commitments_inclusion_proof: &[Hash256], + kzg_proof: KzgProof, + ) -> Result { + let expected_kzg_commitments = signed_block + .message() + .body() + .blob_kzg_commitments() + .map_err(|_e| BlobSidecarError::PreDeneb)?; + let kzg_commitment = *expected_kzg_commitments + .get(index) + .ok_or(BlobSidecarError::MissingKzgCommitment)?; + let kzg_commitment_inclusion_proof = signed_block + .message() + .body() + .complete_kzg_commitment_merkle_proof(index, kzg_commitments_inclusion_proof)?; + + Ok(Self { + index: index as u64, + blob, + kzg_commitment, + kzg_proof, + signed_block_header, + kzg_commitment_inclusion_proof, + }) + } + pub fn id(&self) -> BlobIdentifier { BlobIdentifier { block_root: self.block_root(), diff --git a/consensus/types/src/historical_summary.rs b/consensus/types/src/historical_summary.rs index 76bb111ea2..8c82d52b81 100644 --- a/consensus/types/src/historical_summary.rs +++ b/consensus/types/src/historical_summary.rs @@ -15,6 +15,7 @@ use tree_hash_derive::TreeHash; #[derive( Debug, PartialEq, + Eq, Serialize, Deserialize, 
Encode, diff --git a/consensus/types/src/signed_beacon_block.rs b/consensus/types/src/signed_beacon_block.rs index b52adcfe41..bb5e1ea34b 100644 --- a/consensus/types/src/signed_beacon_block.rs +++ b/consensus/types/src/signed_beacon_block.rs @@ -1,6 +1,7 @@ -use crate::beacon_block_body::format_kzg_commitments; +use crate::beacon_block_body::{format_kzg_commitments, BLOB_KZG_COMMITMENTS_INDEX}; use crate::*; use derivative::Derivative; +use merkle_proof::MerkleTree; use serde::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use std::fmt; @@ -239,6 +240,45 @@ impl> SignedBeaconBlock } } + /// Produce a signed beacon block header AND a merkle proof for the KZG commitments. + /// + /// This method is more efficient than generating each part separately as it reuses hashing. + pub fn signed_block_header_and_kzg_commitments_proof( + &self, + ) -> Result< + ( + SignedBeaconBlockHeader, + FixedVector, + ), + Error, + > { + // Create the block body merkle tree + let body_leaves = self.message().body().body_merkle_leaves(); + let beacon_block_body_depth = body_leaves.len().next_power_of_two().ilog2() as usize; + let body_merkle_tree = MerkleTree::create(&body_leaves, beacon_block_body_depth); + + // Compute the KZG commitments inclusion proof + let (_, proof) = body_merkle_tree + .generate_proof(BLOB_KZG_COMMITMENTS_INDEX, beacon_block_body_depth) + .map_err(Error::MerkleTreeError)?; + let kzg_commitments_inclusion_proof = FixedVector::new(proof)?; + + let block_header = BeaconBlockHeader { + slot: self.slot(), + proposer_index: self.message().proposer_index(), + parent_root: self.parent_root(), + state_root: self.state_root(), + body_root: body_merkle_tree.hash(), + }; + + let signed_header = SignedBeaconBlockHeader { + message: block_header, + signature: self.signature().clone(), + }; + + Ok((signed_header, kzg_commitments_inclusion_proof)) + } + /// Convenience accessor for the block's slot. 
pub fn slot(&self) -> Slot { self.message().slot() diff --git a/consensus/types/src/test_utils/generate_random_block_and_blobs.rs b/consensus/types/src/test_utils/generate_random_block_and_blobs.rs index ab7ded0409..cf240c3f1f 100644 --- a/consensus/types/src/test_utils/generate_random_block_and_blobs.rs +++ b/consensus/types/src/test_utils/generate_random_block_and_blobs.rs @@ -83,6 +83,35 @@ mod test { } } + #[test] + fn test_verify_blob_inclusion_proof_from_existing_proof() { + let (block, mut blob_sidecars) = + generate_rand_block_and_blobs::(ForkName::Deneb, 1, &mut thread_rng()); + let BlobSidecar { + index, + blob, + kzg_proof, + .. + } = blob_sidecars.pop().unwrap(); + + // Compute the commitments inclusion proof and use it for building blob sidecar. + let (signed_block_header, kzg_commitments_inclusion_proof) = block + .signed_block_header_and_kzg_commitments_proof() + .unwrap(); + + let blob_sidecar = BlobSidecar::new_with_existing_proof( + index as usize, + blob, + &block, + signed_block_header, + &kzg_commitments_inclusion_proof, + kzg_proof, + ) + .unwrap(); + + assert!(blob_sidecar.verify_blob_sidecar_inclusion_proof()); + } + #[test] fn test_verify_blob_inclusion_proof_invalid() { let (_block, blobs) = diff --git a/consensus/types/src/validator.rs b/consensus/types/src/validator.rs index 159f2f48c7..222b9292a2 100644 --- a/consensus/types/src/validator.rs +++ b/consensus/types/src/validator.rs @@ -15,6 +15,7 @@ use tree_hash_derive::TreeHash; Debug, Clone, PartialEq, + Eq, Serialize, Deserialize, Encode, diff --git a/database_manager/src/cli.rs b/database_manager/src/cli.rs index 5521b97805..4246a51f89 100644 --- a/database_manager/src/cli.rs +++ b/database_manager/src/cli.rs @@ -3,6 +3,7 @@ use clap_utils::get_color_style; use clap_utils::FLAG_HEADER; use serde::{Deserialize, Serialize}; use std::path::PathBuf; +use store::hdiff::HierarchyConfig; use crate::InspectTarget; @@ -21,13 +22,14 @@ use crate::InspectTarget; pub struct DatabaseManager { 
#[clap( long, - value_name = "SLOT_COUNT", - help = "Specifies how often a freezer DB restore point should be stored. \ - Cannot be changed after initialization. \ - [default: 2048 (mainnet) or 64 (minimal)]", + global = true, + value_name = "N0,N1,N2,...", + help = "Specifies the frequency for storing full state snapshots and hierarchical \ + diffs in the freezer DB.", + default_value_t = HierarchyConfig::default(), display_order = 0 )] - pub slots_per_restore_point: Option, + pub hierarchy_exponents: HierarchyConfig, #[clap( long, diff --git a/database_manager/src/lib.rs b/database_manager/src/lib.rs index 3d55631848..fc15e98616 100644 --- a/database_manager/src/lib.rs +++ b/database_manager/src/lib.rs @@ -6,7 +6,7 @@ use beacon_chain::{ builder::Witness, eth1_chain::CachingEth1Backend, schema_change::migrate_schema, slot_clock::SystemTimeSlotClock, }; -use beacon_node::{get_data_dir, get_slots_per_restore_point, ClientConfig}; +use beacon_node::{get_data_dir, ClientConfig}; use clap::ArgMatches; use clap::ValueEnum; use cli::{Compact, Inspect}; @@ -16,7 +16,6 @@ use slog::{info, warn, Logger}; use std::fs; use std::io::Write; use std::path::PathBuf; -use store::metadata::STATE_UPPER_LIMIT_NO_RETAIN; use store::{ errors::Error, metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION}, @@ -39,13 +38,8 @@ fn parse_client_config( client_config .blobs_db_path .clone_from(&database_manager_config.blobs_dir); - - let (sprp, sprp_explicit) = - get_slots_per_restore_point::(database_manager_config.slots_per_restore_point)?; - - client_config.store.slots_per_restore_point = sprp; - client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit; client_config.store.blob_prune_margin_epochs = database_manager_config.blob_prune_margin_epochs; + client_config.store.hierarchy_config = database_manager_config.hierarchy_exponents.clone(); Ok(client_config) } @@ -298,6 +292,7 @@ fn parse_migrate_config(migrate_config: &Migrate) -> Result( migrate_config: MigrateConfig, 
client_config: ClientConfig, + mut genesis_state: BeaconState, runtime_context: &RuntimeContext, log: Logger, ) -> Result<(), Error> { @@ -328,13 +323,13 @@ pub fn migrate_db( "to" => to.as_u64(), ); + let genesis_state_root = genesis_state.canonical_root()?; migrate_schema::, _, _, _>>( db, - client_config.eth1.deposit_contract_deploy_block, + Some(genesis_state_root), from, to, log, - &spec, ) } @@ -426,8 +421,7 @@ pub fn prune_states( // correct network, and that we don't end up storing the wrong genesis state. let genesis_from_db = db .load_cold_state_by_slot(Slot::new(0)) - .map_err(|e| format!("Error reading genesis state: {e:?}"))? - .ok_or("Error: genesis state missing from database. Check schema version.")?; + .map_err(|e| format!("Error reading genesis state: {e:?}"))?; if genesis_from_db.genesis_validators_root() != genesis_state.genesis_validators_root() { return Err(format!( @@ -438,18 +432,12 @@ pub fn prune_states( // Check that the user has confirmed they want to proceed. 
if !prune_config.confirm { - match db.get_anchor_info() { - Some(anchor_info) - if anchor_info.state_lower_limit == 0 - && anchor_info.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN => - { - info!(log, "States have already been pruned"); - return Ok(()); - } - _ => { - info!(log, "Ready to prune states"); - } + if db.get_anchor_info().full_state_pruning_enabled() { + info!(log, "States have already been pruned"); + return Ok(()); } + + info!(log, "Ready to prune states"); warn!( log, "Pruning states is irreversible"; @@ -484,10 +472,33 @@ pub fn run( let log = context.log().clone(); let format_err = |e| format!("Fatal error: {:?}", e); + let get_genesis_state = || { + let executor = env.core_context().executor; + let network_config = context + .eth2_network_config + .clone() + .ok_or("Missing network config")?; + + executor + .block_on_dangerous( + network_config.genesis_state::( + client_config.genesis_state_url.as_deref(), + client_config.genesis_state_url_timeout, + &log, + ), + "get_genesis_state", + ) + .ok_or("Shutting down")? + .map_err(|e| format!("Error getting genesis state: {e}"))? 
+ .ok_or("Genesis state missing".to_string()) + }; + match &db_manager_config.subcommand { cli::DatabaseManagerSubcommand::Migrate(migrate_config) => { let migrate_config = parse_migrate_config(migrate_config)?; - migrate_db(migrate_config, client_config, &context, log).map_err(format_err) + let genesis_state = get_genesis_state()?; + migrate_db(migrate_config, client_config, genesis_state, &context, log) + .map_err(format_err) } cli::DatabaseManagerSubcommand::Inspect(inspect_config) => { let inspect_config = parse_inspect_config(inspect_config)?; @@ -503,27 +514,8 @@ pub fn run( prune_blobs(client_config, &context, log).map_err(format_err) } cli::DatabaseManagerSubcommand::PruneStates(prune_states_config) => { - let executor = env.core_context().executor; - let network_config = context - .eth2_network_config - .clone() - .ok_or("Missing network config")?; - - let genesis_state = executor - .block_on_dangerous( - network_config.genesis_state::( - client_config.genesis_state_url.as_deref(), - client_config.genesis_state_url_timeout, - &log, - ), - "get_genesis_state", - ) - .ok_or("Shutting down")? - .map_err(|e| format!("Error getting genesis state: {e}"))? 
- .ok_or("Genesis state missing")?; - let prune_config = parse_prune_states_config(prune_states_config)?; - + let genesis_state = get_genesis_state()?; prune_states(client_config, prune_config, genesis_state, &context, log) } cli::DatabaseManagerSubcommand::Compact(compact_config) => { diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index 1125697c7c..dd1cb68f06 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -71,6 +71,9 @@ sensitive_url = { workspace = true } eth1 = { workspace = true } eth2 = { workspace = true } beacon_processor = { workspace = true } +beacon_node_fallback = { workspace = true } +initialized_validators = { workspace = true } + [[test]] name = "lighthouse_tests" diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index ffa6e300a7..80986653c1 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -396,13 +396,14 @@ fn genesis_backfill_with_historic_flag() { } // Tests for Eth1 flags. +// DEPRECATED but should not crash #[test] fn dummy_eth1_flag() { CommandLineTest::new() .flag("dummy-eth1", None) - .run_with_zero_port() - .with_config(|config| assert!(config.dummy_eth1_backend)); + .run_with_zero_port(); } +// DEPRECATED but should not crash #[test] fn eth1_flag() { CommandLineTest::new() @@ -814,6 +815,27 @@ fn network_enable_sampling_flag() { .run_with_zero_port() .with_config(|config| assert!(config.chain.enable_sampling)); } +#[test] +fn blob_publication_batches() { + CommandLineTest::new() + .flag("blob-publication-batches", Some("3")) + .run_with_zero_port() + .with_config(|config| assert_eq!(config.chain.blob_publication_batches, 3)); +} + +#[test] +fn blob_publication_batch_interval() { + CommandLineTest::new() + .flag("blob-publication-batch-interval", Some("400")) + .run_with_zero_port() + .with_config(|config| { + assert_eq!( + config.chain.blob_publication_batch_interval, + Duration::from_millis(400) + ) + }); +} + #[test] fn 
network_enable_sampling_flag_default() { CommandLineTest::new() @@ -1798,45 +1820,12 @@ fn validator_monitor_metrics_threshold_custom() { } // Tests for Store flags. +// DEPRECATED but should still be accepted. #[test] fn slots_per_restore_point_flag() { CommandLineTest::new() .flag("slots-per-restore-point", Some("64")) - .run_with_zero_port() - .with_config(|config| assert_eq!(config.store.slots_per_restore_point, 64)); -} -#[test] -fn slots_per_restore_point_update_prev_default() { - use beacon_node::beacon_chain::store::config::{ - DEFAULT_SLOTS_PER_RESTORE_POINT, PREV_DEFAULT_SLOTS_PER_RESTORE_POINT, - }; - - CommandLineTest::new() - .flag("slots-per-restore-point", Some("2048")) - .run_with_zero_port() - .with_config_and_dir(|config, dir| { - // Check that 2048 is the previous default. - assert_eq!( - config.store.slots_per_restore_point, - PREV_DEFAULT_SLOTS_PER_RESTORE_POINT - ); - - // Restart the BN with the same datadir and the new default SPRP. It should - // allow this. - CommandLineTest::new() - .flag("datadir", Some(&dir.path().display().to_string())) - .flag("zero-ports", None) - .run_with_no_datadir() - .with_config(|config| { - // The dumped config will have the new default 8192 value, but the fact that - // the BN started and ran (with the same datadir) means that the override - // was successful. 
- assert_eq!( - config.store.slots_per_restore_point, - DEFAULT_SLOTS_PER_RESTORE_POINT - ); - }); - }) + .run_with_zero_port(); } #[test] @@ -1884,6 +1873,27 @@ fn historic_state_cache_size_default() { }); } #[test] +fn hdiff_buffer_cache_size_flag() { + CommandLineTest::new() + .flag("hdiff-buffer-cache-size", Some("1")) + .run_with_zero_port() + .with_config(|config| { + assert_eq!(config.store.hdiff_buffer_cache_size.get(), 1); + }); +} +#[test] +fn hdiff_buffer_cache_size_default() { + use beacon_node::beacon_chain::store::config::DEFAULT_HDIFF_BUFFER_CACHE_SIZE; + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| { + assert_eq!( + config.store.hdiff_buffer_cache_size, + DEFAULT_HDIFF_BUFFER_CACHE_SIZE + ); + }); +} +#[test] fn auto_compact_db_flag() { CommandLineTest::new() .flag("auto-compact-db", Some("false")) @@ -2474,6 +2484,21 @@ fn sync_eth1_chain_disable_deposit_contract_sync_flag() { .with_config(|config| assert_eq!(config.sync_eth1_chain, false)); } +#[test] +#[should_panic] +fn disable_deposit_contract_sync_conflicts_with_staking() { + let dir = TempDir::new().expect("Unable to create temporary directory"); + CommandLineTest::new_with_no_execution_endpoint() + .flag("disable-deposit-contract-sync", None) + .flag("staking", None) + .flag("execution-endpoints", Some("http://localhost:8551/")) + .flag( + "execution-jwt", + dir.path().join("jwt-file").as_os_str().to_str(), + ) + .run_with_zero_port(); +} + #[test] fn light_client_server_default() { CommandLineTest::new() diff --git a/lighthouse/tests/validator_client.rs b/lighthouse/tests/validator_client.rs index 147a371f0e..34fe04cc45 100644 --- a/lighthouse/tests/validator_client.rs +++ b/lighthouse/tests/validator_client.rs @@ -1,9 +1,8 @@ -use validator_client::{ - config::DEFAULT_WEB3SIGNER_KEEP_ALIVE, ApiTopic, BeaconNodeSyncDistanceTiers, Config, -}; +use beacon_node_fallback::{beacon_node_health::BeaconNodeSyncDistanceTiers, ApiTopic}; use crate::exec::CommandLineTestExec; 
use bls::{Keypair, PublicKeyBytes}; +use initialized_validators::DEFAULT_WEB3SIGNER_KEEP_ALIVE; use sensitive_url::SensitiveUrl; use std::fs::File; use std::io::Write; @@ -15,6 +14,7 @@ use std::string::ToString; use std::time::Duration; use tempfile::TempDir; use types::{Address, Slot}; +use validator_client::Config; /// Returns the `lighthouse validator_client` command. fn base_cmd() -> Command { @@ -240,7 +240,7 @@ fn fee_recipient_flag() { .run() .with_config(|config| { assert_eq!( - config.fee_recipient, + config.validator_store.fee_recipient, Some(Address::from_str("0x00000000219ab540356cbb839cbe05303d7705fa").unwrap()) ) }); @@ -430,7 +430,7 @@ fn no_doppelganger_protection_flag() { fn no_gas_limit_flag() { CommandLineTest::new() .run() - .with_config(|config| assert!(config.gas_limit.is_none())); + .with_config(|config| assert!(config.validator_store.gas_limit.is_none())); } #[test] fn gas_limit_flag() { @@ -438,46 +438,46 @@ fn gas_limit_flag() { .flag("gas-limit", Some("600")) .flag("builder-proposals", None) .run() - .with_config(|config| assert_eq!(config.gas_limit, Some(600))); + .with_config(|config| assert_eq!(config.validator_store.gas_limit, Some(600))); } #[test] fn no_builder_proposals_flag() { CommandLineTest::new() .run() - .with_config(|config| assert!(!config.builder_proposals)); + .with_config(|config| assert!(!config.validator_store.builder_proposals)); } #[test] fn builder_proposals_flag() { CommandLineTest::new() .flag("builder-proposals", None) .run() - .with_config(|config| assert!(config.builder_proposals)); + .with_config(|config| assert!(config.validator_store.builder_proposals)); } #[test] fn builder_boost_factor_flag() { CommandLineTest::new() .flag("builder-boost-factor", Some("150")) .run() - .with_config(|config| assert_eq!(config.builder_boost_factor, Some(150))); + .with_config(|config| assert_eq!(config.validator_store.builder_boost_factor, Some(150))); } #[test] fn no_builder_boost_factor_flag() { CommandLineTest::new() 
.run() - .with_config(|config| assert_eq!(config.builder_boost_factor, None)); + .with_config(|config| assert_eq!(config.validator_store.builder_boost_factor, None)); } #[test] fn prefer_builder_proposals_flag() { CommandLineTest::new() .flag("prefer-builder-proposals", None) .run() - .with_config(|config| assert!(config.prefer_builder_proposals)); + .with_config(|config| assert!(config.validator_store.prefer_builder_proposals)); } #[test] fn no_prefer_builder_proposals_flag() { CommandLineTest::new() .run() - .with_config(|config| assert!(!config.prefer_builder_proposals)); + .with_config(|config| assert!(!config.validator_store.prefer_builder_proposals)); } #[test] fn no_builder_registration_timestamp_override_flag() { @@ -624,7 +624,7 @@ fn validator_registration_batch_size_zero_value() { #[test] fn validator_disable_web3_signer_slashing_protection_default() { CommandLineTest::new().run().with_config(|config| { - assert!(config.enable_web3signer_slashing_protection); + assert!(config.validator_store.enable_web3signer_slashing_protection); }); } @@ -634,7 +634,7 @@ fn validator_disable_web3_signer_slashing_protection() { .flag("disable-slashing-protection-web3signer", None) .run() .with_config(|config| { - assert!(!config.enable_web3signer_slashing_protection); + assert!(!config.validator_store.enable_web3signer_slashing_protection); }); } @@ -642,7 +642,7 @@ fn validator_disable_web3_signer_slashing_protection() { fn validator_web3_signer_keep_alive_default() { CommandLineTest::new().run().with_config(|config| { assert_eq!( - config.web3_signer_keep_alive_timeout, + config.initialized_validators.web3_signer_keep_alive_timeout, DEFAULT_WEB3SIGNER_KEEP_ALIVE ); }); @@ -655,7 +655,7 @@ fn validator_web3_signer_keep_alive_override() { .run() .with_config(|config| { assert_eq!( - config.web3_signer_keep_alive_timeout, + config.initialized_validators.web3_signer_keep_alive_timeout, Some(Duration::from_secs(1)) ); }); diff --git 
a/testing/ef_tests/src/cases/fork_choice.rs b/testing/ef_tests/src/cases/fork_choice.rs index 8d933a6fcd..33ae132e8a 100644 --- a/testing/ef_tests/src/cases/fork_choice.rs +++ b/testing/ef_tests/src/cases/fork_choice.rs @@ -505,8 +505,8 @@ impl Tester { } Err(_) => GossipVerifiedBlob::__assumed_valid(blob_sidecar), }; - let result = self - .block_on_dangerous(self.harness.chain.process_gossip_blob(blob, || Ok(())))?; + let result = + self.block_on_dangerous(self.harness.chain.process_gossip_blob(blob))?; if valid { assert!(result.is_ok()); } diff --git a/testing/node_test_rig/Cargo.toml b/testing/node_test_rig/Cargo.toml index 4696d8d2f1..97e73b8a2f 100644 --- a/testing/node_test_rig/Cargo.toml +++ b/testing/node_test_rig/Cargo.toml @@ -11,6 +11,7 @@ types = { workspace = true } tempfile = { workspace = true } eth2 = { workspace = true } validator_client = { workspace = true } +beacon_node_fallback = { workspace = true } validator_dir = { workspace = true, features = ["insecure_keys"] } sensitive_url = { workspace = true } execution_layer = { workspace = true } diff --git a/testing/node_test_rig/src/lib.rs b/testing/node_test_rig/src/lib.rs index 3320898642..ac01c84b9d 100644 --- a/testing/node_test_rig/src/lib.rs +++ b/testing/node_test_rig/src/lib.rs @@ -16,12 +16,13 @@ use validator_client::ProductionValidatorClient; use validator_dir::insecure_keys::build_deterministic_validator_dirs; pub use beacon_node::{ClientConfig, ClientGenesis, ProductionClient}; +pub use beacon_node_fallback::ApiTopic; pub use environment; pub use eth2; pub use execution_layer::test_utils::{ Config as MockServerConfig, MockExecutionConfig, MockServer, }; -pub use validator_client::{ApiTopic, Config as ValidatorConfig}; +pub use validator_client::Config as ValidatorConfig; /// The global timeout for HTTP requests to the beacon node. 
const HTTP_TIMEOUT: Duration = Duration::from_secs(8); @@ -103,8 +104,6 @@ pub fn testing_client_config() -> ClientConfig { client_config.http_api.enabled = true; client_config.http_api.listen_port = 0; - client_config.dummy_eth1_backend = true; - let now = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("should get system time") diff --git a/testing/simulator/src/basic_sim.rs b/testing/simulator/src/basic_sim.rs index 5c9baa2349..8f659a893f 100644 --- a/testing/simulator/src/basic_sim.rs +++ b/testing/simulator/src/basic_sim.rs @@ -175,7 +175,8 @@ pub fn run_basic_sim(matches: &ArgMatches) -> Result<(), String> { executor.spawn( async move { let mut validator_config = testing_validator_config(); - validator_config.fee_recipient = Some(SUGGESTED_FEE_RECIPIENT.into()); + validator_config.validator_store.fee_recipient = + Some(SUGGESTED_FEE_RECIPIENT.into()); println!("Adding validator client {}", i); // Enable broadcast on every 4th node. diff --git a/testing/simulator/src/fallback_sim.rs b/testing/simulator/src/fallback_sim.rs index 0690ab242c..b3b9a46001 100644 --- a/testing/simulator/src/fallback_sim.rs +++ b/testing/simulator/src/fallback_sim.rs @@ -178,7 +178,8 @@ pub fn run_fallback_sim(matches: &ArgMatches) -> Result<(), String> { executor.spawn( async move { let mut validator_config = testing_validator_config(); - validator_config.fee_recipient = Some(SUGGESTED_FEE_RECIPIENT.into()); + validator_config.validator_store.fee_recipient = + Some(SUGGESTED_FEE_RECIPIENT.into()); println!("Adding validator client {}", i); network_1 .add_validator_client_with_fallbacks( diff --git a/testing/web3signer_tests/Cargo.toml b/testing/web3signer_tests/Cargo.toml index db5c53e0ac..0096d74f64 100644 --- a/testing/web3signer_tests/Cargo.toml +++ b/testing/web3signer_tests/Cargo.toml @@ -15,7 +15,6 @@ tempfile = { workspace = true } tokio = { workspace = true } reqwest = { workspace = true } url = { workspace = true } -validator_client = { workspace = true } slot_clock = 
{ workspace = true } futures = { workspace = true } task_executor = { workspace = true } @@ -28,3 +27,6 @@ serde_json = { workspace = true } zip = { workspace = true } parking_lot = { workspace = true } logging = { workspace = true } +initialized_validators = { workspace = true } +slashing_protection = { workspace = true } +validator_store = { workspace = true } diff --git a/testing/web3signer_tests/src/lib.rs b/testing/web3signer_tests/src/lib.rs index 3a039d3c80..a58dcb5fa0 100644 --- a/testing/web3signer_tests/src/lib.rs +++ b/testing/web3signer_tests/src/lib.rs @@ -22,10 +22,14 @@ mod tests { }; use eth2_keystore::KeystoreBuilder; use eth2_network_config::Eth2NetworkConfig; + use initialized_validators::{ + load_pem_certificate, load_pkcs12_identity, InitializedValidators, + }; use logging::test_logger; use parking_lot::Mutex; use reqwest::Client; use serde::Serialize; + use slashing_protection::{SlashingDatabase, SLASHING_PROTECTION_FILENAME}; use slot_clock::{SlotClock, TestingSlotClock}; use std::env; use std::fmt::Debug; @@ -41,13 +45,7 @@ mod tests { use tokio::time::sleep; use types::{attestation::AttestationBase, *}; use url::Url; - use validator_client::{ - initialized_validators::{ - load_pem_certificate, load_pkcs12_identity, InitializedValidators, - }, - validator_store::{Error as ValidatorStoreError, ValidatorStore}, - SlashingDatabase, SLASHING_PROTECTION_FILENAME, - }; + use validator_store::{Error as ValidatorStoreError, ValidatorStore}; /// If the we are unable to reach the Web3Signer HTTP API within this time out then we will /// assume it failed to start. 
@@ -322,7 +320,7 @@ mod tests { let log = test_logger(); let validator_dir = TempDir::new().unwrap(); - let config = validator_client::Config::default(); + let config = initialized_validators::Config::default(); let validator_definitions = ValidatorDefinitions::from(validator_definitions); let initialized_validators = InitializedValidators::from_definitions( validator_definitions, @@ -354,7 +352,7 @@ mod tests { let slot_clock = TestingSlotClock::new(Slot::new(0), Duration::from_secs(0), Duration::from_secs(1)); - let config = validator_client::Config { + let config = validator_store::Config { enable_web3signer_slashing_protection: slashing_protection_config.local, ..Default::default() }; diff --git a/validator_client/Cargo.toml b/validator_client/Cargo.toml index 86825a9ee3..044a622d54 100644 --- a/validator_client/Cargo.toml +++ b/validator_client/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "validator_client" version = "0.3.5" -authors = ["Paul Hauner ", "Age Manning ", "Luke Anderson "] +authors = ["Sigma Prime "] edition = { workspace = true } [lib] @@ -12,52 +12,32 @@ path = "src/lib.rs" tokio = { workspace = true } [dependencies] -tree_hash = { workspace = true } -clap = { workspace = true } -slashing_protection = { workspace = true } -slot_clock = { workspace = true } -types = { workspace = true } -safe_arith = { workspace = true } -serde = { workspace = true } -bincode = { workspace = true } -serde_json = { workspace = true } -slog = { workspace = true } -tokio = { workspace = true } -tokio-stream = { workspace = true } -futures = { workspace = true } -dirs = { workspace = true } -directory = { workspace = true } -lockfile = { workspace = true } -environment = { workspace = true } -parking_lot = { workspace = true } -filesystem = { workspace = true } -hex = { workspace = true } -deposit_contract = { workspace = true } -bls = { workspace = true } -eth2 = { workspace = true } -tempfile = { workspace = true } -validator_dir = { workspace = true } -clap_utils = 
{ workspace = true } -eth2_keystore = { workspace = true } account_utils = { workspace = true } -lighthouse_version = { workspace = true } -warp_utils = { workspace = true } -warp = { workspace = true } +beacon_node_fallback = { workspace = true } +clap = { workspace = true } +clap_utils = { workspace = true } +directory = { workspace = true } +doppelganger_service = { workspace = true } +dirs = { workspace = true } +eth2 = { workspace = true } +environment = { workspace = true } +graffiti_file = { workspace = true } hyper = { workspace = true } -ethereum_serde_utils = { workspace = true } -libsecp256k1 = { workspace = true } -ring = { workspace = true } -rand = { workspace = true, features = ["small_rng"] } +initialized_validators = { workspace = true } metrics = { workspace = true } monitoring_api = { workspace = true } +parking_lot = { workspace = true } +reqwest = { workspace = true } sensitive_url = { workspace = true } -task_executor = { workspace = true } -reqwest = { workspace = true, features = ["native-tls"] } -url = { workspace = true } -malloc_utils = { workspace = true } -sysinfo = { workspace = true } -system_health = { path = "../common/system_health" } -logging = { workspace = true } -strum = { workspace = true } -itertools = { workspace = true } +slashing_protection = { workspace = true } +serde = { workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +types = { workspace = true } +validator_http_api = { workspace = true } +validator_http_metrics = { workspace = true } +validator_metrics = { workspace = true } +validator_services = { workspace = true } +validator_store = { workspace = true } +tokio = { workspace = true } fdlimit = "0.3.0" diff --git a/validator_client/beacon_node_fallback/Cargo.toml b/validator_client/beacon_node_fallback/Cargo.toml new file mode 100644 index 0000000000..c15ded43d7 --- /dev/null +++ b/validator_client/beacon_node_fallback/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = 
"beacon_node_fallback" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[lib] +name = "beacon_node_fallback" +path = "src/lib.rs" + +[dependencies] +environment = { workspace = true } +eth2 = { workspace = true } +futures = { workspace = true } +itertools = { workspace = true } +serde = { workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +strum = { workspace = true } +tokio = { workspace = true } +types = { workspace = true } +validator_metrics = { workspace = true } diff --git a/validator_client/src/beacon_node_health.rs b/validator_client/beacon_node_fallback/src/beacon_node_health.rs similarity index 95% rename from validator_client/src/beacon_node_health.rs rename to validator_client/beacon_node_fallback/src/beacon_node_health.rs index 1783bb312c..e5b0487656 100644 --- a/validator_client/src/beacon_node_health.rs +++ b/validator_client/beacon_node_fallback/src/beacon_node_health.rs @@ -1,5 +1,8 @@ +use super::CandidateError; +use eth2::BeaconNodeHttpClient; use itertools::Itertools; use serde::{Deserialize, Serialize}; +use slog::{warn, Logger}; use std::cmp::Ordering; use std::fmt::{Debug, Display, Formatter}; use std::str::FromStr; @@ -285,6 +288,30 @@ impl BeaconNodeHealth { } } +pub async fn check_node_health( + beacon_node: &BeaconNodeHttpClient, + log: &Logger, +) -> Result<(Slot, bool, bool), CandidateError> { + let resp = match beacon_node.get_node_syncing().await { + Ok(resp) => resp, + Err(e) => { + warn!( + log, + "Unable connect to beacon node"; + "error" => %e + ); + + return Err(CandidateError::Offline); + } + }; + + Ok(( + resp.data.head_slot, + resp.data.is_optimistic, + resp.data.el_offline, + )) +} + #[cfg(test)] mod tests { use super::ExecutionEngineHealth::{Healthy, Unhealthy}; @@ -292,7 +319,7 @@ mod tests { BeaconNodeHealth, BeaconNodeHealthTier, BeaconNodeSyncDistanceTiers, IsOptimistic, SyncDistanceTier, }; - use crate::beacon_node_fallback::Config; + use crate::Config; 
use std::str::FromStr; use types::Slot; diff --git a/validator_client/src/beacon_node_fallback.rs b/validator_client/beacon_node_fallback/src/lib.rs similarity index 99% rename from validator_client/src/beacon_node_fallback.rs rename to validator_client/beacon_node_fallback/src/lib.rs index e5fe419983..95a221f189 100644 --- a/validator_client/src/beacon_node_fallback.rs +++ b/validator_client/beacon_node_fallback/src/lib.rs @@ -2,12 +2,11 @@ //! "fallback" behaviour; it will try a request on all of the nodes until one or none of them //! succeed. -use crate::beacon_node_health::{ - BeaconNodeHealth, BeaconNodeSyncDistanceTiers, ExecutionEngineHealth, IsOptimistic, - SyncDistanceTier, +pub mod beacon_node_health; +use beacon_node_health::{ + check_node_health, BeaconNodeHealth, BeaconNodeSyncDistanceTiers, ExecutionEngineHealth, + IsOptimistic, SyncDistanceTier, }; -use crate::check_synced::check_node_health; -use crate::http_metrics::metrics::{inc_counter_vec, ENDPOINT_ERRORS, ENDPOINT_REQUESTS}; use environment::RuntimeContext; use eth2::BeaconNodeHttpClient; use futures::future; @@ -24,6 +23,7 @@ use std::time::{Duration, Instant}; use strum::{EnumString, EnumVariantNames}; use tokio::{sync::RwLock, time::sleep}; use types::{ChainSpec, Config as ConfigSpec, EthSpec, Slot}; +use validator_metrics::{inc_counter_vec, ENDPOINT_ERRORS, ENDPOINT_REQUESTS}; /// Message emitted when the VC detects the BN is using a different spec. 
const UPDATE_REQUIRED_LOG_HINT: &str = "this VC or the remote BN may need updating"; @@ -739,7 +739,7 @@ impl ApiTopic { mod tests { use super::*; use crate::beacon_node_health::BeaconNodeHealthTier; - use crate::SensitiveUrl; + use eth2::SensitiveUrl; use eth2::Timeouts; use std::str::FromStr; use strum::VariantNames; diff --git a/validator_client/doppelganger_service/Cargo.toml b/validator_client/doppelganger_service/Cargo.toml new file mode 100644 index 0000000000..e5f7d3f2ba --- /dev/null +++ b/validator_client/doppelganger_service/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "doppelganger_service" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[dependencies] +beacon_node_fallback = { workspace = true } +environment = { workspace = true } +eth2 = { workspace = true } +parking_lot = { workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +task_executor = { workspace = true } +tokio = { workspace = true } +types = { workspace = true } + +[dev-dependencies] +futures = { workspace = true } +logging = {workspace = true } diff --git a/validator_client/src/doppelganger_service.rs b/validator_client/doppelganger_service/src/lib.rs similarity index 98% rename from validator_client/src/doppelganger_service.rs rename to validator_client/doppelganger_service/src/lib.rs index 1d552cc5ad..35228fe354 100644 --- a/validator_client/src/doppelganger_service.rs +++ b/validator_client/doppelganger_service/src/lib.rs @@ -29,8 +29,7 @@ //! //! Doppelganger protection is a best-effort, last-line-of-defence mitigation. Do not rely upon it. -use crate::beacon_node_fallback::BeaconNodeFallback; -use crate::validator_store::ValidatorStore; +use beacon_node_fallback::BeaconNodeFallback; use environment::RuntimeContext; use eth2::types::LivenessResponseData; use parking_lot::RwLock; @@ -114,6 +113,13 @@ struct LivenessResponses { /// validators on the network. 
pub const DEFAULT_REMAINING_DETECTION_EPOCHS: u64 = 1; +/// This crate cannot depend on ValidatorStore as validator_store depends on this crate and +/// initialises the doppelganger protection. For this reason, we abstract the validator store +/// functions this service needs through the following trait +pub trait DoppelgangerValidatorStore { + fn get_validator_index(&self, pubkey: &PublicKeyBytes) -> Option; +} + /// Store the per-validator status of doppelganger checking. #[derive(Debug, PartialEq)] pub struct DoppelgangerState { @@ -280,15 +286,20 @@ impl DoppelgangerService { /// Starts a reoccurring future which will try to keep the doppelganger service updated each /// slot. - pub fn start_update_service( + pub fn start_update_service( service: Arc, context: RuntimeContext, - validator_store: Arc>, + validator_store: Arc, beacon_nodes: Arc>, slot_clock: T, - ) -> Result<(), String> { + ) -> Result<(), String> + where + E: EthSpec, + T: 'static + SlotClock, + V: DoppelgangerValidatorStore + Send + Sync + 'static, + { // Define the `get_index` function as one that uses the validator store. - let get_index = move |pubkey| validator_store.validator_index(&pubkey); + let get_index = move |pubkey| validator_store.get_validator_index(&pubkey); // Define the `get_liveness` function as one that queries the beacon node API. 
let log = service.log.clone(); diff --git a/validator_client/graffiti_file/Cargo.toml b/validator_client/graffiti_file/Cargo.toml new file mode 100644 index 0000000000..02e48849d1 --- /dev/null +++ b/validator_client/graffiti_file/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "graffiti_file" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[lib] +name = "graffiti_file" +path = "src/lib.rs" + +[dependencies] +serde = { workspace = true } +bls = { workspace = true } +types = { workspace = true } +slog = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } +hex = { workspace = true } diff --git a/validator_client/src/graffiti_file.rs b/validator_client/graffiti_file/src/lib.rs similarity index 89% rename from validator_client/src/graffiti_file.rs rename to validator_client/graffiti_file/src/lib.rs index 29da3dca5a..0328c14eeb 100644 --- a/validator_client/src/graffiti_file.rs +++ b/validator_client/graffiti_file/src/lib.rs @@ -1,4 +1,5 @@ use serde::{Deserialize, Serialize}; +use slog::warn; use std::collections::HashMap; use std::fs::File; use std::io::{prelude::*, BufReader}; @@ -100,6 +101,27 @@ fn read_line(line: &str) -> Result<(Option, Graffiti), Error> { } } +// Given the various graffiti control methods, determine the graffiti that will be used for +// the next block produced by the validator with the given public key. 
+pub fn determine_graffiti( + validator_pubkey: &PublicKeyBytes, + log: &slog::Logger, + graffiti_file: Option, + validator_definition_graffiti: Option, + graffiti_flag: Option, +) -> Option { + graffiti_file + .and_then(|mut g| match g.load_graffiti(validator_pubkey) { + Ok(g) => g, + Err(e) => { + warn!(log, "Failed to read graffiti file"; "error" => ?e); + None + } + }) + .or(validator_definition_graffiti) + .or(graffiti_flag) +} + #[cfg(test)] mod tests { use super::*; diff --git a/validator_client/http_api/Cargo.toml b/validator_client/http_api/Cargo.toml new file mode 100644 index 0000000000..b83acdc782 --- /dev/null +++ b/validator_client/http_api/Cargo.toml @@ -0,0 +1,50 @@ +[package] +name = "validator_http_api" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[lib] +name = "validator_http_api" +path = "src/lib.rs" + +[dependencies] +account_utils = { workspace = true } +bls = { workspace = true } +beacon_node_fallback = { workspace = true } +deposit_contract = { workspace = true } +doppelganger_service = { workspace = true } +graffiti_file = { workspace = true } +eth2 = { workspace = true } +eth2_keystore = { workspace = true } +ethereum_serde_utils = { workspace = true } +initialized_validators = { workspace = true } +lighthouse_version = { workspace = true } +logging = { workspace = true } +parking_lot = { workspace = true } +filesystem = { workspace = true } +rand = { workspace = true } +serde = { workspace = true } +signing_method = { workspace = true } +sensitive_url = { workspace = true } +slashing_protection = { workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +sysinfo = { workspace = true } +system_health = { workspace = true } +task_executor = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true } +tokio-stream = { workspace = true } +types = { workspace = true } +validator_dir = { workspace = true } +validator_store = { workspace = true } 
+validator_services = { workspace = true } +url = { workspace = true } +warp_utils = { workspace = true } +warp = { workspace = true } + +[dev-dependencies] +itertools = { workspace = true } +futures = { workspace = true } +rand = { workspace = true, features = ["small_rng"] } diff --git a/validator_client/src/http_api/api_secret.rs b/validator_client/http_api/src/api_secret.rs similarity index 100% rename from validator_client/src/http_api/api_secret.rs rename to validator_client/http_api/src/api_secret.rs diff --git a/validator_client/src/http_api/create_signed_voluntary_exit.rs b/validator_client/http_api/src/create_signed_voluntary_exit.rs similarity index 98% rename from validator_client/src/http_api/create_signed_voluntary_exit.rs rename to validator_client/http_api/src/create_signed_voluntary_exit.rs index a9586da57e..32269b202b 100644 --- a/validator_client/src/http_api/create_signed_voluntary_exit.rs +++ b/validator_client/http_api/src/create_signed_voluntary_exit.rs @@ -1,10 +1,10 @@ -use crate::validator_store::ValidatorStore; use bls::{PublicKey, PublicKeyBytes}; use eth2::types::GenericResponse; use slog::{info, Logger}; use slot_clock::SlotClock; use std::sync::Arc; use types::{Epoch, EthSpec, SignedVoluntaryExit, VoluntaryExit}; +use validator_store::ValidatorStore; pub async fn create_signed_voluntary_exit( pubkey: PublicKey, diff --git a/validator_client/src/http_api/create_validator.rs b/validator_client/http_api/src/create_validator.rs similarity index 99% rename from validator_client/src/http_api/create_validator.rs rename to validator_client/http_api/src/create_validator.rs index afa5d4fed1..dfd092e8b4 100644 --- a/validator_client/src/http_api/create_validator.rs +++ b/validator_client/http_api/src/create_validator.rs @@ -1,4 +1,3 @@ -use crate::ValidatorStore; use account_utils::validator_definitions::{PasswordStorage, ValidatorDefinition}; use account_utils::{ eth2_keystore::Keystore, @@ -11,6 +10,7 @@ use std::path::{Path, PathBuf}; use 
types::ChainSpec; use types::EthSpec; use validator_dir::{keystore_password_path, Builder as ValidatorDirBuilder}; +use validator_store::ValidatorStore; /// Create some validator EIP-2335 keystores and store them on disk. Then, enroll the validators in /// this validator client. diff --git a/validator_client/src/http_api/graffiti.rs b/validator_client/http_api/src/graffiti.rs similarity index 98% rename from validator_client/src/http_api/graffiti.rs rename to validator_client/http_api/src/graffiti.rs index 79d4fd61f3..86238a697c 100644 --- a/validator_client/src/http_api/graffiti.rs +++ b/validator_client/http_api/src/graffiti.rs @@ -1,8 +1,8 @@ -use crate::validator_store::ValidatorStore; use bls::PublicKey; use slot_clock::SlotClock; use std::sync::Arc; use types::{graffiti::GraffitiString, EthSpec, Graffiti}; +use validator_store::ValidatorStore; pub fn get_graffiti( validator_pubkey: PublicKey, diff --git a/validator_client/src/http_api/keystores.rs b/validator_client/http_api/src/keystores.rs similarity index 99% rename from validator_client/src/http_api/keystores.rs rename to validator_client/http_api/src/keystores.rs index e5477ff8df..5822c89cb8 100644 --- a/validator_client/src/http_api/keystores.rs +++ b/validator_client/http_api/src/keystores.rs @@ -1,8 +1,4 @@ //! Implementation of the standard keystore management API. 
-use crate::{ - initialized_validators::Error, signing_method::SigningMethod, InitializedValidators, - ValidatorStore, -}; use account_utils::{validator_definitions::PasswordStorage, ZeroizeString}; use eth2::lighthouse_vc::{ std_types::{ @@ -13,6 +9,8 @@ use eth2::lighthouse_vc::{ types::{ExportKeystoresResponse, SingleExportKeystoresResponse}, }; use eth2_keystore::Keystore; +use initialized_validators::{Error, InitializedValidators}; +use signing_method::SigningMethod; use slog::{info, warn, Logger}; use slot_clock::SlotClock; use std::path::PathBuf; @@ -21,6 +19,7 @@ use task_executor::TaskExecutor; use tokio::runtime::Handle; use types::{EthSpec, PublicKeyBytes}; use validator_dir::{keystore_password_path, Builder as ValidatorDirBuilder}; +use validator_store::ValidatorStore; use warp::Rejection; use warp_utils::reject::{custom_bad_request, custom_server_error}; diff --git a/validator_client/src/http_api/mod.rs b/validator_client/http_api/src/lib.rs similarity index 99% rename from validator_client/src/http_api/mod.rs rename to validator_client/http_api/src/lib.rs index ded25abbcd..b58c7ccec0 100644 --- a/validator_client/src/http_api/mod.rs +++ b/validator_client/http_api/src/lib.rs @@ -8,16 +8,18 @@ mod tests; pub mod test_utils; -use crate::beacon_node_fallback::CandidateInfo; -use crate::http_api::graffiti::{delete_graffiti, get_graffiti, set_graffiti}; +use graffiti::{delete_graffiti, get_graffiti, set_graffiti}; + +use create_signed_voluntary_exit::create_signed_voluntary_exit; +use graffiti_file::{determine_graffiti, GraffitiFile}; +use validator_store::ValidatorStore; -use crate::http_api::create_signed_voluntary_exit::create_signed_voluntary_exit; -use crate::{determine_graffiti, BlockService, GraffitiFile, ValidatorStore}; use account_utils::{ mnemonic_from_phrase, validator_definitions::{SigningDefinition, ValidatorDefinition, Web3SignerDefinition}, }; pub use api_secret::ApiSecret; +use beacon_node_fallback::CandidateInfo; use create_validator::{ 
create_validators_mnemonic, create_validators_web3signer, get_voting_password_storage, }; @@ -46,6 +48,7 @@ use task_executor::TaskExecutor; use tokio_stream::{wrappers::BroadcastStream, StreamExt}; use types::{ChainSpec, ConfigAndPreset, EthSpec}; use validator_dir::Builder as ValidatorDirBuilder; +use validator_services::block_service::BlockService; use warp::{sse::Event, Filter}; use warp_utils::task::blocking_json_task; diff --git a/validator_client/src/http_api/remotekeys.rs b/validator_client/http_api/src/remotekeys.rs similarity index 98% rename from validator_client/src/http_api/remotekeys.rs rename to validator_client/http_api/src/remotekeys.rs index 053bbcb4b2..289be57182 100644 --- a/validator_client/src/http_api/remotekeys.rs +++ b/validator_client/http_api/src/remotekeys.rs @@ -1,5 +1,4 @@ //! Implementation of the standard remotekey management API. -use crate::{initialized_validators::Error, InitializedValidators, ValidatorStore}; use account_utils::validator_definitions::{ SigningDefinition, ValidatorDefinition, Web3SignerDefinition, }; @@ -8,6 +7,7 @@ use eth2::lighthouse_vc::std_types::{ ImportRemotekeyStatus, ImportRemotekeysRequest, ImportRemotekeysResponse, ListRemotekeysResponse, SingleListRemotekeysResponse, Status, }; +use initialized_validators::{Error, InitializedValidators}; use slog::{info, warn, Logger}; use slot_clock::SlotClock; use std::sync::Arc; @@ -15,6 +15,7 @@ use task_executor::TaskExecutor; use tokio::runtime::Handle; use types::{EthSpec, PublicKeyBytes}; use url::Url; +use validator_store::ValidatorStore; use warp::Rejection; use warp_utils::reject::custom_server_error; diff --git a/validator_client/src/http_api/test_utils.rs b/validator_client/http_api/src/test_utils.rs similarity index 97% rename from validator_client/src/http_api/test_utils.rs rename to validator_client/http_api/src/test_utils.rs index 119c611553..931c4ea08e 100644 --- a/validator_client/src/http_api/test_utils.rs +++ 
b/validator_client/http_api/src/test_utils.rs @@ -1,21 +1,19 @@ -use crate::doppelganger_service::DoppelgangerService; -use crate::key_cache::{KeyCache, CACHE_FILENAME}; -use crate::{ - http_api::{ApiSecret, Config as HttpConfig, Context}, - initialized_validators::{InitializedValidators, OnDecryptFailure}, - Config, ValidatorDefinitions, ValidatorStore, -}; +use crate::{ApiSecret, Config as HttpConfig, Context}; +use account_utils::validator_definitions::ValidatorDefinitions; use account_utils::{ eth2_wallet::WalletBuilder, mnemonic_from_phrase, random_mnemonic, random_password, ZeroizeString, }; use deposit_contract::decode_eth1_tx_data; +use doppelganger_service::DoppelgangerService; use eth2::{ lighthouse_vc::{http_client::ValidatorClientHttpClient, types::*}, types::ErrorMessage as ApiErrorMessage, Error as ApiError, }; use eth2_keystore::KeystoreBuilder; +use initialized_validators::key_cache::{KeyCache, CACHE_FILENAME}; +use initialized_validators::{InitializedValidators, OnDecryptFailure}; use logging::test_logger; use parking_lot::RwLock; use sensitive_url::SensitiveUrl; @@ -29,6 +27,7 @@ use std::time::Duration; use task_executor::test_utils::TestRuntime; use tempfile::{tempdir, TempDir}; use tokio::sync::oneshot; +use validator_store::{Config as ValidatorStoreConfig, ValidatorStore}; pub const PASSWORD_BYTES: &[u8] = &[42, 50, 37]; pub const TEST_DEFAULT_FEE_RECIPIENT: Address = Address::repeat_byte(42); @@ -89,16 +88,14 @@ impl ApiTester { let api_secret = ApiSecret::create_or_open(validator_dir.path()).unwrap(); let api_pubkey = api_secret.api_token(); - let config = Config { - validator_dir: validator_dir.path().into(), - secrets_dir: secrets_dir.path().into(), + let config = ValidatorStoreConfig { fee_recipient: Some(TEST_DEFAULT_FEE_RECIPIENT), ..Default::default() }; let spec = Arc::new(E::default_spec()); - let slashing_db_path = config.validator_dir.join(SLASHING_PROTECTION_FILENAME); + let slashing_db_path = 
validator_dir.path().join(SLASHING_PROTECTION_FILENAME); let slashing_protection = SlashingDatabase::open_or_create(&slashing_db_path).unwrap(); let slot_clock = diff --git a/validator_client/src/http_api/tests.rs b/validator_client/http_api/src/tests.rs similarity index 97% rename from validator_client/src/http_api/tests.rs rename to validator_client/http_api/src/tests.rs index ba3b7f685b..76a6952153 100644 --- a/validator_client/src/http_api/tests.rs +++ b/validator_client/http_api/src/tests.rs @@ -3,15 +3,13 @@ mod keystores; -use crate::doppelganger_service::DoppelgangerService; -use crate::{ - http_api::{ApiSecret, Config as HttpConfig, Context}, - initialized_validators::InitializedValidators, - Config, ValidatorDefinitions, ValidatorStore, -}; +use doppelganger_service::DoppelgangerService; +use initialized_validators::{Config as InitializedValidatorsConfig, InitializedValidators}; + +use crate::{ApiSecret, Config as HttpConfig, Context}; use account_utils::{ eth2_wallet::WalletBuilder, mnemonic_from_phrase, random_mnemonic, random_password, - random_password_string, ZeroizeString, + random_password_string, validator_definitions::ValidatorDefinitions, ZeroizeString, }; use deposit_contract::decode_eth1_tx_data; use eth2::{ @@ -34,6 +32,7 @@ use std::time::Duration; use task_executor::test_utils::TestRuntime; use tempfile::{tempdir, TempDir}; use types::graffiti::GraffitiString; +use validator_store::{Config as ValidatorStoreConfig, ValidatorStore}; const PASSWORD_BYTES: &[u8] = &[42, 50, 37]; pub const TEST_DEFAULT_FEE_RECIPIENT: Address = Address::repeat_byte(42); @@ -47,17 +46,18 @@ struct ApiTester { url: SensitiveUrl, slot_clock: TestingSlotClock, _validator_dir: TempDir, + _secrets_dir: TempDir, _test_runtime: TestRuntime, } impl ApiTester { pub async fn new() -> Self { - let mut config = Config::default(); + let mut config = ValidatorStoreConfig::default(); config.fee_recipient = Some(TEST_DEFAULT_FEE_RECIPIENT); Self::new_with_config(config).await } - 
pub async fn new_with_config(mut config: Config) -> Self { + pub async fn new_with_config(config: ValidatorStoreConfig) -> Self { let log = test_logger(); let validator_dir = tempdir().unwrap(); @@ -68,7 +68,7 @@ impl ApiTester { let initialized_validators = InitializedValidators::from_definitions( validator_defs, validator_dir.path().into(), - Config::default(), + InitializedValidatorsConfig::default(), log.clone(), ) .await @@ -77,12 +77,9 @@ impl ApiTester { let api_secret = ApiSecret::create_or_open(validator_dir.path()).unwrap(); let api_pubkey = api_secret.api_token(); - config.validator_dir = validator_dir.path().into(); - config.secrets_dir = secrets_dir.path().into(); - let spec = Arc::new(E::default_spec()); - let slashing_db_path = config.validator_dir.join(SLASHING_PROTECTION_FILENAME); + let slashing_db_path = validator_dir.path().join(SLASHING_PROTECTION_FILENAME); let slashing_protection = SlashingDatabase::open_or_create(&slashing_db_path).unwrap(); let genesis_time: u64 = 0; @@ -157,6 +154,7 @@ impl ApiTester { url, slot_clock, _validator_dir: validator_dir, + _secrets_dir: secrets_dir, _test_runtime: test_runtime, } } @@ -1147,11 +1145,11 @@ async fn validator_builder_boost_factor() { /// `prefer_builder_proposals` and `builder_boost_factor` values. 
#[tokio::test] async fn validator_derived_builder_boost_factor_with_process_defaults() { - let config = Config { + let config = ValidatorStoreConfig { builder_proposals: true, prefer_builder_proposals: false, builder_boost_factor: Some(80), - ..Config::default() + ..ValidatorStoreConfig::default() }; ApiTester::new_with_config(config) .await @@ -1181,11 +1179,11 @@ async fn validator_derived_builder_boost_factor_with_process_defaults() { #[tokio::test] async fn validator_builder_boost_factor_global_builder_proposals_true() { - let config = Config { + let config = ValidatorStoreConfig { builder_proposals: true, prefer_builder_proposals: false, builder_boost_factor: None, - ..Config::default() + ..ValidatorStoreConfig::default() }; ApiTester::new_with_config(config) .await @@ -1194,11 +1192,11 @@ async fn validator_builder_boost_factor_global_builder_proposals_true() { #[tokio::test] async fn validator_builder_boost_factor_global_builder_proposals_false() { - let config = Config { + let config = ValidatorStoreConfig { builder_proposals: false, prefer_builder_proposals: false, builder_boost_factor: None, - ..Config::default() + ..ValidatorStoreConfig::default() }; ApiTester::new_with_config(config) .await @@ -1207,11 +1205,11 @@ async fn validator_builder_boost_factor_global_builder_proposals_false() { #[tokio::test] async fn validator_builder_boost_factor_global_prefer_builder_proposals_true() { - let config = Config { + let config = ValidatorStoreConfig { builder_proposals: true, prefer_builder_proposals: true, builder_boost_factor: None, - ..Config::default() + ..ValidatorStoreConfig::default() }; ApiTester::new_with_config(config) .await @@ -1220,11 +1218,11 @@ async fn validator_builder_boost_factor_global_prefer_builder_proposals_true() { #[tokio::test] async fn validator_builder_boost_factor_global_prefer_builder_proposals_true_override() { - let config = Config { + let config = ValidatorStoreConfig { builder_proposals: false, prefer_builder_proposals: true, 
builder_boost_factor: None, - ..Config::default() + ..ValidatorStoreConfig::default() }; ApiTester::new_with_config(config) .await diff --git a/validator_client/src/http_api/tests/keystores.rs b/validator_client/http_api/src/tests/keystores.rs similarity index 99% rename from validator_client/src/http_api/tests/keystores.rs rename to validator_client/http_api/src/tests/keystores.rs index b6923d1c78..f3f6de548b 100644 --- a/validator_client/src/http_api/tests/keystores.rs +++ b/validator_client/http_api/src/tests/keystores.rs @@ -1,4 +1,3 @@ -use super::super::super::validator_store::DEFAULT_GAS_LIMIT; use super::*; use account_utils::random_password_string; use bls::PublicKeyBytes; @@ -14,6 +13,7 @@ use slashing_protection::interchange::{Interchange, InterchangeMetadata}; use std::{collections::HashMap, path::Path}; use tokio::runtime::Handle; use types::{attestation::AttestationBase, Address}; +use validator_store::DEFAULT_GAS_LIMIT; fn new_keystore(password: ZeroizeString) -> Keystore { let keypair = Keypair::random(); diff --git a/validator_client/http_metrics/Cargo.toml b/validator_client/http_metrics/Cargo.toml new file mode 100644 index 0000000000..a9de26a55b --- /dev/null +++ b/validator_client/http_metrics/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "validator_http_metrics" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[dependencies] +malloc_utils = { workspace = true } +slot_clock = { workspace = true } +metrics = { workspace = true } +parking_lot = { workspace = true } +serde = { workspace = true } +slog = { workspace = true } +warp_utils = { workspace = true } +warp = { workspace = true } +lighthouse_version = { workspace = true } +validator_services = { workspace = true } +validator_store = { workspace = true } +validator_metrics = { workspace = true } +types = { workspace = true } diff --git a/validator_client/src/http_metrics/mod.rs b/validator_client/http_metrics/src/lib.rs similarity index 68% rename from 
validator_client/src/http_metrics/mod.rs rename to validator_client/http_metrics/src/lib.rs index 67cab2bdc3..984b752e5a 100644 --- a/validator_client/src/http_metrics/mod.rs +++ b/validator_client/http_metrics/src/lib.rs @@ -1,18 +1,20 @@ //! This crate provides a HTTP server that is solely dedicated to serving the `/metrics` endpoint. //! //! For other endpoints, see the `http_api` crate. -pub mod metrics; -use crate::{DutiesService, ValidatorStore}; use lighthouse_version::version_with_platform; +use malloc_utils::scrape_allocator_metrics; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use slog::{crit, info, Logger}; -use slot_clock::SystemTimeSlotClock; +use slot_clock::{SlotClock, SystemTimeSlotClock}; use std::future::Future; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; use types::EthSpec; +use validator_services::duties_service::DutiesService; +use validator_store::ValidatorStore; use warp::{http::Response, Filter}; #[derive(Debug)] @@ -120,7 +122,7 @@ pub fn serve( .map(move || inner_ctx.clone()) .and_then(|ctx: Arc>| async move { Ok::<_, warp::Rejection>( - metrics::gather_prometheus_metrics(&ctx) + gather_prometheus_metrics(&ctx) .map(|body| { Response::builder() .status(200) @@ -156,3 +158,59 @@ pub fn serve( Ok((listening_socket, server)) } + +pub fn gather_prometheus_metrics( + ctx: &Context, +) -> std::result::Result { + use validator_metrics::*; + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + + { + let shared = ctx.shared.read(); + + if let Some(genesis_time) = shared.genesis_time { + if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { + let distance = now.as_secs() as i64 - genesis_time as i64; + set_gauge(&GENESIS_DISTANCE, distance); + } + } + + if let Some(duties_service) = &shared.duties_service { + if let Some(slot) = duties_service.slot_clock.now() { + let current_epoch = slot.epoch(E::slots_per_epoch()); + let next_epoch = current_epoch 
+ 1; + + set_int_gauge( + &PROPOSER_COUNT, + &[CURRENT_EPOCH], + duties_service.proposer_count(current_epoch) as i64, + ); + set_int_gauge( + &ATTESTER_COUNT, + &[CURRENT_EPOCH], + duties_service.attester_count(current_epoch) as i64, + ); + set_int_gauge( + &ATTESTER_COUNT, + &[NEXT_EPOCH], + duties_service.attester_count(next_epoch) as i64, + ); + } + } + } + + // It's important to ensure these metrics are explicitly enabled in the case that users aren't + // using glibc and this function causes panics. + if ctx.config.allocator_metrics_enabled { + scrape_allocator_metrics(); + } + + warp_utils::metrics::scrape_health_metrics(); + + encoder + .encode(&metrics::gather(), &mut buffer) + .map_err(|e| format!("{e:?}"))?; + + String::from_utf8(buffer).map_err(|e| format!("Failed to encode prometheus info: {:?}", e)) +} diff --git a/validator_client/initialized_validators/Cargo.toml b/validator_client/initialized_validators/Cargo.toml new file mode 100644 index 0000000000..426cb303f6 --- /dev/null +++ b/validator_client/initialized_validators/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "initialized_validators" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[dependencies] +signing_method = { workspace = true } +account_utils = { workspace = true } +eth2_keystore = { workspace = true } +metrics = { workspace = true } +lockfile = { workspace = true } +parking_lot = { workspace = true } +reqwest = { workspace = true } +slog = { workspace = true } +types = { workspace = true } +url = { workspace = true } +validator_dir = { workspace = true } +rand = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +bls = { workspace = true } +tokio = { workspace = true } +bincode = { workspace = true } +filesystem = { workspace = true } +validator_metrics = { workspace = true } diff --git a/validator_client/src/key_cache.rs b/validator_client/initialized_validators/src/key_cache.rs similarity index 100% rename from 
validator_client/src/key_cache.rs rename to validator_client/initialized_validators/src/key_cache.rs diff --git a/validator_client/src/initialized_validators.rs b/validator_client/initialized_validators/src/lib.rs similarity index 98% rename from validator_client/src/initialized_validators.rs rename to validator_client/initialized_validators/src/lib.rs index 0ef9a6a13d..0b36dbd62c 100644 --- a/validator_client/src/initialized_validators.rs +++ b/validator_client/initialized_validators/src/lib.rs @@ -6,7 +6,8 @@ //! The `InitializedValidators` struct in this file serves as the source-of-truth of which //! validators are managed by this validator client. -use crate::signing_method::SigningMethod; +pub mod key_cache; + use account_utils::{ read_password, read_password_from_user, read_password_string, validator_definitions::{ @@ -20,6 +21,8 @@ use lockfile::{Lockfile, LockfileError}; use metrics::set_gauge; use parking_lot::{MappedMutexGuard, Mutex, MutexGuard}; use reqwest::{Certificate, Client, Error as ReqwestError, Identity}; +use serde::{Deserialize, Serialize}; +use signing_method::SigningMethod; use slog::{debug, error, info, warn, Logger}; use std::collections::{HashMap, HashSet}; use std::fs::{self, File}; @@ -32,9 +35,7 @@ use types::{Address, Graffiti, Keypair, PublicKey, PublicKeyBytes}; use url::{ParseError, Url}; use validator_dir::Builder as ValidatorDirBuilder; -use crate::key_cache; -use crate::key_cache::KeyCache; -use crate::Config; +use key_cache::KeyCache; /// Default timeout for a request to a remote signer for a signature. /// @@ -45,6 +46,24 @@ const DEFAULT_REMOTE_SIGNER_REQUEST_TIMEOUT: Duration = Duration::from_secs(12); // Use TTY instead of stdin to capture passwords from users. const USE_STDIN: bool = false; +pub const DEFAULT_WEB3SIGNER_KEEP_ALIVE: Option = Some(Duration::from_secs(20)); + +// The configuration for initialised validators. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Config { + pub web3_signer_keep_alive_timeout: Option, + pub web3_signer_max_idle_connections: Option, +} + +impl Default for Config { + fn default() -> Self { + Config { + web3_signer_keep_alive_timeout: DEFAULT_WEB3SIGNER_KEEP_ALIVE, + web3_signer_max_idle_connections: None, + } + } +} + pub enum OnDecryptFailure { /// If the key cache fails to decrypt, create a new cache. CreateNew, @@ -1194,7 +1213,7 @@ impl InitializedValidators { /// A validator is considered "already known" and skipped if the public key is already known. /// I.e., if there are two different definitions with the same public key then the second will /// be ignored. - pub(crate) async fn update_validators(&mut self) -> Result<(), Error> { + pub async fn update_validators(&mut self) -> Result<(), Error> { //use key cache if available let mut key_stores = HashMap::new(); @@ -1380,11 +1399,11 @@ impl InitializedValidators { // Update the enabled and total validator counts set_gauge( - &crate::http_metrics::metrics::ENABLED_VALIDATORS_COUNT, + &validator_metrics::ENABLED_VALIDATORS_COUNT, self.num_enabled() as i64, ); set_gauge( - &crate::http_metrics::metrics::TOTAL_VALIDATORS_COUNT, + &validator_metrics::TOTAL_VALIDATORS_COUNT, self.num_total() as i64, ); Ok(()) diff --git a/validator_client/signing_method/Cargo.toml b/validator_client/signing_method/Cargo.toml new file mode 100644 index 0000000000..0f3852eff6 --- /dev/null +++ b/validator_client/signing_method/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "signing_method" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[dependencies] +eth2_keystore = { workspace = true } +lockfile = { workspace = true } +parking_lot = { workspace = true } +reqwest = { workspace = true } +task_executor = { workspace = true } +types = { workspace = true } +url = { workspace = true } +validator_metrics = { workspace = true } +serde = { workspace = true } 
+ethereum_serde_utils = { workspace = true } diff --git a/validator_client/src/signing_method.rs b/validator_client/signing_method/src/lib.rs similarity index 96% rename from validator_client/src/signing_method.rs rename to validator_client/signing_method/src/lib.rs index d89c9b8229..2fe4af39d3 100644 --- a/validator_client/src/signing_method.rs +++ b/validator_client/signing_method/src/lib.rs @@ -3,7 +3,6 @@ //! - Via a local `Keypair`. //! - Via a remote signer (Web3Signer) -use crate::http_metrics::metrics; use eth2_keystore::Keystore; use lockfile::Lockfile; use parking_lot::Mutex; @@ -166,8 +165,10 @@ impl SigningMethod { ) -> Result { match self { SigningMethod::LocalKeystore { voting_keypair, .. } => { - let _timer = - metrics::start_timer_vec(&metrics::SIGNING_TIMES, &[metrics::LOCAL_KEYSTORE]); + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::SIGNING_TIMES, + &[validator_metrics::LOCAL_KEYSTORE], + ); let voting_keypair = voting_keypair.clone(); // Spawn a blocking task to produce the signature. This avoids blocking the core @@ -187,8 +188,10 @@ impl SigningMethod { http_client, .. } => { - let _timer = - metrics::start_timer_vec(&metrics::SIGNING_TIMES, &[metrics::WEB3SIGNER]); + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::SIGNING_TIMES, + &[validator_metrics::WEB3SIGNER], + ); // Map the message into a Web3Signer type. 
let object = match signable_message { diff --git a/validator_client/src/signing_method/web3signer.rs b/validator_client/signing_method/src/web3signer.rs similarity index 100% rename from validator_client/src/signing_method/web3signer.rs rename to validator_client/signing_method/src/web3signer.rs diff --git a/validator_client/src/check_synced.rs b/validator_client/src/check_synced.rs deleted file mode 100644 index 2e9a62ff65..0000000000 --- a/validator_client/src/check_synced.rs +++ /dev/null @@ -1,27 +0,0 @@ -use crate::beacon_node_fallback::CandidateError; -use eth2::{types::Slot, BeaconNodeHttpClient}; -use slog::{warn, Logger}; - -pub async fn check_node_health( - beacon_node: &BeaconNodeHttpClient, - log: &Logger, -) -> Result<(Slot, bool, bool), CandidateError> { - let resp = match beacon_node.get_node_syncing().await { - Ok(resp) => resp, - Err(e) => { - warn!( - log, - "Unable connect to beacon node"; - "error" => %e - ); - - return Err(CandidateError::Offline); - } - }; - - Ok(( - resp.data.head_slot, - resp.data.is_optimistic, - resp.data.el_offline, - )) -} diff --git a/validator_client/src/config.rs b/validator_client/src/config.rs index f42ed55146..abdadeb393 100644 --- a/validator_client/src/config.rs +++ b/validator_client/src/config.rs @@ -1,8 +1,4 @@ -use crate::beacon_node_fallback::ApiTopic; -use crate::graffiti_file::GraffitiFile; -use crate::{ - beacon_node_fallback, beacon_node_health::BeaconNodeSyncDistanceTiers, http_api, http_metrics, -}; +use beacon_node_fallback::{beacon_node_health::BeaconNodeSyncDistanceTiers, ApiTopic}; use clap::ArgMatches; use clap_utils::{flags::DISABLE_MALLOC_TUNING_FLAG, parse_optional, parse_required}; use directory::{ @@ -10,6 +6,8 @@ use directory::{ DEFAULT_VALIDATOR_DIR, }; use eth2::types::Graffiti; +use graffiti_file::GraffitiFile; +use initialized_validators::Config as InitializedValidatorsConfig; use sensitive_url::SensitiveUrl; use serde::{Deserialize, Serialize}; use slog::{info, warn, Logger}; @@ -19,13 
+17,18 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; use types::{Address, GRAFFITI_BYTES_LEN}; +use validator_http_api; +use validator_http_metrics; +use validator_store::Config as ValidatorStoreConfig; pub const DEFAULT_BEACON_NODE: &str = "http://localhost:5052/"; -pub const DEFAULT_WEB3SIGNER_KEEP_ALIVE: Option = Some(Duration::from_secs(20)); /// Stores the core configuration for this validator instance. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Config { + /// Configuration parameters for the validator store. + #[serde(flatten)] + pub validator_store: ValidatorStoreConfig, /// The data directory, which stores all validator databases pub validator_dir: PathBuf, /// The directory containing the passwords to unlock validator keystores. @@ -49,12 +52,10 @@ pub struct Config { pub graffiti: Option, /// Graffiti file to load per validator graffitis. pub graffiti_file: Option, - /// Fallback fallback address. - pub fee_recipient: Option
, /// Configuration for the HTTP REST API. - pub http_api: http_api::Config, + pub http_api: validator_http_api::Config, /// Configuration for the HTTP REST API. - pub http_metrics: http_metrics::Config, + pub http_metrics: validator_http_metrics::Config, /// Configuration for the Beacon Node fallback. pub beacon_node_fallback: beacon_node_fallback::Config, /// Configuration for sending metrics to a remote explorer endpoint. @@ -68,11 +69,7 @@ pub struct Config { /// (<= 64 validators) pub enable_high_validator_count_metrics: bool, /// Enable use of the blinded block endpoints during proposals. - pub builder_proposals: bool, - /// Overrides the timestamp field in builder api ValidatorRegistrationV1 pub builder_registration_timestamp_override: Option, - /// Fallback gas limit. - pub gas_limit: Option, /// A list of custom certificates that the validator client will additionally use when /// connecting to a beacon node over SSL/TLS. pub beacon_nodes_tls_certs: Option>, @@ -82,16 +79,11 @@ pub struct Config { pub enable_latency_measurement_service: bool, /// Defines the number of validators per `validator/register_validator` request sent to the BN. pub validator_registration_batch_size: usize, - /// Enable slashing protection even while using web3signer keys. - pub enable_web3signer_slashing_protection: bool, - /// Specifies the boost factor, a percentage multiplier to apply to the builder's payload value. - pub builder_boost_factor: Option, - /// If true, Lighthouse will prefer builder proposals, if available. - pub prefer_builder_proposals: bool, /// Whether we are running with distributed network support. 
pub distributed: bool, - pub web3_signer_keep_alive_timeout: Option, - pub web3_signer_max_idle_connections: Option, + /// Configuration for the initialized validators + #[serde(flatten)] + pub initialized_validators: InitializedValidatorsConfig, } impl Default for Config { @@ -109,6 +101,7 @@ impl Default for Config { let beacon_nodes = vec![SensitiveUrl::parse(DEFAULT_BEACON_NODE) .expect("beacon_nodes must always be a valid url.")]; Self { + validator_store: ValidatorStoreConfig::default(), validator_dir, secrets_dir, beacon_nodes, @@ -119,7 +112,6 @@ impl Default for Config { use_long_timeouts: false, graffiti: None, graffiti_file: None, - fee_recipient: None, http_api: <_>::default(), http_metrics: <_>::default(), beacon_node_fallback: <_>::default(), @@ -127,18 +119,12 @@ impl Default for Config { enable_doppelganger_protection: false, enable_high_validator_count_metrics: false, beacon_nodes_tls_certs: None, - builder_proposals: false, builder_registration_timestamp_override: None, - gas_limit: None, broadcast_topics: vec![ApiTopic::Subscriptions], enable_latency_measurement_service: true, validator_registration_batch_size: 500, - enable_web3signer_slashing_protection: true, - builder_boost_factor: None, - prefer_builder_proposals: false, distributed: false, - web3_signer_keep_alive_timeout: DEFAULT_WEB3SIGNER_KEEP_ALIVE, - web3_signer_max_idle_connections: None, + initialized_validators: <_>::default(), } } } @@ -233,7 +219,7 @@ impl Config { if let Some(input_fee_recipient) = parse_optional::
(cli_args, "suggested-fee-recipient")? { - config.fee_recipient = Some(input_fee_recipient); + config.validator_store.fee_recipient = Some(input_fee_recipient); } if let Some(tls_certs) = parse_optional::(cli_args, "beacon-nodes-tls-certs")? { @@ -270,7 +256,7 @@ impl Config { * Web3 signer */ if let Some(s) = parse_optional::(cli_args, "web3-signer-keep-alive-timeout")? { - config.web3_signer_keep_alive_timeout = if s == "null" { + config.initialized_validators.web3_signer_keep_alive_timeout = if s == "null" { None } else { Some(Duration::from_millis( @@ -279,7 +265,9 @@ impl Config { } } if let Some(n) = parse_optional::(cli_args, "web3-signer-max-idle-connections")? { - config.web3_signer_max_idle_connections = Some(n); + config + .initialized_validators + .web3_signer_max_idle_connections = Some(n); } /* @@ -382,14 +370,14 @@ impl Config { } if cli_args.get_flag("builder-proposals") { - config.builder_proposals = true; + config.validator_store.builder_proposals = true; } if cli_args.get_flag("prefer-builder-proposals") { - config.prefer_builder_proposals = true; + config.validator_store.prefer_builder_proposals = true; } - config.gas_limit = cli_args + config.validator_store.gas_limit = cli_args .get_one::("gas-limit") .map(|gas_limit| { gas_limit @@ -408,7 +396,8 @@ impl Config { ); } - config.builder_boost_factor = parse_optional(cli_args, "builder-boost-factor")?; + config.validator_store.builder_boost_factor = + parse_optional(cli_args, "builder-boost-factor")?; config.enable_latency_measurement_service = !cli_args.get_flag("disable-latency-measurement-service"); @@ -419,7 +408,7 @@ impl Config { return Err("validator-registration-batch-size cannot be 0".to_string()); } - config.enable_web3signer_slashing_protection = + config.validator_store.enable_web3signer_slashing_protection = if cli_args.get_flag("disable-slashing-protection-web3signer") { warn!( log, diff --git a/validator_client/src/latency.rs b/validator_client/src/latency.rs index 
7e752f2923..22f02c7c0b 100644 --- a/validator_client/src/latency.rs +++ b/validator_client/src/latency.rs @@ -1,4 +1,4 @@ -use crate::{http_metrics::metrics, BeaconNodeFallback}; +use beacon_node_fallback::BeaconNodeFallback; use environment::RuntimeContext; use slog::debug; use slot_clock::SlotClock; @@ -44,14 +44,14 @@ pub fn start_latency_service( "node" => &measurement.beacon_node_id, "latency" => latency.as_millis(), ); - metrics::observe_timer_vec( - &metrics::VC_BEACON_NODE_LATENCY, + validator_metrics::observe_timer_vec( + &validator_metrics::VC_BEACON_NODE_LATENCY, &[&measurement.beacon_node_id], latency, ); if i == 0 { - metrics::observe_duration( - &metrics::VC_BEACON_NODE_LATENCY_PRIMARY_ENDPOINT, + validator_metrics::observe_duration( + &validator_metrics::VC_BEACON_NODE_LATENCY_PRIMARY_ENDPOINT, latency, ); } diff --git a/validator_client/src/lib.rs b/validator_client/src/lib.rs index 05ec1e53aa..2cc22357fb 100644 --- a/validator_client/src/lib.rs +++ b/validator_client/src/lib.rs @@ -1,52 +1,28 @@ -mod attestation_service; -mod beacon_node_fallback; -mod beacon_node_health; -mod block_service; -mod check_synced; mod cli; -mod duties_service; -mod graffiti_file; -mod http_metrics; -mod key_cache; +pub mod config; mod latency; mod notifier; -mod preparation_service; -mod signing_method; -mod sync_committee_service; -pub mod config; -mod doppelganger_service; -pub mod http_api; -pub mod initialized_validators; -pub mod validator_store; - -pub use beacon_node_fallback::ApiTopic; -pub use beacon_node_health::BeaconNodeSyncDistanceTiers; pub use cli::cli_app; pub use config::Config; use initialized_validators::InitializedValidators; use metrics::set_gauge; use monitoring_api::{MonitoringHttpClient, ProcessType}; use sensitive_url::SensitiveUrl; -pub use slashing_protection::{SlashingDatabase, SLASHING_PROTECTION_FILENAME}; +use slashing_protection::{SlashingDatabase, SLASHING_PROTECTION_FILENAME}; -use crate::beacon_node_fallback::{ +use 
beacon_node_fallback::{ start_fallback_updater_service, BeaconNodeFallback, CandidateBeaconNode, }; -use crate::doppelganger_service::DoppelgangerService; -use crate::graffiti_file::GraffitiFile; -use crate::initialized_validators::Error::UnableToOpenVotingKeystore; + use account_utils::validator_definitions::ValidatorDefinitions; -use attestation_service::{AttestationService, AttestationServiceBuilder}; -use block_service::{BlockService, BlockServiceBuilder}; use clap::ArgMatches; -use duties_service::{sync::SyncDutiesMap, DutiesService}; +use doppelganger_service::DoppelgangerService; use environment::RuntimeContext; -use eth2::{reqwest::ClientBuilder, types::Graffiti, BeaconNodeHttpClient, StatusCode, Timeouts}; -use http_api::ApiSecret; +use eth2::{reqwest::ClientBuilder, BeaconNodeHttpClient, StatusCode, Timeouts}; +use initialized_validators::Error::UnableToOpenVotingKeystore; use notifier::spawn_notifier; use parking_lot::RwLock; -use preparation_service::{PreparationService, PreparationServiceBuilder}; use reqwest::Certificate; use slog::{debug, error, info, warn, Logger}; use slot_clock::SlotClock; @@ -58,12 +34,20 @@ use std::net::SocketAddr; use std::path::Path; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; -use sync_committee_service::SyncCommitteeService; use tokio::{ sync::mpsc, time::{sleep, Duration}, }; -use types::{EthSpec, Hash256, PublicKeyBytes}; +use types::{EthSpec, Hash256}; +use validator_http_api::ApiSecret; +use validator_services::{ + attestation_service::{AttestationService, AttestationServiceBuilder}, + block_service::{BlockService, BlockServiceBuilder}, + duties_service::{self, DutiesService}, + preparation_service::{PreparationService, PreparationServiceBuilder}, + sync::SyncDutiesMap, + sync_committee_service::SyncCommitteeService, +}; use validator_store::ValidatorStore; /// The interval between attempts to contact the beacon node during startup. 
@@ -152,22 +136,23 @@ impl ProductionValidatorClient { ); // Optionally start the metrics server. - let http_metrics_ctx = if config.http_metrics.enabled { - let shared = http_metrics::Shared { + let validator_metrics_ctx = if config.http_metrics.enabled { + let shared = validator_http_metrics::Shared { validator_store: None, genesis_time: None, duties_service: None, }; - let ctx: Arc> = Arc::new(http_metrics::Context { - config: config.http_metrics.clone(), - shared: RwLock::new(shared), - log: log.clone(), - }); + let ctx: Arc> = + Arc::new(validator_http_metrics::Context { + config: config.http_metrics.clone(), + shared: RwLock::new(shared), + log: log.clone(), + }); let exit = context.executor.exit(); - let (_listen_addr, server) = http_metrics::serve(ctx.clone(), exit) + let (_listen_addr, server) = validator_http_metrics::serve(ctx.clone(), exit) .map_err(|e| format!("Unable to start metrics API server: {:?}", e))?; context @@ -215,7 +200,7 @@ impl ProductionValidatorClient { let validators = InitializedValidators::from_definitions( validator_defs, config.validator_dir.clone(), - config.clone(), + config.initialized_validators.clone(), log.clone(), ) .await @@ -384,20 +369,20 @@ impl ProductionValidatorClient { // Set the count for beacon node fallbacks excluding the primary beacon node. set_gauge( - &http_metrics::metrics::ETH2_FALLBACK_CONFIGURED, + &validator_metrics::ETH2_FALLBACK_CONFIGURED, num_nodes.saturating_sub(1) as i64, ); // Set the total beacon node count. set_gauge( - &http_metrics::metrics::TOTAL_BEACON_NODES_COUNT, + &validator_metrics::TOTAL_BEACON_NODES_COUNT, num_nodes as i64, ); // Initialize the number of connected, synced beacon nodes to 0. 
- set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 0); - set_gauge(&http_metrics::metrics::SYNCED_BEACON_NODES_COUNT, 0); + set_gauge(&validator_metrics::ETH2_FALLBACK_CONNECTED, 0); + set_gauge(&validator_metrics::SYNCED_BEACON_NODES_COUNT, 0); // Initialize the number of connected, avaliable beacon nodes to 0. - set_gauge(&http_metrics::metrics::AVAILABLE_BEACON_NODES_COUNT, 0); + set_gauge(&validator_metrics::AVAILABLE_BEACON_NODES_COUNT, 0); let mut beacon_nodes: BeaconNodeFallback<_, E> = BeaconNodeFallback::new( candidates, @@ -422,7 +407,7 @@ impl ProductionValidatorClient { }; // Update the metrics server. - if let Some(ctx) = &http_metrics_ctx { + if let Some(ctx) = &validator_metrics_ctx { ctx.shared.write().genesis_time = Some(genesis_time); } @@ -459,7 +444,7 @@ impl ProductionValidatorClient { context.eth2_config.spec.clone(), doppelganger_service.clone(), slot_clock.clone(), - &config, + &config.validator_store, context.executor.clone(), log.clone(), )); @@ -496,7 +481,7 @@ impl ProductionValidatorClient { }); // Update the metrics server. 
- if let Some(ctx) = &http_metrics_ctx { + if let Some(ctx) = &validator_metrics_ctx { ctx.shared.write().validator_store = Some(validator_store.clone()); ctx.shared.write().duties_service = Some(duties_service.clone()); } @@ -569,7 +554,7 @@ impl ProductionValidatorClient { let api_secret = ApiSecret::create_or_open(&self.config.validator_dir)?; self.http_api_listen_addr = if self.config.http_api.enabled { - let ctx = Arc::new(http_api::Context { + let ctx = Arc::new(validator_http_api::Context { task_executor: self.context.executor.clone(), api_secret, block_service: Some(self.block_service.clone()), @@ -588,7 +573,7 @@ impl ProductionValidatorClient { let exit = self.context.executor.exit(); - let (listen_addr, server) = http_api::serve(ctx, exit) + let (listen_addr, server) = validator_http_api::serve(ctx, exit) .map_err(|e| format!("Unable to start HTTP API server: {:?}", e))?; self.context @@ -850,24 +835,3 @@ pub fn load_pem_certificate>(pem_path: P) -> Result, - validator_definition_graffiti: Option, - graffiti_flag: Option, -) -> Option { - graffiti_file - .and_then(|mut g| match g.load_graffiti(validator_pubkey) { - Ok(g) => g, - Err(e) => { - warn!(log, "Failed to read graffiti file"; "error" => ?e); - None - } - }) - .or(validator_definition_graffiti) - .or(graffiti_flag) -} diff --git a/validator_client/src/notifier.rs b/validator_client/src/notifier.rs index cda13a5e63..ff66517795 100644 --- a/validator_client/src/notifier.rs +++ b/validator_client/src/notifier.rs @@ -1,4 +1,3 @@ -use crate::http_metrics; use crate::{DutiesService, ProductionValidatorClient}; use metrics::set_gauge; use slog::{debug, error, info, Logger}; @@ -45,15 +44,15 @@ async fn notify( let num_synced_fallback = num_synced.saturating_sub(1); set_gauge( - &http_metrics::metrics::AVAILABLE_BEACON_NODES_COUNT, + &validator_metrics::AVAILABLE_BEACON_NODES_COUNT, num_available as i64, ); set_gauge( - &http_metrics::metrics::SYNCED_BEACON_NODES_COUNT, + 
&validator_metrics::SYNCED_BEACON_NODES_COUNT, num_synced as i64, ); set_gauge( - &http_metrics::metrics::TOTAL_BEACON_NODES_COUNT, + &validator_metrics::TOTAL_BEACON_NODES_COUNT, num_total as i64, ); if num_synced > 0 { @@ -79,9 +78,9 @@ async fn notify( ) } if num_synced_fallback > 0 { - set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 1); + set_gauge(&validator_metrics::ETH2_FALLBACK_CONNECTED, 1); } else { - set_gauge(&http_metrics::metrics::ETH2_FALLBACK_CONNECTED, 0); + set_gauge(&validator_metrics::ETH2_FALLBACK_CONNECTED, 0); } for info in candidate_info { diff --git a/validator_client/validator_metrics/Cargo.toml b/validator_client/validator_metrics/Cargo.toml new file mode 100644 index 0000000000..b3cf665b26 --- /dev/null +++ b/validator_client/validator_metrics/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "validator_metrics" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[lib] +name = "validator_metrics" +path = "src/lib.rs" + +[dependencies] +metrics = { workspace = true } diff --git a/validator_client/src/http_metrics/metrics.rs b/validator_client/validator_metrics/src/lib.rs similarity index 82% rename from validator_client/src/http_metrics/metrics.rs rename to validator_client/validator_metrics/src/lib.rs index 57e1080fd9..060d8a4edd 100644 --- a/validator_client/src/http_metrics/metrics.rs +++ b/validator_client/validator_metrics/src/lib.rs @@ -1,9 +1,4 @@ -use super::Context; -use malloc_utils::scrape_allocator_metrics; -use slot_clock::SlotClock; use std::sync::LazyLock; -use std::time::{SystemTime, UNIX_EPOCH}; -use types::EthSpec; pub const SUCCESS: &str = "success"; pub const SLASHABLE: &str = "slashable"; @@ -267,56 +262,3 @@ pub static VC_BEACON_NODE_LATENCY_PRIMARY_ENDPOINT: LazyLock> "Round-trip latency for the primary BN endpoint", ) }); - -pub fn gather_prometheus_metrics( - ctx: &Context, -) -> std::result::Result { - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - - { - let 
shared = ctx.shared.read(); - - if let Some(genesis_time) = shared.genesis_time { - if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { - let distance = now.as_secs() as i64 - genesis_time as i64; - set_gauge(&GENESIS_DISTANCE, distance); - } - } - - if let Some(duties_service) = &shared.duties_service { - if let Some(slot) = duties_service.slot_clock.now() { - let current_epoch = slot.epoch(E::slots_per_epoch()); - let next_epoch = current_epoch + 1; - - set_int_gauge( - &PROPOSER_COUNT, - &[CURRENT_EPOCH], - duties_service.proposer_count(current_epoch) as i64, - ); - set_int_gauge( - &ATTESTER_COUNT, - &[CURRENT_EPOCH], - duties_service.attester_count(current_epoch) as i64, - ); - set_int_gauge( - &ATTESTER_COUNT, - &[NEXT_EPOCH], - duties_service.attester_count(next_epoch) as i64, - ); - } - } - } - - // It's important to ensure these metrics are explicitly enabled in the case that users aren't - // using glibc and this function causes panics. - if ctx.config.allocator_metrics_enabled { - scrape_allocator_metrics(); - } - - warp_utils::metrics::scrape_health_metrics(); - - encoder.encode(&metrics::gather(), &mut buffer).unwrap(); - - String::from_utf8(buffer).map_err(|e| format!("Failed to encode prometheus info: {:?}", e)) -} diff --git a/validator_client/validator_services/Cargo.toml b/validator_client/validator_services/Cargo.toml new file mode 100644 index 0000000000..7dcd815541 --- /dev/null +++ b/validator_client/validator_services/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "validator_services" +version = "0.1.0" +edition = { workspace = true } +authors = ["Sigma Prime "] + +[dependencies] +beacon_node_fallback = { workspace = true } +validator_metrics = { workspace = true } +validator_store = { workspace = true } +graffiti_file = { workspace = true } +doppelganger_service = { workspace = true } +environment = { workspace = true } +eth2 = { workspace = true } +futures = { workspace = true } +parking_lot = { workspace = true } +safe_arith = 
{ workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +tokio = { workspace = true } +types = { workspace = true } +tree_hash = { workspace = true } +bls = { workspace = true } diff --git a/validator_client/src/attestation_service.rs b/validator_client/validator_services/src/attestation_service.rs similarity index 95% rename from validator_client/src/attestation_service.rs rename to validator_client/validator_services/src/attestation_service.rs index 5363f36f66..e31ad4f661 100644 --- a/validator_client/src/attestation_service.rs +++ b/validator_client/validator_services/src/attestation_service.rs @@ -1,9 +1,5 @@ -use crate::beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; -use crate::{ - duties_service::{DutiesService, DutyAndProof}, - http_metrics::metrics, - validator_store::{Error as ValidatorStoreError, ValidatorStore}, -}; +use crate::duties_service::{DutiesService, DutyAndProof}; +use beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; use environment::RuntimeContext; use futures::future::join_all; use slog::{crit, debug, error, info, trace, warn}; @@ -14,8 +10,10 @@ use std::sync::Arc; use tokio::time::{sleep, sleep_until, Duration, Instant}; use tree_hash::TreeHash; use types::{Attestation, AttestationData, ChainSpec, CommitteeIndex, EthSpec, Slot}; +use validator_store::{Error as ValidatorStoreError, ValidatorStore}; /// Builds an `AttestationService`. 
+#[derive(Default)] pub struct AttestationServiceBuilder { duties_service: Option>>, validator_store: Option>>, @@ -238,9 +236,9 @@ impl AttestationService { aggregate_production_instant: Instant, ) -> Result<(), ()> { let log = self.context.log(); - let attestations_timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::ATTESTATIONS], + let attestations_timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::ATTESTATIONS], ); // There's not need to produce `Attestation` or `SignedAggregateAndProof` if we do not have @@ -278,9 +276,9 @@ impl AttestationService { sleep_until(aggregate_production_instant).await; // Start the metrics timer *after* we've done the delay. - let _aggregates_timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::AGGREGATES], + let _aggregates_timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::AGGREGATES], ); // Then download, sign and publish a `SignedAggregateAndProof` for each @@ -339,9 +337,9 @@ impl AttestationService { let attestation_data = self .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::ATTESTATIONS_HTTP_GET], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::ATTESTATIONS_HTTP_GET], ); beacon_node .get_validator_attestation_data(slot, committee_index) @@ -454,9 +452,9 @@ impl AttestationService { match self .beacon_nodes .request(ApiTopic::Attestations, |beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::ATTESTATIONS_HTTP_POST], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::ATTESTATIONS_HTTP_POST], ); if fork_name.electra_enabled() 
{ beacon_node @@ -531,9 +529,9 @@ impl AttestationService { let aggregated_attestation = &self .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::AGGREGATES_HTTP_GET], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::AGGREGATES_HTTP_GET], ); if fork_name.electra_enabled() { beacon_node @@ -620,9 +618,9 @@ impl AttestationService { match self .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::ATTESTATION_SERVICE_TIMES, - &[metrics::AGGREGATES_HTTP_POST], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::ATTESTATION_SERVICE_TIMES, + &[validator_metrics::AGGREGATES_HTTP_POST], ); if fork_name.electra_enabled() { beacon_node diff --git a/validator_client/src/block_service.rs b/validator_client/validator_services/src/block_service.rs similarity index 94% rename from validator_client/src/block_service.rs rename to validator_client/validator_services/src/block_service.rs index 9903324cad..60eb0361ad 100644 --- a/validator_client/src/block_service.rs +++ b/validator_client/validator_services/src/block_service.rs @@ -1,17 +1,9 @@ -use crate::beacon_node_fallback::{Error as FallbackError, Errors}; -use crate::{ - beacon_node_fallback::{ApiTopic, BeaconNodeFallback}, - determine_graffiti, - graffiti_file::GraffitiFile, -}; -use crate::{ - http_metrics::metrics, - validator_store::{Error as ValidatorStoreError, ValidatorStore}, -}; +use beacon_node_fallback::{ApiTopic, BeaconNodeFallback, Error as FallbackError, Errors}; use bls::SignatureBytes; use environment::RuntimeContext; use eth2::types::{FullBlockContents, PublishBlockRequest}; use eth2::{BeaconNodeHttpClient, StatusCode}; +use graffiti_file::{determine_graffiti, GraffitiFile}; use slog::{crit, debug, error, info, trace, warn, Logger}; use slot_clock::SlotClock; use 
std::fmt::Debug; @@ -24,6 +16,7 @@ use types::{ BlindedBeaconBlock, BlockType, EthSpec, Graffiti, PublicKeyBytes, SignedBlindedBeaconBlock, Slot, }; +use validator_store::{Error as ValidatorStoreError, ValidatorStore}; #[derive(Debug)] pub enum BlockError { @@ -50,6 +43,7 @@ impl From> for BlockError { } /// Builds a `BlockService`. +#[derive(Default)] pub struct BlockServiceBuilder { validator_store: Option>>, slot_clock: Option>, @@ -186,8 +180,8 @@ impl ProposerFallback { pub struct Inner { validator_store: Arc>, slot_clock: Arc, - pub(crate) beacon_nodes: Arc>, - pub(crate) proposer_nodes: Option>>, + pub beacon_nodes: Arc>, + pub proposer_nodes: Option>>, context: RuntimeContext, graffiti: Option, graffiti_file: Option, @@ -247,8 +241,10 @@ impl BlockService { /// Attempt to produce a block for any block producers in the `ValidatorStore`. async fn do_update(&self, notification: BlockServiceNotification) -> Result<(), ()> { let log = self.context.log(); - let _timer = - metrics::start_timer_vec(&metrics::BLOCK_SERVICE_TIMES, &[metrics::FULL_UPDATE]); + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::FULL_UPDATE], + ); let slot = self.slot_clock.now().ok_or_else(move || { crit!(log, "Duties manager failed to read slot clock"); @@ -337,7 +333,7 @@ impl BlockService { unsigned_block: UnsignedBlock, ) -> Result<(), BlockError> { let log = self.context.log(); - let signing_timer = metrics::start_timer(&metrics::BLOCK_SIGNING_TIMES); + let signing_timer = validator_metrics::start_timer(&validator_metrics::BLOCK_SIGNING_TIMES); let res = match unsigned_block { UnsignedBlock::Full(block_contents) => { @@ -418,8 +414,10 @@ impl BlockService { builder_boost_factor: Option, ) -> Result<(), BlockError> { let log = self.context.log(); - let _timer = - metrics::start_timer_vec(&metrics::BLOCK_SERVICE_TIMES, &[metrics::BEACON_BLOCK]); + let _timer = validator_metrics::start_timer_vec( + 
&validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::BEACON_BLOCK], + ); let randao_reveal = match self .validator_store @@ -475,9 +473,9 @@ impl BlockService { // great view of attestations on the network. let unsigned_block = proposer_fallback .request_proposers_last(|beacon_node| async move { - let _get_timer = metrics::start_timer_vec( - &metrics::BLOCK_SERVICE_TIMES, - &[metrics::BEACON_BLOCK_HTTP_GET], + let _get_timer = validator_metrics::start_timer_vec( + &validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::BEACON_BLOCK_HTTP_GET], ); Self::get_validator_block( &beacon_node, @@ -520,9 +518,9 @@ impl BlockService { let slot = signed_block.slot(); match signed_block { SignedBlock::Full(signed_block) => { - let _post_timer = metrics::start_timer_vec( - &metrics::BLOCK_SERVICE_TIMES, - &[metrics::BEACON_BLOCK_HTTP_POST], + let _post_timer = validator_metrics::start_timer_vec( + &validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::BEACON_BLOCK_HTTP_POST], ); beacon_node .post_beacon_blocks_v2_ssz(signed_block, None) @@ -530,9 +528,9 @@ impl BlockService { .or_else(|e| handle_block_post_error(e, slot, log))? } SignedBlock::Blinded(signed_block) => { - let _post_timer = metrics::start_timer_vec( - &metrics::BLOCK_SERVICE_TIMES, - &[metrics::BLINDED_BEACON_BLOCK_HTTP_POST], + let _post_timer = validator_metrics::start_timer_vec( + &validator_metrics::BLOCK_SERVICE_TIMES, + &[validator_metrics::BLINDED_BEACON_BLOCK_HTTP_POST], ); beacon_node .post_beacon_blinded_blocks_v2_ssz(signed_block, None) diff --git a/validator_client/src/duties_service.rs b/validator_client/validator_services/src/duties_service.rs similarity index 95% rename from validator_client/src/duties_service.rs rename to validator_client/validator_services/src/duties_service.rs index cf8d499792..187eb4feb5 100644 --- a/validator_client/src/duties_service.rs +++ b/validator_client/validator_services/src/duties_service.rs @@ -6,15 +6,11 @@ //! 
The `DutiesService` is also responsible for sending events to the `BlockService` which trigger //! block production. -pub mod sync; - -use crate::beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; -use crate::http_metrics::metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY}; -use crate::{ - block_service::BlockServiceNotification, - http_metrics::metrics, - validator_store::{DoppelgangerStatus, Error as ValidatorStoreError, ValidatorStore}, -}; +use crate::block_service::BlockServiceNotification; +use crate::sync::poll_sync_committee_duties; +use crate::sync::SyncDutiesMap; +use beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; +use doppelganger_service::DoppelgangerStatus; use environment::RuntimeContext; use eth2::types::{ AttesterData, BeaconCommitteeSubscription, DutiesResponse, ProposerData, StateId, ValidatorId, @@ -29,10 +25,10 @@ use std::collections::{hash_map, BTreeMap, HashMap, HashSet}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; -use sync::poll_sync_committee_duties; -use sync::SyncDutiesMap; use tokio::{sync::mpsc::Sender, time::sleep}; use types::{ChainSpec, Epoch, EthSpec, Hash256, PublicKeyBytes, SelectionProof, Slot}; +use validator_metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY}; +use validator_store::{Error as ValidatorStoreError, ValidatorStore}; /// Only retain `HISTORICAL_DUTIES_EPOCHS` duties prior to the current epoch. 
const HISTORICAL_DUTIES_EPOCHS: u64 = 2; @@ -473,8 +469,10 @@ pub fn start_update_service( async fn poll_validator_indices( duties_service: &DutiesService, ) { - let _timer = - metrics::start_timer_vec(&metrics::DUTIES_SERVICE_TIMES, &[metrics::UPDATE_INDICES]); + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_INDICES], + ); let log = duties_service.context.log(); @@ -518,9 +516,9 @@ async fn poll_validator_indices( let download_result = duties_service .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::VALIDATOR_ID_HTTP_GET], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::VALIDATOR_ID_HTTP_GET], ); beacon_node .get_beacon_states_validator_id( @@ -604,9 +602,9 @@ async fn poll_validator_indices( async fn poll_beacon_attesters( duties_service: &Arc>, ) -> Result<(), Error> { - let current_epoch_timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::UPDATE_ATTESTERS_CURRENT_EPOCH], + let current_epoch_timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_ATTESTERS_CURRENT_EPOCH], ); let log = duties_service.context.log(); @@ -660,9 +658,9 @@ async fn poll_beacon_attesters( update_per_validator_duty_metrics::(duties_service, current_epoch, current_slot); drop(current_epoch_timer); - let next_epoch_timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::UPDATE_ATTESTERS_NEXT_EPOCH], + let next_epoch_timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_ATTESTERS_NEXT_EPOCH], ); // Download the duties and update the duties for the next epoch. 
@@ -682,8 +680,10 @@ async fn poll_beacon_attesters( update_per_validator_duty_metrics::(duties_service, next_epoch, current_slot); drop(next_epoch_timer); - let subscriptions_timer = - metrics::start_timer_vec(&metrics::DUTIES_SERVICE_TIMES, &[metrics::SUBSCRIPTIONS]); + let subscriptions_timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::SUBSCRIPTIONS], + ); // This vector is intentionally oversized by 10% so that it won't reallocate. // Each validator has 2 attestation duties occuring in the current and next epoch, for which @@ -741,9 +741,9 @@ async fn poll_beacon_attesters( let subscription_result = duties_service .beacon_nodes .request(ApiTopic::Subscriptions, |beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::SUBSCRIPTIONS_HTTP_POST], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::SUBSCRIPTIONS_HTTP_POST], ); beacon_node .post_validator_beacon_committee_subscriptions(subscriptions_ref) @@ -815,9 +815,9 @@ async fn poll_beacon_attesters_for_epoch( return Ok(()); } - let fetch_timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::UPDATE_ATTESTERS_FETCH], + let fetch_timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_ATTESTERS_FETCH], ); // Request duties for all uninitialized validators. 
If there isn't any, we will just request for @@ -883,9 +883,9 @@ async fn poll_beacon_attesters_for_epoch( drop(fetch_timer); - let _store_timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::UPDATE_ATTESTERS_STORE], + let _store_timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_ATTESTERS_STORE], ); debug!( @@ -1029,9 +1029,9 @@ async fn post_validator_duties_attester( duties_service .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::ATTESTER_DUTIES_HTTP_POST], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::ATTESTER_DUTIES_HTTP_POST], ); beacon_node .post_validator_duties_attester(epoch, validator_indices) @@ -1089,9 +1089,9 @@ async fn fill_in_selection_proofs( continue; } - let timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::ATTESTATION_SELECTION_PROOFS], + let timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::ATTESTATION_SELECTION_PROOFS], ); // Sign selection proofs (serially). 
@@ -1223,8 +1223,10 @@ async fn poll_beacon_proposers( duties_service: &DutiesService, block_service_tx: &mut Sender, ) -> Result<(), Error> { - let _timer = - metrics::start_timer_vec(&metrics::DUTIES_SERVICE_TIMES, &[metrics::UPDATE_PROPOSERS]); + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::UPDATE_PROPOSERS], + ); let log = duties_service.context.log(); @@ -1261,9 +1263,9 @@ async fn poll_beacon_proposers( let download_result = duties_service .beacon_nodes .first_success(|beacon_node| async move { - let _timer = metrics::start_timer_vec( - &metrics::DUTIES_SERVICE_TIMES, - &[metrics::PROPOSER_DUTIES_HTTP_GET], + let _timer = validator_metrics::start_timer_vec( + &validator_metrics::DUTIES_SERVICE_TIMES, + &[validator_metrics::PROPOSER_DUTIES_HTTP_GET], ); beacon_node .get_validator_duties_proposer(current_epoch) @@ -1341,7 +1343,7 @@ async fn poll_beacon_proposers( "Detected new block proposer"; "current_slot" => current_slot, ); - metrics::inc_counter(&metrics::PROPOSAL_CHANGED); + validator_metrics::inc_counter(&validator_metrics::PROPOSAL_CHANGED); } } diff --git a/validator_client/validator_services/src/lib.rs b/validator_client/validator_services/src/lib.rs new file mode 100644 index 0000000000..abf8fab3cb --- /dev/null +++ b/validator_client/validator_services/src/lib.rs @@ -0,0 +1,6 @@ +pub mod attestation_service; +pub mod block_service; +pub mod duties_service; +pub mod preparation_service; +pub mod sync; +pub mod sync_committee_service; diff --git a/validator_client/src/preparation_service.rs b/validator_client/validator_services/src/preparation_service.rs similarity index 97% rename from validator_client/src/preparation_service.rs rename to validator_client/validator_services/src/preparation_service.rs index 010c651c25..480f4af2b3 100644 --- a/validator_client/src/preparation_service.rs +++ b/validator_client/validator_services/src/preparation_service.rs @@ -1,6 +1,6 @@ -use 
crate::beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; -use crate::validator_store::{DoppelgangerStatus, Error as ValidatorStoreError, ValidatorStore}; +use beacon_node_fallback::{ApiTopic, BeaconNodeFallback}; use bls::PublicKeyBytes; +use doppelganger_service::DoppelgangerStatus; use environment::RuntimeContext; use parking_lot::RwLock; use slog::{debug, error, info, warn}; @@ -15,6 +15,7 @@ use types::{ Address, ChainSpec, EthSpec, ProposerPreparationData, SignedValidatorRegistrationData, ValidatorRegistrationData, }; +use validator_store::{Error as ValidatorStoreError, ProposalData, ValidatorStore}; /// Number of epochs before the Bellatrix hard fork to begin posting proposer preparations. const PROPOSER_PREPARATION_LOOKAHEAD_EPOCHS: u64 = 2; @@ -23,6 +24,7 @@ const PROPOSER_PREPARATION_LOOKAHEAD_EPOCHS: u64 = 2; const EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION: u64 = 1; /// Builds an `PreparationService`. +#[derive(Default)] pub struct PreparationServiceBuilder { validator_store: Option>>, slot_clock: Option, @@ -492,11 +494,3 @@ impl PreparationService { Ok(()) } } - -/// A helper struct, used for passing data from the validator store to services. -pub struct ProposalData { - pub(crate) validator_index: Option, - pub(crate) fee_recipient: Option
, - pub(crate) gas_limit: u64, - pub(crate) builder_proposals: bool, -} diff --git a/validator_client/src/duties_service/sync.rs b/validator_client/validator_services/src/sync.rs similarity index 98% rename from validator_client/src/duties_service/sync.rs rename to validator_client/validator_services/src/sync.rs index 0bd99dc638..af501326f4 100644 --- a/validator_client/src/duties_service/sync.rs +++ b/validator_client/validator_services/src/sync.rs @@ -1,10 +1,5 @@ -use crate::{ - doppelganger_service::DoppelgangerStatus, - duties_service::{DutiesService, Error}, - http_metrics::metrics, - validator_store::Error as ValidatorStoreError, -}; - +use crate::duties_service::{DutiesService, Error}; +use doppelganger_service::DoppelgangerStatus; use futures::future::join_all; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; use slog::{crit, debug, info, warn}; @@ -13,6 +8,7 @@ use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; use std::sync::Arc; use types::{ChainSpec, EthSpec, PublicKeyBytes, Slot, SyncDuty, SyncSelectionProof, SyncSubnetId}; +use validator_store::Error as ValidatorStoreError; /// Number of epochs in advance to compute selection proofs when not in `distributed` mode. 
pub const AGGREGATION_PRE_COMPUTE_EPOCHS: u64 = 2; @@ -442,9 +438,9 @@ pub async fn poll_sync_committee_duties_for_period"] + +[lib] +name = "validator_store" +path = "src/lib.rs" + +[dependencies] +account_utils = { workspace = true } +doppelganger_service = { workspace = true } +initialized_validators = { workspace = true } +parking_lot = { workspace = true } +serde = { workspace = true } +signing_method = { workspace = true } +slashing_protection = { workspace = true } +slog = { workspace = true } +slot_clock = { workspace = true } +task_executor = { workspace = true } +types = { workspace = true } +validator_metrics = { workspace = true } diff --git a/validator_client/src/validator_store.rs b/validator_client/validator_store/src/lib.rs similarity index 90% rename from validator_client/src/validator_store.rs rename to validator_client/validator_store/src/lib.rs index af59ad9892..837af5b51d 100644 --- a/validator_client/src/validator_store.rs +++ b/validator_client/validator_store/src/lib.rs @@ -1,12 +1,9 @@ -use crate::{ - doppelganger_service::DoppelgangerService, - http_metrics::metrics, - initialized_validators::InitializedValidators, - signing_method::{Error as SigningError, SignableMessage, SigningContext, SigningMethod}, - Config, -}; use account_utils::validator_definitions::{PasswordStorage, ValidatorDefinition}; +use doppelganger_service::{DoppelgangerService, DoppelgangerStatus, DoppelgangerValidatorStore}; +use initialized_validators::InitializedValidators; use parking_lot::{Mutex, RwLock}; +use serde::{Deserialize, Serialize}; +use signing_method::{Error as SigningError, SignableMessage, SigningContext, SigningMethod}; use slashing_protection::{ interchange::Interchange, InterchangeError, NotSafe, Safe, SlashingDatabase, }; @@ -26,9 +23,6 @@ use types::{ ValidatorRegistrationData, VoluntaryExit, }; -pub use crate::doppelganger_service::DoppelgangerStatus; -use crate::preparation_service::ProposalData; - #[derive(Debug, PartialEq)] pub enum Error { 
DoppelgangerProtected(PublicKeyBytes), @@ -48,6 +42,30 @@ impl From for Error { } } +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct Config { + /// Fallback fee recipient address. + pub fee_recipient: Option
, + /// Fallback gas limit. + pub gas_limit: Option, + /// Enable use of the blinded block endpoints during proposals. + pub builder_proposals: bool, + /// Enable slashing protection even while using web3signer keys. + pub enable_web3signer_slashing_protection: bool, + /// If true, Lighthouse will prefer builder proposals, if available. + pub prefer_builder_proposals: bool, + /// Specifies the boost factor, a percentage multiplier to apply to the builder's payload value. + pub builder_boost_factor: Option, +} + +/// A helper struct, used for passing data from the validator store to services. +pub struct ProposalData { + pub validator_index: Option, + pub fee_recipient: Option
, + pub gas_limit: u64, + pub builder_proposals: bool, +} + /// Number of epochs of slashing protection history to keep. /// /// This acts as a maximum safe-guard against clock drift. @@ -77,6 +95,12 @@ pub struct ValidatorStore { _phantom: PhantomData, } +impl DoppelgangerValidatorStore for ValidatorStore { + fn get_validator_index(&self, pubkey: &PublicKeyBytes) -> Option { + self.validator_index(pubkey) + } +} + impl ValidatorStore { // All arguments are different types. Making the fields `pub` is undesired. A builder seems // unnecessary. @@ -590,7 +614,10 @@ impl ValidatorStore { match slashing_status { // We can safely sign this block without slashing. Ok(Safe::Valid) => { - metrics::inc_counter_vec(&metrics::SIGNED_BLOCKS_TOTAL, &[metrics::SUCCESS]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_BLOCKS_TOTAL, + &[validator_metrics::SUCCESS], + ); let signature = signing_method .get_signature::( @@ -607,7 +634,10 @@ impl ValidatorStore { self.log, "Skipping signing of previously signed block"; ); - metrics::inc_counter_vec(&metrics::SIGNED_BLOCKS_TOTAL, &[metrics::SAME_DATA]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_BLOCKS_TOTAL, + &[validator_metrics::SAME_DATA], + ); Err(Error::SameData) } Err(NotSafe::UnregisteredValidator(pk)) => { @@ -617,7 +647,10 @@ impl ValidatorStore { "msg" => "Carefully consider running with --init-slashing-protection (see --help)", "public_key" => format!("{:?}", pk) ); - metrics::inc_counter_vec(&metrics::SIGNED_BLOCKS_TOTAL, &[metrics::UNREGISTERED]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_BLOCKS_TOTAL, + &[validator_metrics::UNREGISTERED], + ); Err(Error::Slashable(NotSafe::UnregisteredValidator(pk))) } Err(e) => { @@ -626,7 +659,10 @@ impl ValidatorStore { "Not signing slashable block"; "error" => format!("{:?}", e) ); - metrics::inc_counter_vec(&metrics::SIGNED_BLOCKS_TOTAL, &[metrics::SLASHABLE]); + validator_metrics::inc_counter_vec( + 
&validator_metrics::SIGNED_BLOCKS_TOTAL, + &[validator_metrics::SLASHABLE], + ); Err(Error::Slashable(e)) } } @@ -681,7 +717,10 @@ impl ValidatorStore { .add_signature(&signature, validator_committee_position) .map_err(Error::UnableToSignAttestation)?; - metrics::inc_counter_vec(&metrics::SIGNED_ATTESTATIONS_TOTAL, &[metrics::SUCCESS]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_ATTESTATIONS_TOTAL, + &[validator_metrics::SUCCESS], + ); Ok(()) } @@ -690,9 +729,9 @@ impl ValidatorStore { self.log, "Skipping signing of previously signed attestation" ); - metrics::inc_counter_vec( - &metrics::SIGNED_ATTESTATIONS_TOTAL, - &[metrics::SAME_DATA], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_ATTESTATIONS_TOTAL, + &[validator_metrics::SAME_DATA], ); Err(Error::SameData) } @@ -703,9 +742,9 @@ impl ValidatorStore { "msg" => "Carefully consider running with --init-slashing-protection (see --help)", "public_key" => format!("{:?}", pk) ); - metrics::inc_counter_vec( - &metrics::SIGNED_ATTESTATIONS_TOTAL, - &[metrics::UNREGISTERED], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_ATTESTATIONS_TOTAL, + &[validator_metrics::UNREGISTERED], ); Err(Error::Slashable(NotSafe::UnregisteredValidator(pk))) } @@ -716,9 +755,9 @@ impl ValidatorStore { "attestation" => format!("{:?}", attestation.data()), "error" => format!("{:?}", e) ); - metrics::inc_counter_vec( - &metrics::SIGNED_ATTESTATIONS_TOTAL, - &[metrics::SLASHABLE], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_ATTESTATIONS_TOTAL, + &[validator_metrics::SLASHABLE], ); Err(Error::Slashable(e)) } @@ -743,7 +782,10 @@ impl ValidatorStore { ) .await?; - metrics::inc_counter_vec(&metrics::SIGNED_VOLUNTARY_EXITS_TOTAL, &[metrics::SUCCESS]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_VOLUNTARY_EXITS_TOTAL, + &[validator_metrics::SUCCESS], + ); Ok(SignedVoluntaryExit { message: voluntary_exit, @@ -769,9 +811,9 @@ impl 
ValidatorStore { ) .await?; - metrics::inc_counter_vec( - &metrics::SIGNED_VALIDATOR_REGISTRATIONS_TOTAL, - &[metrics::SUCCESS], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_VALIDATOR_REGISTRATIONS_TOTAL, + &[validator_metrics::SUCCESS], ); Ok(SignedValidatorRegistrationData { @@ -807,7 +849,10 @@ impl ValidatorStore { ) .await?; - metrics::inc_counter_vec(&metrics::SIGNED_AGGREGATES_TOTAL, &[metrics::SUCCESS]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_AGGREGATES_TOTAL, + &[validator_metrics::SUCCESS], + ); Ok(SignedAggregateAndProof::from_aggregate_and_proof( message, signature, @@ -843,7 +888,10 @@ impl ValidatorStore { .await .map_err(Error::UnableToSign)?; - metrics::inc_counter_vec(&metrics::SIGNED_SELECTION_PROOFS_TOTAL, &[metrics::SUCCESS]); + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_SELECTION_PROOFS_TOTAL, + &[validator_metrics::SUCCESS], + ); Ok(signature.into()) } @@ -862,9 +910,9 @@ impl ValidatorStore { // Bypass `with_validator_signing_method`: sync committee messages are not slashable. 
let signing_method = self.doppelganger_bypassed_signing_method(*validator_pubkey)?; - metrics::inc_counter_vec( - &metrics::SIGNED_SYNC_SELECTION_PROOFS_TOTAL, - &[metrics::SUCCESS], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_SYNC_SELECTION_PROOFS_TOTAL, + &[validator_metrics::SUCCESS], ); let message = SyncAggregatorSelectionData { @@ -911,9 +959,9 @@ impl ValidatorStore { .await .map_err(Error::UnableToSign)?; - metrics::inc_counter_vec( - &metrics::SIGNED_SYNC_COMMITTEE_MESSAGES_TOTAL, - &[metrics::SUCCESS], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_SYNC_COMMITTEE_MESSAGES_TOTAL, + &[validator_metrics::SUCCESS], ); Ok(SyncCommitteeMessage { @@ -953,9 +1001,9 @@ impl ValidatorStore { .await .map_err(Error::UnableToSign)?; - metrics::inc_counter_vec( - &metrics::SIGNED_SYNC_COMMITTEE_CONTRIBUTIONS_TOTAL, - &[metrics::SUCCESS], + validator_metrics::inc_counter_vec( + &validator_metrics::SIGNED_SYNC_COMMITTEE_CONTRIBUTIONS_TOTAL, + &[validator_metrics::SUCCESS], ); Ok(SignedContributionAndProof { message, signature }) @@ -1029,7 +1077,8 @@ impl ValidatorStore { info!(self.log, "Pruning slashing protection DB"; "epoch" => current_epoch); } - let _timer = metrics::start_timer(&metrics::SLASHING_PROTECTION_PRUNE_TIMES); + let _timer = + validator_metrics::start_timer(&validator_metrics::SLASHING_PROTECTION_PRUNE_TIMES); let new_min_target_epoch = current_epoch.saturating_sub(SLASHING_PROTECTION_HISTORY_EPOCHS); let new_min_slot = new_min_target_epoch.start_slot(E::slots_per_epoch()); diff --git a/validator_manager/Cargo.toml b/validator_manager/Cargo.toml index 92267ad875..4f367b8f5b 100644 --- a/validator_manager/Cargo.toml +++ b/validator_manager/Cargo.toml @@ -25,4 +25,4 @@ derivative = { workspace = true } [dev-dependencies] tempfile = { workspace = true } regex = { workspace = true } -validator_client = { workspace = true } +validator_http_api = { workspace = true } diff --git 
a/validator_manager/src/delete_validators.rs b/validator_manager/src/delete_validators.rs index 6283279986..a2d6c062fa 100644 --- a/validator_manager/src/delete_validators.rs +++ b/validator_manager/src/delete_validators.rs @@ -148,7 +148,7 @@ mod test { use crate::{ common::ValidatorSpecification, import_validators::tests::TestBuilder as ImportTestBuilder, }; - use validator_client::http_api::{test_utils::ApiTester, Config as HttpConfig}; + use validator_http_api::{test_utils::ApiTester, Config as HttpConfig}; struct TestBuilder { delete_config: Option, diff --git a/validator_manager/src/import_validators.rs b/validator_manager/src/import_validators.rs index 6065ecb603..2a819a2a64 100644 --- a/validator_manager/src/import_validators.rs +++ b/validator_manager/src/import_validators.rs @@ -387,7 +387,7 @@ pub mod tests { str::FromStr, }; use tempfile::{tempdir, TempDir}; - use validator_client::http_api::{test_utils::ApiTester, Config as HttpConfig}; + use validator_http_api::{test_utils::ApiTester, Config as HttpConfig}; const VC_TOKEN_FILE_NAME: &str = "vc_token.json"; diff --git a/validator_manager/src/list_validators.rs b/validator_manager/src/list_validators.rs index 7df85a7eb9..e3deb0b21a 100644 --- a/validator_manager/src/list_validators.rs +++ b/validator_manager/src/list_validators.rs @@ -87,7 +87,7 @@ mod test { use crate::{ common::ValidatorSpecification, import_validators::tests::TestBuilder as ImportTestBuilder, }; - use validator_client::http_api::{test_utils::ApiTester, Config as HttpConfig}; + use validator_http_api::{test_utils::ApiTester, Config as HttpConfig}; struct TestBuilder { list_config: Option, diff --git a/validator_manager/src/move_validators.rs b/validator_manager/src/move_validators.rs index 7651917ea9..807a147ca1 100644 --- a/validator_manager/src/move_validators.rs +++ b/validator_manager/src/move_validators.rs @@ -668,7 +668,7 @@ mod test { use account_utils::validator_definitions::SigningDefinition; use std::fs; use 
tempfile::{tempdir, TempDir}; - use validator_client::http_api::{test_utils::ApiTester, Config as HttpConfig}; + use validator_http_api::{test_utils::ApiTester, Config as HttpConfig}; const SRC_VC_TOKEN_FILE_NAME: &str = "src_vc_token.json"; const DEST_VC_TOKEN_FILE_NAME: &str = "dest_vc_token.json"; diff --git a/watch/README.md b/watch/README.md index 34519e52e5..877cddf234 100644 --- a/watch/README.md +++ b/watch/README.md @@ -39,8 +39,6 @@ diesel database reset --database-url postgres://postgres:postgres@localhost/dev 1. Ensure a synced Lighthouse beacon node with historical states is available at `localhost:5052`. -The smaller the value of `--slots-per-restore-point` the faster beacon.watch -will be able to sync to the beacon node. 1. Run the updater daemon: ```