Fix simulator

This commit is contained in:
Mac L
2023-07-21 17:49:52 +10:00
parent 61bc700fdf
commit 129568565e
4 changed files with 94 additions and 46 deletions

View File

@@ -1,4 +1,5 @@
use crate::local_network::LocalNetwork;
use crate::ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE;
use node_test_rig::eth2::types::{BlockId, StateId};
use std::time::Duration;
use types::{Epoch, EthSpec, ExecPayload, ExecutionBlockHash, Hash256, Slot, Unsigned};
@@ -244,30 +245,42 @@ pub async fn verify_transition_block_finalized<E: EthSpec>(
}
}
// Causes the execution node at `node_index` to disconnect from the execution layer 1 epoch after
// the merge transition.
//
// Awaits `epoch_delay` for `transition_epoch + 1` (presumably this sleeps until that epoch
// starts -- confirm against the helper), logs to stderr, and then calls
// `all_payloads_syncing(false)` on the selected execution node's server. Always returns `Ok(())`.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, so the
// `read()[0]` line and the comment above it look like the pre-change lines that the
// `read()[node_index]` lines replace -- confirm against the real file.
pub async fn disconnect_from_execution_layer<E: EthSpec>(
network: LocalNetwork<E>,
transition_epoch: Epoch,
slot_duration: Duration,
node_index: usize,
) -> Result<(), String> {
epoch_delay(transition_epoch + 1, slot_duration, E::slots_per_epoch()).await;
eprintln!("Disabling Execution Layer");
// Take the execution node at position 0 and force it to return the `syncing` status.
network.execution_nodes.read()[0]
// Force the execution node to return the `syncing` status.
network.execution_nodes.read()[node_index]
.server
.all_payloads_syncing(false);
Ok(())
}
// Restores the execution node at `node_index` once epoch `transition_epoch + epochs_offline` is reached.
pub async fn reconnect_to_execution_layer<E: EthSpec>(
network: LocalNetwork<E>,
transition_epoch: Epoch,
slot_duration: Duration,
node_index: usize,
epochs_offline: u64,
) -> Result<(), String> {
// Ensure this is configurable by only reconnecting after `epochs_offline`.
epoch_delay(
transition_epoch + 1 + 2,
transition_epoch + epochs_offline,
slot_duration,
E::slots_per_epoch(),
)
.await;
// Restore the functionality of the 0th execution node.
network.execution_nodes.read()[0]
// Restore the functionality of the execution node.
network.execution_nodes.read()[node_index]
.server
.all_payloads_valid();
@@ -278,32 +291,76 @@ pub async fn disconnect_from_execution_layer<E: EthSpec>(
/// Ensure all validators have attested correctly.
///
/// Awaits `slot_delay` for the start slot of `upto_epoch` (presumably sleeping until that slot --
/// confirm against the helper), then asks the remote node at `node_index` for the "global"
/// attestation performance and tallies, over every returned epoch entry, how many had the
/// `active`, `head`, `target` and `source` flags set. Totals and percentages are printed to
/// stderr.
///
/// Returns `Err` when the remote nodes cannot be obtained, when the performance query fails, or
/// when any of the four percentages is below `ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE`;
/// otherwise returns `Ok(())`.
///
/// NOTE(review): this text comes from a rendered diff without +/- markers, so some adjacent
/// lines below look like pre-change/post-change pairs -- confirm against the real file.
pub async fn check_attestation_correctness<E: EthSpec>(
network: LocalNetwork<E>,
start_epoch: Epoch,
// Must be 2 epochs less than the end of the simulation.
upto_epoch: Epoch,
slots_per_epoch: u64,
slot_duration: Duration,
// Select which node to query. Will use this node to determine the global network performance.
node_index: usize,
) -> Result<(), String> {
let upto_slot = upto_epoch.start_slot(slots_per_epoch);
slot_delay(upto_slot, slot_duration).await;
// NOTE(review): the hard-coded `[1]` lookup looks like the pre-change line superseded by the
// `[node_index]` lookup that follows it.
let remote_node = &network.remote_nodes()?[1];
let remote_node = &network.remote_nodes()?[node_index];
let results = remote_node
.get_lighthouse_analysis_attestation_performance(
// NOTE(review): `Epoch::new(2)` looks like the pre-change first argument superseded by
// `start_epoch` on the next line.
Epoch::new(2),
start_epoch,
upto_epoch - 2,
"global".to_string(),
)
.await
.map_err(|e| format!("Unable to get attestation performance: {e}"))?;
// Tallies are kept as `f64` so the percentages below can be computed without casts.
let mut active_successes: f64 = 0.0;
let mut head_successes: f64 = 0.0;
let mut target_successes: f64 = 0.0;
let mut source_successes: f64 = 0.0;
let mut total: f64 = 0.0;
for result in results {
for epochs in result.epochs.values() {
// NOTE(review): these asserts panic on the first unset flag, so with them in place the
// percentage thresholds further down could never report a miss; they look like
// pre-change lines replaced by the tallying code -- confirm.
assert!(epochs.active);
assert!(epochs.head);
assert!(epochs.target);
assert!(epochs.source);
total += 1.0;
if epochs.active {
active_successes += 1.0;
}
if epochs.head {
head_successes += 1.0;
}
if epochs.target {
target_successes += 1.0;
}
if epochs.source {
source_successes += 1.0;
}
}
}
// NOTE(review): if no epoch entries were returned, `total` is still 0.0 and every percentage
// is NaN; `NaN < threshold` is false, so all four checks below pass and the function returns
// `Ok(())` without having verified anything.
let active_percent = active_successes / total * 100.0;
let head_percent = head_successes / total * 100.0;
let target_percent = target_successes / total * 100.0;
let source_percent = source_successes / total * 100.0;
eprintln!("Total Attestations: {}", total);
eprintln!("Active: {}: {}%", active_successes, active_percent);
eprintln!("Head: {}: {}%", head_successes, head_percent);
eprintln!("Target: {}: {}%", target_successes, target_percent);
eprintln!("Source: {}: {}%", source_successes, source_percent);
// Fail on the first metric that falls below the acceptable hit percentage.
if active_percent < ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE {
return Err("Active percent was below required level".to_string());
}
if head_percent < ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE {
return Err("Head percent was below required level".to_string());
}
if target_percent < ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE {
return Err("Target percent was below required level".to_string());
}
if source_percent < ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE {
return Err("Source percent was below required level".to_string());
}
Ok(())
}

View File

@@ -148,11 +148,6 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
.takes_value(true)
.default_value("3")
.help("Speed up factor. Please use a divisor of 12."))
.arg(Arg::with_name("post-merge")
.short("m")
.long("post-merge")
.takes_value(false)
.help("Simulate the merge transition"))
.arg(Arg::with_name("continue_after_checks")
.short("c")
.long("continue_after_checks")

View File

@@ -21,7 +21,7 @@ use sensitive_url::SensitiveUrl;
use tokio::time::sleep;
use types::{Epoch, EthSpec, MinimalEthSpec};
const END_EPOCH: u64 = 20;
const END_EPOCH: u64 = 16;
const ALTAIR_FORK_EPOCH: u64 = 1;
const BELLATRIX_FORK_EPOCH: u64 = 2;
@@ -36,7 +36,6 @@ pub fn run_fallback_sim(matches: &ArgMatches) -> Result<(), String> {
value_t!(matches, "validators_per_vc", usize).expect("missing validators_per_vc default");
let bns_per_vc = value_t!(matches, "bns_per_vc", usize).expect("missing bns_per_vc default");
let continue_after_checks = matches.is_present("continue_after_checks");
//let post_merge_sim = matches.is_present("post-merge");
let post_merge_sim = true;
println!("Fallback Simulator:");
@@ -106,8 +105,6 @@ fn fallback_sim(
let total_validator_count = validators_per_vc * vc_count;
let node_count = vc_count * bns_per_vc;
//let altair_fork_version = spec.altair_fork_version;
//let bellatrix_fork_version = spec.bellatrix_fork_version;
spec.seconds_per_slot /= speed_up_factor;
spec.seconds_per_slot = max(1, spec.seconds_per_slot);
@@ -266,41 +263,32 @@ fn fallback_sim(
* breakage by changes to the VC.
*/
let (
//finalization,
//block_prod,
//validator_count,
//onboarding,
fallback,
check_attestations,
//fork,
//sync_aggregate,
//transition,
) = futures::join!(
//checks::verify_first_finalization(network.clone(), slot_duration),
let (disconnect, reconnect, check_attestations) = futures::join!(
checks::disconnect_from_execution_layer(
network.clone(),
Epoch::new(BELLATRIX_FORK_EPOCH),
slot_duration
slot_duration,
0
),
checks::reconnect_to_execution_layer(
network.clone(),
Epoch::new(BELLATRIX_FORK_EPOCH),
slot_duration,
0,
2,
),
checks::check_attestation_correctness(
network.clone(),
Epoch::new(END_EPOCH),
Epoch::new(0),
Epoch::new(END_EPOCH - 2),
MinimalEthSpec::slots_per_epoch(),
slot_duration
slot_duration,
1,
),
//checks::stall_node(network.clone(), 0, 30, seconds_per_slot),
);
//block_prod?;
//finalization?;
//validator_count?;
//onboarding?;
fallback?;
disconnect?;
reconnect?;
check_attestations?;
//fork?;
//sync_aggregate?;
//transition?;
// The `final_future` either completes immediately or never completes, depending on the value
// of `continue_after_checks`.

View File

@@ -30,6 +30,14 @@ use env_logger::{Builder, Env};
use local_network::LocalNetwork;
use types::MinimalEthSpec;
// Since simulator tests are non-deterministic and there is a non-zero chance of missed
// attestations, define an acceptable network-wide attestation performance.
//
// This has potential to block CI so it should be set conservatively enough that spurious failures
// don't become very common, but not so conservatively that regressions to the fallback mechanism
// cannot be detected.
pub(crate) const ACCEPTABLE_FALLBACK_ATTESTATION_HIT_PERCENTAGE: f64 = 99.0;
pub type E = MinimalEthSpec;
fn main() {