mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-02 16:21:42 +00:00
CI fix: add retries to eth1 sim tests (#4501)
## Issue Addressed
This PR attempts to workaround the recent frequent eth1 simulator failures caused by missing eth logs from Anvil.
> FailedToInsertDeposit(NonConsecutive { log_index: 1, expected: 0 })
This usually occurs at the beginning of the tests, and it guarantees a timeout after a few hours if this log shows up, and this is currently causing our CIs to fail quite frequently.
Example failure here: https://github.com/sigp/lighthouse/actions/runs/5525760195/jobs/10079736914
## Proposed Changes
The quick fix applied here adds a timeout to node startup and restarts the node again.
- Add a 60 seconds timeout to beacon node startup in eth1 simulator tests. It takes ~10 seconds on my machine, but could take longer on CI runners.
- Wrap the startup code in a retry function, that allows for 3 retries before returning an error.
## Additional Info
We should probably raise an issue under the Anvil GitHub repo there so this can be further investigated.
This commit is contained in:
@@ -14,3 +14,4 @@ validator_client = { path = "../../validator_client" }
|
||||
validator_dir = { path = "../../common/validator_dir", features = ["insecure_keys"] }
|
||||
sensitive_url = { path = "../../common/sensitive_url" }
|
||||
execution_layer = { path = "../../beacon_node/execution_layer" }
|
||||
tokio = { version = "1.14.0", features = ["time"] }
|
||||
|
||||
@@ -10,6 +10,7 @@ use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tempfile::{Builder as TempBuilder, TempDir};
|
||||
use tokio::time::timeout;
|
||||
use types::EthSpec;
|
||||
use validator_client::ProductionValidatorClient;
|
||||
use validator_dir::insecure_keys::build_deterministic_validator_dirs;
|
||||
@@ -24,6 +25,8 @@ pub use validator_client::Config as ValidatorConfig;
|
||||
|
||||
/// The global timeout for HTTP requests to the beacon node.
|
||||
const HTTP_TIMEOUT: Duration = Duration::from_secs(4);
|
||||
/// The timeout for a beacon node to start up.
|
||||
const STARTUP_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Provides a beacon node that is running in the current process on a given tokio executor (it
|
||||
/// is _local_ to this process).
|
||||
@@ -51,12 +54,16 @@ impl<E: EthSpec> LocalBeaconNode<E> {
|
||||
client_config.set_data_dir(datadir.path().into());
|
||||
client_config.network.network_dir = PathBuf::from(datadir.path()).join("network");
|
||||
|
||||
ProductionBeaconNode::new(context, client_config)
|
||||
.await
|
||||
.map(move |client| Self {
|
||||
client: client.into_inner(),
|
||||
datadir,
|
||||
})
|
||||
timeout(
|
||||
STARTUP_TIMEOUT,
|
||||
ProductionBeaconNode::new(context, client_config),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| format!("Beacon node startup timed out after {:?}", STARTUP_TIMEOUT))?
|
||||
.map(move |client| Self {
|
||||
client: client.into_inner(),
|
||||
datadir,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user