mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-20 21:34:46 +00:00
CI fix: add retries to eth1 sim tests (#4501)
## Issue Addressed
This PR attempts to work around the recent frequent eth1 simulator failures caused by missing eth logs from Anvil.
> FailedToInsertDeposit(NonConsecutive { log_index: 1, expected: 0 })
This usually occurs at the beginning of the tests. Once this log shows up, the test is guaranteed to time out after a few hours, which is currently causing our CI to fail quite frequently.
Example failure here: https://github.com/sigp/lighthouse/actions/runs/5525760195/jobs/10079736914
## Proposed Changes
The quick fix applied here adds a timeout to node startup and restarts the node again.
- Add a 60-second timeout to beacon node startup in eth1 simulator tests. It takes ~10 seconds on my machine, but could take longer on CI runners.
- Wrap the startup code in a retry function that allows for 3 retries before returning an error.
## Additional Info
We should probably raise an issue in the Anvil GitHub repo so this can be investigated further.
This commit is contained in:
63
testing/simulator/src/retry.rs
Normal file
63
testing/simulator/src/retry.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use std::fmt::Debug;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
|
||||
/// Runs `func` and, if it returns an error, runs it again up to `max_retries` more times.
///
/// `func` is called once per attempt to produce a fresh boxed future, so the operation is
/// attempted at most `max_retries + 1` times in total. The first `Ok` value is returned
/// immediately. If every attempt fails, the error from the final attempt is returned; errors
/// from earlier attempts are only reported on stderr.
pub async fn with_retry<T, E, F>(max_retries: usize, mut func: F) -> Result<T, E>
where
    F: FnMut() -> Pin<Box<dyn Future<Output = Result<T, E>>>>,
    E: Debug,
{
    let mut retry_count = 0;
    loop {
        // `func` already hands back a pinned, boxed future, so it can be awaited directly
        // without wrapping it in another `Box::pin` (which would allocate again per attempt).
        match func().await {
            Err(e) if retry_count < max_retries => {
                retry_count += 1;
                eprintln!(
                    "Operation failed with error {:?}, retrying {} of {}",
                    e, retry_count, max_retries
                );
            }
            // Either a success, or a failure with no retries left: hand it to the caller.
            result => break result,
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::VecDeque;

    /// Test double for a fallible async operation: resolves to `Ok(())` when `is_ok` is true
    /// and to `Err(())` otherwise.
    async fn my_async_func(is_ok: bool) -> Result<(), ()> {
        if is_ok {
            Ok(())
        } else {
            Err(())
        }
    }

    /// A call that succeeds immediately must not be retried.
    #[tokio::test]
    async fn test_with_retry_ok() {
        let mut attempts = 0;
        let res = with_retry(3, || {
            attempts += 1;
            Box::pin(my_async_func(true))
        })
        .await;
        assert!(res.is_ok());
        assert_eq!(attempts, 1);
    }

    /// A failure followed by a success yields `Ok` after exactly two attempts.
    #[tokio::test]
    async fn test_with_retry_2nd_ok() {
        let mut mock_results = VecDeque::from([false, true]);
        let res = with_retry(3, || {
            Box::pin(my_async_func(
                mock_results
                    .pop_front()
                    .expect("with_retry called more often than there are mock results"),
            ))
        })
        .await;
        assert!(res.is_ok());
        // Both queued outcomes were consumed: one failed attempt plus one successful retry.
        assert!(mock_results.is_empty());
    }

    /// A call that always fails is attempted once plus `max_retries` times, then returns `Err`.
    #[tokio::test]
    async fn test_with_retry_fail() {
        let mut attempts = 0;
        let res = with_retry(3, || {
            attempts += 1;
            Box::pin(my_async_func(false))
        })
        .await;
        assert!(res.is_err());
        assert_eq!(attempts, 4);
    }

    /// With `max_retries == 0` the operation runs exactly once and its error is returned.
    #[tokio::test]
    async fn test_with_retry_zero_retries() {
        let mut attempts = 0;
        let res = with_retry(0, || {
            attempts += 1;
            Box::pin(my_async_func(false))
        })
        .await;
        assert!(res.is_err());
        assert_eq!(attempts, 1);
    }
}
|
||||
Reference in New Issue
Block a user