Add genesis sync test to CI (#7561)

* #7550

Use existing code from @jimmygchen in #7530 and modify it for the genesis sync test. Thanks @jimmygchen!
This commit is contained in:
chonghe
2025-06-11 17:51:37 +08:00
committed by GitHub
parent 8c6abc0b69
commit 7416d06dce
5 changed files with 241 additions and 3 deletions

View File

@@ -20,7 +20,7 @@ jobs:
- name: Build Docker image
run: |
docker build --build-arg FEATURES=portable -t lighthouse:local .
docker build --build-arg FEATURES=portable,spec-minimal -t lighthouse:local .
docker save lighthouse:local -o lighthouse-docker.tar
- name: Upload Docker image artifact
@@ -213,6 +213,49 @@ jobs:
scripts/local_testnet/logs
retention-days: 3
# Test syncing from genesis on a local testnet. Aims to cover forward syncing over both short and long
# distances: the non-validating nodes are stopped for `offline_secs`, restarted, and must sync to head.
# Opt-in: the job runs only on pull requests that carry the `syncing` label.
genesis-sync-test:
name: genesis-sync-test-${{ matrix.fork }}-${{ matrix.offline_secs }}s
runs-on: ubuntu-latest
# NOTE(review): presumably the job that builds `lighthouse:local` and uploads the `lighthouse-docker`
# artifact downloaded below; confirm.
needs: dockerfile-ubuntu
if: contains(github.event.pull_request.labels.*.name, 'syncing')
strategy:
# 2 forks x 2 offline durations = 4 jobs.
matrix:
fork: [electra, fulu]
# Seconds the non-validating nodes stay stopped before they are resumed.
offline_secs: [120, 300]
steps:
- uses: actions/checkout@v4
- name: Install Kurtosis
run: |
echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list
sudo apt update
sudo apt install -y kurtosis-cli
kurtosis analytics disable
- name: Download Docker image artifact
uses: actions/download-artifact@v4
with:
name: lighthouse-docker
path: .
- name: Load Docker image
run: docker load -i lighthouse-docker.tar
# Script arguments, in order: enclave name, Kurtosis config file (resolved from `working-directory`),
# fork name, offline duration in seconds.
- name: Run the genesis sync test script
run: |
./genesis-sync.sh "sync-${{ matrix.fork }}-${{ matrix.offline_secs }}s" "genesis-sync-config-${{ matrix.fork }}.yaml" "${{ matrix.fork }}" "${{ matrix.offline_secs }}"
working-directory: scripts/tests
# `always()` keeps the logs available when the test step fails.
- name: Upload logs artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: logs-genesis-sync-${{ matrix.fork }}-${{ matrix.offline_secs }}s
path: |
scripts/local_testnet/logs
retention-days: 3
# This job succeeds ONLY IF all others succeed. It is used by the merge queue to determine whether
# a PR is safe to merge. New jobs should be added here.
local-testnet-success:
@@ -228,5 +271,5 @@ jobs:
- uses: actions/checkout@v4
- name: Check that success job is dependent on all others
run: |
exclude_jobs='checkpoint-sync-test'
exclude_jobs='checkpoint-sync-test|genesis-sync-test'
./scripts/ci/check-success-job.sh ./.github/workflows/local-testnet.yml local-testnet-success "$exclude_jobs"

View File

@@ -81,7 +81,7 @@ fi
if [ "$BUILD_IMAGE" = true ]; then
echo "Building Lighthouse Docker image."
ROOT_DIR="$SCRIPT_DIR/../.."
docker build --build-arg FEATURES=portable -f $ROOT_DIR/Dockerfile -t $LH_IMAGE_NAME $ROOT_DIR
docker build --build-arg FEATURES=portable,spec-minimal -f $ROOT_DIR/Dockerfile -t $LH_IMAGE_NAME $ROOT_DIR
else
echo "Not rebuilding Lighthouse Docker image."
fi

View File

@@ -0,0 +1,22 @@
# Kurtosis config file for testing sync on a local devnet with Electra enabled from epoch 0.
participants:
# Two Lighthouse nodes with `validator_count` left unset.
# NOTE(review): presumably these hold the validators that keep the chain progressing; confirm against
# the Kurtosis package defaults.
- cl_type: lighthouse
cl_image: lighthouse:local
count: 2
# Nodes without validators, used for testing sync.
# NOTE(review): genesis-sync.sh addresses these two as cl-3-lighthouse-geth and cl-4-lighthouse-geth,
# so the participant order here presumably matters; confirm before reordering.
- cl_type: lighthouse
cl_image: lighthouse:local
supernode: true # Electra has no supernodes; set only for future-proofing.
validator_count: 0
- cl_type: lighthouse
cl_image: lighthouse:local
supernode: false
validator_count: 0
network_params:
seconds_per_slot: 6
# Fork epoch 0: Electra applies from the first epoch.
electra_fork_epoch: 0
preset: "minimal"
additional_services:
- tx_fuzz
- spamoor
global_log_level: debug

View File

@@ -0,0 +1,22 @@
# Kurtosis config file for testing sync on a local devnet with Fulu enabled from epoch 0.
participants:
# Two Lighthouse nodes with `validator_count` left unset.
# NOTE(review): presumably these hold the validators that keep the chain progressing; confirm against
# the Kurtosis package defaults.
- cl_type: lighthouse
cl_image: lighthouse:local
count: 2
# Nodes without validators, used for testing sync: one supernode and one regular full node.
# NOTE(review): genesis-sync.sh addresses these two as cl-3-lighthouse-geth and cl-4-lighthouse-geth,
# so the participant order here presumably matters; confirm before reordering.
- cl_type: lighthouse
cl_image: lighthouse:local
supernode: true
validator_count: 0
- cl_type: lighthouse
cl_image: lighthouse:local
supernode: false
validator_count: 0
network_params:
seconds_per_slot: 6
# Fork epoch 0: Fulu applies from the first epoch.
fulu_fork_epoch: 0
preset: "minimal"
additional_services:
- tx_fuzz
- spamoor
global_log_level: debug

151
scripts/tests/genesis-sync.sh Executable file
View File

@@ -0,0 +1,151 @@
#!/usr/bin/env bash
#
# Genesis sync test on a local network.
#
# Start a local testnet, shut down non-validator nodes for a period, then restart them
# and monitor their sync progress from genesis to head.
# Absolute directory of this script, so sibling scripts and configs resolve
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Positional arguments (all optional):
#   $1  Kurtosis enclave name
#   $2  path to the Kurtosis network config
#   $3  fork under test: electra or fulu
#   $4  how long (seconds) the non-validating nodes stay stopped
ENCLAVE_NAME="${1:-genesis-sync-testnet}"
CONFIG="${2:-$SCRIPT_DIR/genesis-sync-config-electra.yaml}"
FORK_TYPE="${3:-electra}"
OFFLINE_DURATION_SECS="${4:-120}"

# Test configuration
# ------------------------------------------------------
# How often to query the /lighthouse/syncing endpoint. Kept short so that
# intermediate sync progress shows up in the output.
POLL_INTERVAL_SECS=0.5

# Overall sync deadline: the test fails if the nodes are not synced by then.
TIMEOUT_MINS=5
TIMEOUT_SECS=$((60 * TIMEOUT_MINS))
# ------------------------------------------------------

printf '%s\n' \
  "Starting genesis sync test with:" \
  " Fork: $FORK_TYPE" \
  " Offline duration: ${OFFLINE_DURATION_SECS}s"
# Polls one node's sync status, prints it, and marks the node complete once
# it reports "Synced".
#
# Arguments:
#   $1  key into the node_urls associative array ("supernode" or "fullnode")
# Globals read: node_urls
# Calls: mark_node_complete
poll_node() {
  local node_type=$1
  local url=${node_urls[$node_type]}
  # Scratch variables are local so they do not leak into the caller's scope.
  local response sync_state status fields

  # Bound each request: without a limit, an unresponsive node would block
  # here and the caller's overall timeout check would never be reached.
  response=$(curl -s --max-time 5 "${url}/lighthouse/syncing" 2>/dev/null)
  if [ -z "$response" ] || [ "$response" = "null" ]; then
    echo "${node_type} status: No response or null response"
    return
  fi

  # `.data` is either an object keyed by the sync state (with detail fields)
  # or a plain string naming the state.
  sync_state=$(echo "$response" | jq -r 'if (.data | type) == "object" then "object" else "string" end' 2>/dev/null)
  if [ "$sync_state" = "object" ]; then
    status=$(echo "$response" | jq -r '.data | keys[0] // "Unknown"')
    # Pass the key as a jq variable instead of splicing it into the filter text.
    fields=$(echo "$response" | jq -r --arg state "$status" '.data[$state] | to_entries | map("\(.key): \(.value)") | join(", ")')
    echo "${node_type} status: ${status}, ${fields}"
  else
    status=$(echo "$response" | jq -r '.data' 2>/dev/null)
    echo "${node_type} status: ${status:-Unknown}"
    # The test is complete when the node is synced
    if [ "$status" = "Synced" ]; then
      mark_node_complete "$node_type"
    fi
  fi
}
# Records the first moment a node is seen as synced and reports how long the
# sync took.
#
# Arguments:
#   $1  node key (an index into node_completed / node_complete_time)
# Globals: reads sync_start_time; reads and writes node_completed and
#          node_complete_time.
# Calls for a node whose flag is not "false" are no-ops.
mark_node_complete() {
  local name=$1

  # Already recorded: keep the original completion time.
  [ "${node_completed[$name]}" = false ] || return 0

  local finished_at
  finished_at=$(date +%s)
  node_completed[$name]=true
  node_complete_time[$name]=$finished_at
  echo "${name} completed sync in $((finished_at - sync_start_time)) seconds"
}
# Stops the local testnet for this enclave and terminates the script.
#
# Arguments:
#   $1  exit code for the script (defaults to 0, which is what a call without
#       an argument previously resulted in)
# Globals read: SCRIPT_DIR, ENCLAVE_NAME
# NOTE(review): no logs are dumped here directly; presumably
# stop_local_testnet.sh takes care of that. Confirm.
exit_and_dump_logs() {
  local exit_code=${1:-0}
  echo "Shutting down..."
  # Quoted so that a checkout path or enclave name containing spaces or glob
  # characters is passed through intact.
  "$SCRIPT_DIR/../local_testnet/stop_local_testnet.sh" "$ENCLAVE_NAME"
  echo "Test completed with exit code $exit_code."
  exit "$exit_code"
}
# Start the nodes
if ! "$SCRIPT_DIR/../local_testnet/start_local_testnet.sh" -e "$ENCLAVE_NAME" -b false -n "$CONFIG"; then
  echo "Failed to start local testnet"
  exit_and_dump_logs 1
fi

# Wait for 10s before stopping non-validating nodes
sleep 10

# These are non validating nodes
supernode="cl-3-lighthouse-geth"
fullnode="cl-4-lighthouse-geth"

# Stop the non-validator nodes. If a stop fails, that node never falls behind
# and the test would pass without exercising sync, so abort instead.
for service in "$supernode" "$fullnode"; do
  if ! kurtosis service stop "$ENCLAVE_NAME" "$service"; then
    echo "Failed to stop $service"
    exit_and_dump_logs 1
  fi
done

echo "Non-validator nodes stopped. Waiting ${OFFLINE_DURATION_SECS} seconds..."

# Display the time every 10s while the nodes are stopped. The final step is
# shortened so a duration that is not a multiple of 10 neither overshoots nor
# reports a negative remaining time.
remaining_time=$OFFLINE_DURATION_SECS
while [ "$remaining_time" -gt 0 ]; do
  step=$(( remaining_time < 10 ? remaining_time : 10 ))
  sleep "$step"
  remaining_time=$((remaining_time - step))
  echo "Nodes are stopped for $((OFFLINE_DURATION_SECS - remaining_time))s, ${remaining_time}s remains..."
done

echo "Resuming non-validator nodes..."

# Resume the non validating nodes; a node that fails to restart can never
# sync, so fail immediately rather than waiting for the timeout.
for service in "$supernode" "$fullnode"; do
  if ! kurtosis service start "$ENCLAVE_NAME" "$service"; then
    echo "Failed to restart $service"
    exit_and_dump_logs 1
  fi
done

# The time at which syncing starts after the node was stopped
sync_start_time=$(date +%s)

# Get beacon API URLs for non validating nodes for query
supernode_url=$(kurtosis port print "$ENCLAVE_NAME" "$supernode" http)
fullnode_url=$(kurtosis port print "$ENCLAVE_NAME" "$fullnode" http)

# Initialize statuses
declare -A node_completed
declare -A node_complete_time
declare -A node_urls

node_urls["supernode"]="$supernode_url"
node_urls["fullnode"]="$fullnode_url"
node_completed["supernode"]=false
node_completed["fullnode"]=false

echo "Polling sync status until nodes are synced or timeout of ${TIMEOUT_MINS} mins"

while [ "${node_completed[supernode]}" = false ] || [ "${node_completed[fullnode]}" = false ]; do
  current_time=$(date +%s)
  elapsed=$((current_time - sync_start_time))

  if [ "$elapsed" -ge "$TIMEOUT_SECS" ]; then
    echo "ERROR: Nodes timed out syncing after ${TIMEOUT_MINS} minutes. Exiting."
    exit_and_dump_logs 1
  fi

  # Poll each node that hasn't completed yet
  for node in "supernode" "fullnode"; do
    if [ "${node_completed[$node]}" = false ]; then
      poll_node "$node"
    fi
  done

  sleep "$POLL_INTERVAL_SECS"
done

echo "Genesis sync test complete! Both supernode and fullnode have synced successfully."
echo "Supernode time: $((node_complete_time[supernode] - sync_start_time)) seconds"
echo "Fullnode time: $((node_complete_time[fullnode] - sync_start_time)) seconds"
exit_and_dump_logs 0