mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-02 16:21:42 +00:00
#7603 #### Custody backfill sync service Similar in many ways to the current backfill service. There may be ways to unify the two services. The difficulty there is that the current backfill service tightly couples blocks and their associated blobs/data columns. Any attempts to unify the two services should be left to a separate PR in my opinion. #### `SyncNetworkContext` `SyncNetworkContext` manages custody sync data columns by range requests separately from other sync RPC requests. I think this is a nice separation considering that custody backfill is its own service. #### Data column import logic The import logic verifies KZG commitments and that the data column's block root matches the block root in the node's store before importing columns. #### New channel to send messages to `SyncManager` Now external services can communicate with the `SyncManager`. In this PR this channel is used to trigger a custody sync. Alternatively we may be able to use the existing `mpsc` channel that the `SyncNetworkContext` uses to communicate with the `SyncManager`. I will spend some time reviewing this. Co-Authored-By: Eitan Seri-Levi <eserilev@ucsc.edu> Co-Authored-By: Eitan Seri-Levi <eserilev@gmail.com> Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com>
129 lines
4.6 KiB
Bash
Executable File
129 lines
4.6 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Checkpoint sync to a live network.
#
# Start with checkpoint sync and let the node(s) sync to head and perform backfill for a specified number of slots.
# This test ensures we cover all sync components (range, lookup, backfill) and measures sync speed
# to detect any performance regressions.
#
# Positional arguments (both optional):
#   $1 - enclave name                (default: sync-testnet)
#   $2 - path to the network config  (default: checkpoint-sync-config-sepolia.yaml next to this script)

# Absolute path of the directory containing this script, so relative helpers
# resolve regardless of the caller's working directory.
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

ENCLAVE_NAME=${1:-sync-testnet}
CONFIG=${2:-$SCRIPT_DIR/checkpoint-sync-config-sepolia.yaml}

# Test configuration
# ------------------------------------------------------
# Interval for polling the /lighthouse/syncing endpoint for sync status
POLL_INTERVAL_SECS=5
# Target number of slots to backfill to complete this test.
TARGET_BACKFILL_SLOTS=256
# Timeout for this test, if the node(s) fail to backfill `TARGET_BACKFILL_SLOTS` slots, fail the test.
TIMEOUT_MINS=10
# Same timeout expressed in seconds, for comparison against `date +%s` deltas.
TIMEOUT_SECS=$((TIMEOUT_MINS * 60))
# ------------------------------------------------------

# Polls a single node's sync status, prints it, and marks the node complete
# once it reports at least `TARGET_BACKFILL_SLOTS` backfilled slots.
#
# Globals:   node_urls (read), TARGET_BACKFILL_SLOTS (read)
# Arguments: $1 - node type; used as the key into `node_urls` and as the log prefix
# Outputs:   one status line on stdout
# Calls:     mark_node_complete when the backfill target is reached
poll_node() {
  local node_type=$1
  local url=${node_urls[$node_type]}
  # Keep all scratch values local so repeated polls of different nodes cannot
  # observe each other's state.
  local response sync_state status fields completed
  # Upper bound for a single HTTP request, so a hung node cannot stall the
  # polling loop past the overall test timeout.
  local -r request_timeout_secs=10

  response=$(curl -s --max-time "$request_timeout_secs" "${url}/lighthouse/syncing")

  if [ -z "$response" ] || [ "$response" = "null" ]; then
    echo "${node_type} status: No response or null response"
    return
  fi

  # Print syncing status.
  # `.data` is either an object keyed by the state name (with detail fields)
  # or a plain string naming the state.
  sync_state=$(jq -r 'if (.data | type) == "object" then "object" else "string" end' <<<"$response" 2>/dev/null)

  if [ "$sync_state" = "object" ]; then
    status=$(jq -r '.data | keys[0] // "Unknown"' <<<"$response")
    # Pass the state name as a jq variable instead of splicing it into the
    # filter text, so unusual key names cannot break or alter the program.
    fields=$(jq -r --arg state "$status" \
      '.data[$state] | to_entries | map("\(.key): \(.value)") | join(", ")' <<<"$response")
    echo "${node_type} status: ${status}, ${fields}"
  else
    status=$(jq -r '.data' <<<"$response" 2>/dev/null)
    echo "${node_type} status: ${status:-Unknown}"
  fi

  # Check for completion criteria
  if [ "$status" = "BackFillSyncing" ]; then
    completed=$(jq -r '.data.BackFillSyncing.completed // 0' <<<"$response" 2>/dev/null)
    # Only compare when jq produced a plain non-negative integer; anything
    # else is treated as "not complete yet" rather than a test error.
    if [[ "$completed" =~ ^[0-9]+$ ]] && [ "$completed" -ge "$TARGET_BACKFILL_SLOTS" ]; then
      mark_node_complete "$node_type"
    fi
  fi
  # For other states (Synced, SyncingFinalized, SyncingHead, SyncTransition, Stalled, Unknown),
  # we continue polling
  # NOTE: there is a bug where Lighthouse briefly switch to "Synced" before completing backfilling. We ignore this state
  # as it's unlikely a node is fully synced without going through backfilling `TARGET_BACKFILL_SLOTS` slots (only
  # possible on a new network).
}

# Marks a node as complete and records the completion time.
#
# Only the first call for a given node has any effect; later calls are no-ops,
# so the recorded time is that of the first completion.
#
# Globals:   node_completed (read/write), node_complete_time (write), start_time (read)
# Arguments: $1 - node type; key into `node_completed` / `node_complete_time`
# Outputs:   one "completed backfill" line on stdout on the first call
mark_node_complete() {
  local node_type=$1
  local now

  if [ "${node_completed[$node_type]}" = false ]; then
    # Take a single timestamp and reuse it for both the stored value and the
    # printed duration.
    now=$(date +%s)
    node_completed[$node_type]=true
    node_complete_time[$node_type]=$now
    echo "${node_type} completed backfill in $((now - start_time)) seconds"
  fi
}

# Stops the local testnet for `ENCLAVE_NAME` and exits the script.
#
# Globals:   SCRIPT_DIR (read), ENCLAVE_NAME (read)
# Arguments: $1 - exit code for the script (defaults to 0 when omitted)
# Outputs:   progress messages on stdout
# NOTE(review): this function itself only invokes stop_local_testnet.sh;
# presumably that script is what dumps the logs - confirm.
exit_and_dump_logs() {
  local exit_code=${1:-0}
  echo "Shutting down..."
  # Quoted so paths / enclave names containing spaces are passed intact.
  "$SCRIPT_DIR/../local_testnet/stop_local_testnet.sh" "$ENCLAVE_NAME"
  echo "Test completed with exit code $exit_code."
  exit "$exit_code"
}

# Start the nodes
if ! "$SCRIPT_DIR/../local_testnet/start_local_testnet.sh" -e "$ENCLAVE_NAME" -b false -n "$CONFIG"; then
  echo "Failed to start local testnet"
  exit_and_dump_logs 1
fi

# Reference point for the timeout and for the reported sync durations.
start_time=$(date +%s)

# Get all beacon API URLs
supernode_url=$(kurtosis port print "$ENCLAVE_NAME" cl-1-lighthouse-geth http)
fullnode_url=$(kurtosis port print "$ENCLAVE_NAME" cl-2-lighthouse-geth http)

# Fail fast if the polled node's URL could not be resolved; otherwise every
# poll would hit a bare "/lighthouse/syncing" until the test timeout expires.
if [ -z "$fullnode_url" ]; then
  echo "ERROR: Could not resolve the fullnode beacon API URL. Exiting."
  exit_and_dump_logs 1
fi

# Initialize statuses
declare -A node_completed
declare -A node_complete_time
declare -A node_urls

node_urls["supernode"]="$supernode_url"
node_urls["fullnode"]="$fullnode_url"
node_completed["supernode"]=false
node_completed["fullnode"]=false

echo "Polling sync status until backfill reaches ${TARGET_BACKFILL_SLOTS} slots or timeout of ${TIMEOUT_MINS} mins"

# Only the fullnode is currently waited on; the supernode variant of each
# statement is kept commented out alongside it.
# while [ "${node_completed[supernode]}" = false ] || [ "${node_completed[fullnode]}" = false ]; do
while [ "${node_completed[fullnode]}" = false ]; do
  current_time=$(date +%s)
  elapsed=$((current_time - start_time))

  if [ "$elapsed" -ge "$TIMEOUT_SECS" ]; then
    echo "ERROR: Nodes timed out syncing after ${TIMEOUT_MINS} minutes. Exiting."
    exit_and_dump_logs 1
  fi

  # Poll each node that hasn't completed yet
  # for node in "supernode" "fullnode"; do
  for node in "fullnode"; do
    if [ "${node_completed[$node]}" = false ]; then
      poll_node "$node"
    fi
  done

  sleep "$POLL_INTERVAL_SECS"
done

echo "Sync test complete! Fullnode has synced to HEAD and backfilled ${TARGET_BACKFILL_SLOTS} slots."
# echo "Supernode time: $((node_complete_time[supernode] - start_time)) seconds"
fullnode_done_time=${node_complete_time[fullnode]}
echo "Fullnode time: $((fullnode_done_time - start_time)) seconds"
exit_and_dump_logs 0