Fix PeerDAS sync scoring (#7352)

* Remove request tracking inside syncing chains

* Prioritize by-range peers in network context

* Prioritize custody peers for columns-by-range

* Explicit error handling of the no peers error case

* Remove good_peers_on_sampling_subnets

* Count AwaitingDownload towards the buffer limit

* Retry syncing chains in AwaitingDownload state

* Use same peer prioritization for lookups

* Review PR

* Address TODOs

* Revert changes to peer erroring in range sync

* Revert metrics changes

* Update comment

* Pass peers_to_deprioritize to select_columns_by_range_peers_to_request

* More idiomatic

* Idiomatic while

* Add note about infinite loop

* Use while let

* Fix wrong custody column count for lookup blocks

* Remove impl

* Remove stale comment

* Fix build errors.

* Or default

* Review PR

* BatchPeerGroup

* Match block and blob signatures

* Explicit match statement to BlockError in range sync

* Remove todo in BatchPeerGroup

* Remove participating peers from backfill sync

* Remove MissingAllCustodyColumns error

* Merge fixes

* Clean up PR

* Consistent naming of batch_peers

* Address multiple review comments

* Better errors for DAS

* Penalize column peers once

* Restore fn

* Fix error enum

* Remove MismatchedPublicKeyLen

* Revert testing changes

* Change BlockAndCustodyColumns enum variant

* Revert type change in import_historical_block_batch

* Drop pubkey cache

* Don't collect Vec

* Classify errors

* Remove ReconstructColumnsError

* More detailed UnrequestedSlot error

* Lint test

* Fix slot conversion

* Reduce penalty for missing blobs

* Revert changes in peer selection

* Lint tests

* Rename block matching functions

* Reorder block matching in historical blocks

* Fix order of block matching

* Add store tests

* Filter blockchain in assert_correct_historical_block_chain

* Also filter before KZG checks

* Lint tests

* Fix lint

* Fix fulu err assertion

* Check point is not at infinity

* Fix ws sync test

* Revert dropping filter fn

---------

Co-authored-by: Jimmy Chen <jchen.tc@gmail.com>
Co-authored-by: Jimmy Chen <jimmy@sigmaprime.io>
Co-authored-by: Pawan Dhananjay <pawandhananjay@gmail.com>
commit b014675b7a (parent f06d1d0346)
Author: Lion - dapplion
Committed by: GitHub
Date: 2025-05-21 08:06:42 -05:00

27 changed files with 1103 additions and 654 deletions
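The squashed commit titles above are terse, so here is a minimal, hypothetical sketch of the peer-selection idea several of them describe ("Prioritize custody peers for columns-by-range", "Pass peers_to_deprioritize to select_columns_by_range_peers_to_request"). The function name comes from the commit list, but the signature, types, and logic below are illustrative assumptions, not Lighthouse's implementation:

use std::collections::HashMap;

// Placeholder types; Lighthouse's real PeerId and ColumnIndex differ.
type PeerId = u64;
type ColumnIndex = u64;

/// For each wanted column, pick one peer that custodies it, preferring peers
/// that are not in the deprioritized set (e.g. peers that recently failed a
/// batch). Columns with no custody peer are skipped here; the caller is
/// expected to turn that into an explicit "no peers" error rather than
/// leaving the syncing chain stalled in AwaitingDownload.
fn select_columns_by_range_peers_to_request(
    columns: &[ColumnIndex],
    custody_peers: &HashMap<ColumnIndex, Vec<PeerId>>,
    peers_to_deprioritize: &[PeerId],
) -> HashMap<PeerId, Vec<ColumnIndex>> {
    let mut requests: HashMap<PeerId, Vec<ColumnIndex>> = HashMap::new();
    for column in columns {
        let Some(peers) = custody_peers.get(column) else {
            continue;
        };
        // Prefer a custody peer that hasn't been deprioritized; fall back to
        // any custody peer rather than making no request at all.
        let peer = peers
            .iter()
            .find(|p| !peers_to_deprioritize.contains(*p))
            .or(peers.first());
        if let Some(peer) = peer {
            requests.entry(*peer).or_default().push(*column);
        }
    }
    requests
}

fn main() {
    let custody_peers = HashMap::from([(0, vec![1, 2]), (1, vec![2])]);
    // Peer 2 recently failed, so column 0 goes to peer 1; column 1 has no
    // alternative, so it still falls back to peer 2.
    let requests = select_columns_by_range_peers_to_request(&[0, 1], &custody_peers, &[2]);
    println!("{requests:?}");
}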


@@ -3,7 +3,8 @@
 use beacon_chain::attestation_verification::Error as AttnError;
 use beacon_chain::block_verification_types::RpcBlock;
 use beacon_chain::builder::BeaconChainBuilder;
-use beacon_chain::data_availability_checker::AvailableBlock;
+use beacon_chain::data_availability_checker::{AvailableBlock, AvailableBlockData};
+use beacon_chain::data_column_verification::CustodyDataColumn;
 use beacon_chain::schema_change::migrate_schema;
 use beacon_chain::test_utils::SyncCommitteeStrategy;
 use beacon_chain::test_utils::{
@@ -11,9 +12,11 @@ use beacon_chain::test_utils::{
     BlockStrategy, DiskHarnessType,
 };
 use beacon_chain::{
-    data_availability_checker::MaybeAvailableBlock, historical_blocks::HistoricalBlockError,
-    migrate::MigratorConfig, BeaconChain, BeaconChainError, BeaconChainTypes, BeaconSnapshot,
-    BlockError, ChainConfig, NotifyExecutionLayer, ServerSentEventHandler, WhenSlotSkipped,
+    data_availability_checker::{AvailabilityCheckError, MaybeAvailableBlock},
+    historical_blocks::HistoricalBlockError,
+    migrate::MigratorConfig,
+    BeaconChain, BeaconChainError, BeaconChainTypes, BeaconSnapshot, BlockError, ChainConfig,
+    NotifyExecutionLayer, ServerSentEventHandler, WhenSlotSkipped,
 };
 use logging::create_test_tracing_subscriber;
 use maplit::hashset;
@@ -33,6 +36,7 @@ use store::{
     BlobInfo, DBColumn, HotColdDB, StoreConfig,
 };
 use tempfile::{tempdir, TempDir};
+use tracing::info;
 use types::test_utils::{SeedableRng, XorShiftRng};
 use types::*;
@@ -2339,6 +2343,7 @@ async fn weak_subjectivity_sync_test(slots: Vec<Slot>, checkpoint_slot: Slot) {
     let store = get_store(&temp2);
     let spec = test_spec::<E>();
     let seconds_per_slot = spec.seconds_per_slot;
+    let wss_fork = harness.spec.fork_name_at_slot::<E>(checkpoint_slot);
     let kzg = get_kzg(&spec);
@@ -2499,12 +2504,154 @@ async fn weak_subjectivity_sync_test(slots: Vec<Slot>, checkpoint_slot: Slot) {
     };
 
     // Importing the invalid batch should error.
-    assert!(matches!(
-        beacon_chain
-            .import_historical_block_batch(batch_with_invalid_first_block)
-            .unwrap_err(),
-        HistoricalBlockError::InvalidSignature
-    ));
+    let err = beacon_chain
+        .import_historical_block_batch(batch_with_invalid_first_block)
+        .unwrap_err();
+    match err {
+        HistoricalBlockError::InvalidSignature(_) => {} // ok
+        e => panic!("Unexpected error {e:?}"),
+    }
+
+    if wss_fork.deneb_enabled() {
+        // Currently ExecutionBlockGenerator::build_new_execution_payload doesn't accept a parameter
+        // to generate a fixed number of blob TXs, so it's random. Given the large number of blocks
+        // in this batch it's very unlikely that no block has data, but it's probable that it's
+        // not index 0, so we need to find the first block with data.
+        let first_block_with_data = available_blocks
+            .iter()
+            .position(|block| block.block().num_expected_blobs() > 0)
+            .expect("No blocks have data, try different RNG");
+
+        // Test 1: Invalidate sidecar header signature
+        let mut batch_with_invalid_header = available_blocks.to_vec();
+        batch_with_invalid_header[first_block_with_data] = {
+            let (block_root, block, block_data) = batch_with_invalid_header[first_block_with_data]
+                .clone()
+                .deconstruct();
+            if wss_fork.fulu_enabled() {
+                info!(block_slot = %block.slot(), ?block_root, "Corrupting data column header signature");
+                let AvailableBlockData::DataColumns(mut data_columns) = block_data else {
+                    panic!("no columns")
+                };
+                assert!(
+                    !data_columns.is_empty(),
+                    "data column sidecars shouldn't be empty"
+                );
+                let mut data_column = (*data_columns[0]).clone();
+                data_column.signed_block_header.signature = Signature::empty();
+                data_columns[0] = data_column.into();
+                AvailableBlock::__new_for_testing(
+                    block_root,
+                    block,
+                    AvailableBlockData::DataColumns(data_columns),
+                    beacon_chain.spec.clone(),
+                )
+            } else {
+                info!(block_slot = %block.slot(), ?block_root, "Corrupting blob header signature");
+                let AvailableBlockData::Blobs(mut blobs) = block_data else {
+                    let blocks_have_blobs = available_blocks
+                        .into_iter()
+                        .map(|block| (block.block().slot(), block.has_blobs()))
+                        .collect::<Vec<_>>();
+                    panic!(
+                        "no blobs at block {:?} {}. blocks_have_blobs {:?}",
+                        block_root,
+                        block.slot(),
+                        blocks_have_blobs
+                    );
+                };
+                assert!(!blobs.is_empty(), "blob sidecars shouldn't be empty");
+                let mut blob = (*blobs[0]).clone();
+                blob.signed_block_header.signature = Signature::empty();
+                blobs[0] = blob.into();
+                AvailableBlock::__new_for_testing(
+                    block_root,
+                    block,
+                    AvailableBlockData::Blobs(blobs),
+                    beacon_chain.spec.clone(),
+                )
+            }
+        };
+
+        // Importing the invalid batch should error.
+        let err = beacon_chain
+            .import_historical_block_batch(batch_with_invalid_header)
+            .unwrap_err();
+        if wss_fork.fulu_enabled() {
+            match err {
+                HistoricalBlockError::InvalidDataColumnsSignature(_) => {} // ok
+                e => panic!("Unexpected error {e:?}"),
+            }
+        } else {
+            match err {
+                HistoricalBlockError::InvalidBlobsSignature(_) => {} // ok
+                e => panic!("Unexpected error {e:?}"),
+            }
+        }
+
+        // Test 2: invalidate KZG proof
+        let mut batch_with_invalid_kzg = available_blocks
+            .iter()
+            .map(|block| available_to_rpc_block(block.clone(), &harness.spec))
+            .collect::<Vec<_>>();
+        batch_with_invalid_kzg[first_block_with_data] = {
+            let (block_root, block, blobs, cols) = batch_with_invalid_kzg[first_block_with_data]
+                .clone()
+                .deconstruct();
+            if wss_fork.fulu_enabled() {
+                info!(block_slot = %block.slot(), ?block_root, "Corrupting data column KZG proof");
+                let (mut data_columns, expected_column_indices) = cols.unwrap();
+                assert!(
+                    !data_columns.is_empty(),
+                    "data column sidecars shouldn't be empty"
+                );
+                let mut data_column = (*(data_columns[0]).clone_arc()).clone();
+                if data_column.kzg_proofs[0] == KzgProof::empty() {
+                    panic!("kzg_proof is already G1_POINT_AT_INFINITY")
+                }
+                data_column.kzg_proofs[0] = KzgProof::empty();
+                data_columns[0] = CustodyDataColumn::from_asserted_custody(data_column.into());
+                RpcBlock::new_with_custody_columns(
+                    Some(block_root),
+                    block,
+                    data_columns.to_vec(),
+                    expected_column_indices,
+                    &harness.spec,
+                )
+                .unwrap()
+            } else {
+                info!(block_slot = %block.slot(), ?block_root, "Corrupting blob KZG proof");
+                let mut blobs = blobs.unwrap();
+                assert!(!blobs.is_empty(), "blob sidecars shouldn't be empty");
+                let mut blob = (*blobs[0]).clone();
+                blob.kzg_proof = KzgProof::empty();
+                blobs[0] = blob.into();
+                RpcBlock::new(Some(block_root), block, Some(blobs)).unwrap()
+            }
+        };
+
+        let err = beacon_chain
+            .verify_and_import_historical_block_batch(batch_with_invalid_kzg)
+            .unwrap_err();
+        if wss_fork.fulu_enabled() {
+            match err {
+                HistoricalBlockError::AvailabilityCheckError(
+                    AvailabilityCheckError::InvalidColumn(_),
+                ) => {} // ok
+                e => panic!("Unexpected error {e:?}"),
+            }
+        } else {
+            match err {
+                HistoricalBlockError::AvailabilityCheckError(
+                    AvailabilityCheckError::InvalidBlobs(_),
+                ) => {} // ok
+                e => panic!("Unexpected error {e:?}"),
+            }
+        }
+    }
 
     // Importing the batch with valid signatures should succeed.
     let available_blocks_dup = available_blocks.iter().map(clone_block).collect::<Vec<_>>();
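For orientation before the final hunk: the test above pins down a two-layer error model. Corrupting a sidecar's signed block header is caught by import_historical_block_batch as a HistoricalBlockError::Invalid*Signature variant, while a corrupted KZG proof only surfaces through verify_and_import_historical_block_batch, wrapped as an AvailabilityCheckError. A rough sketch of the enum shapes as the test matches them; the variant names are taken from the match arms above, but the payload types are placeholder assumptions, since the real types don't appear in this diff:

// Sketch only: variant names from the test's match arms; payloads assumed.
#[derive(Debug)]
enum AvailabilityCheckError {
    InvalidBlobs(String),  // blob KZG verification failed (Deneb)
    InvalidColumn(String), // data column KZG verification failed (Fulu)
}

#[derive(Debug)]
enum HistoricalBlockError {
    InvalidSignature(String),            // batch BLS signature check failed
    InvalidBlobsSignature(String),       // blob sidecar's signed block header is invalid
    InvalidDataColumnsSignature(String), // data column sidecar's signed block header is invalid
    AvailabilityCheckError(AvailabilityCheckError), // KZG failures bubble up here
}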
@@ -3678,5 +3825,27 @@ fn get_blocks(
 }
 
 fn clone_block<E: EthSpec>(block: &AvailableBlock<E>) -> AvailableBlock<E> {
-    block.__clone_without_recv().unwrap()
+    block.clone()
 }
+
+fn available_to_rpc_block<E: EthSpec>(block: AvailableBlock<E>, spec: &ChainSpec) -> RpcBlock<E> {
+    let (block_root, block, block_data) = block.deconstruct();
+    match block_data {
+        AvailableBlockData::NoData => RpcBlock::new(Some(block_root), block, None).unwrap(),
+        AvailableBlockData::Blobs(blobs) => {
+            RpcBlock::new(Some(block_root), block, Some(blobs)).unwrap()
+        }
+        AvailableBlockData::DataColumns(data_columns) => RpcBlock::new_with_custody_columns(
+            Some(block_root),
+            block,
+            data_columns
+                .into_iter()
+                .map(|d| CustodyDataColumn::from_asserted_custody(d))
+                .collect(),
+            vec![],
+            spec,
+        )
+        .unwrap(),
+    }
+}
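A possible call site for the new helper, mirroring the map at the start of "Test 2" above. This is a usage sketch, not part of the commit; it assumes available_blocks: Vec<AvailableBlock<E>> and a spec binding in scope, as in the test:

// Convert verified AvailableBlocks back into RpcBlocks so the KZG-checking
// import path (verify_and_import_historical_block_batch) can be exercised.
let rpc_batch: Vec<RpcBlock<E>> = available_blocks
    .iter()
    .map(|block| available_to_rpc_block(block.clone(), &spec))
    .collect();

Note that the helper passes an empty vec![] for the expected column indices, whereas the KZG-corruption test threads through the indices returned by deconstruct().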