mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-03 16:51:41 +00:00
Compare commits
37 Commits
unstable
...
peerdas-ra
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb5f76f137 | ||
|
|
aa726cc72c | ||
|
|
56fcf289ec | ||
|
|
8c8a8124ee | ||
|
|
a7a3457def | ||
|
|
895ce18343 | ||
|
|
82c8e82fe1 | ||
|
|
e426e45455 | ||
|
|
4e13b3be0f | ||
|
|
7a03578795 | ||
|
|
dbce5f7734 | ||
|
|
28d9d8b8e2 | ||
|
|
ffc8321f9d | ||
|
|
6f754bfd8d | ||
|
|
4fadf1fba8 | ||
|
|
ae0ef8f929 | ||
|
|
2b4a9bda44 | ||
|
|
b9ce98a3e5 | ||
|
|
1b72871ad1 | ||
|
|
42ef88bdb4 | ||
|
|
c6b39e9e10 | ||
|
|
02d97377a5 | ||
|
|
144b83e625 | ||
|
|
0ef95dd7f8 | ||
|
|
fc3922f854 | ||
|
|
52722b7b2e | ||
|
|
86ad87eced | ||
|
|
8f74adc66f | ||
|
|
34b37b97ed | ||
|
|
01329ab230 | ||
|
|
c8a0c9e379 | ||
|
|
7d0fb93274 | ||
|
|
b383f7af53 | ||
|
|
a85d863fb6 | ||
|
|
801659d4ae | ||
|
|
4fb2ae658a | ||
|
|
b014675b7a |
@@ -94,6 +94,7 @@ use store::{Error as DBError, HotStateSummary, KeyValueStore, StoreOp};
|
||||
use strum::AsRefStr;
|
||||
use task_executor::JoinHandle;
|
||||
use tracing::{debug, error};
|
||||
use types::ColumnIndex;
|
||||
use types::{
|
||||
data_column_sidecar::DataColumnSidecarError, BeaconBlockRef, BeaconState, BeaconStateError,
|
||||
BlobsList, ChainSpec, DataColumnSidecarList, Epoch, EthSpec, ExecutionBlockHash, FullPayload,
|
||||
@@ -220,6 +221,10 @@ pub enum BlockError {
|
||||
///
|
||||
/// The block is invalid and the peer is faulty.
|
||||
InvalidSignature(InvalidSignature),
|
||||
/// One or more signatures in a BlobSidecar of an RpcBlock are invalid
|
||||
InvalidBlobsSignature(Vec<u64>),
|
||||
/// One or more signatures in a DataColumnSidecar of an RpcBlock are invalid
|
||||
InvalidDataColumnsSignature(Vec<ColumnIndex>),
|
||||
/// The provided block is not from a later slot than its parent.
|
||||
///
|
||||
/// ## Peer scoring
|
||||
@@ -634,6 +639,34 @@ pub fn signature_verify_chain_segment<T: BeaconChainTypes>(
|
||||
&chain.spec,
|
||||
)?;
|
||||
|
||||
// Verify signatures before matching blocks and data. Otherwise we may penalize blob or column
|
||||
// peers for valid signatures if the block peer sends us an invalid signature.
|
||||
let pubkey_cache = get_validator_pubkey_cache(chain)?;
|
||||
let mut signature_verifier = get_signature_verifier(&state, &pubkey_cache, &chain.spec);
|
||||
for (block_root, block) in &chain_segment {
|
||||
let mut consensus_context =
|
||||
ConsensusContext::new(block.slot()).set_current_block_root(*block_root);
|
||||
signature_verifier.include_all_signatures(block.as_block(), &mut consensus_context)?;
|
||||
}
|
||||
if signature_verifier.verify().is_err() {
|
||||
return Err(BlockError::InvalidSignature(InvalidSignature::Unknown));
|
||||
}
|
||||
drop(pubkey_cache);
|
||||
|
||||
// Verify that blobs or data columns signatures match
|
||||
//
|
||||
// TODO(das): Should cheaply check for the correct proposer, as added protection, if blocks and columns
|
||||
// don't match. This code attributes fault to the blobs / data columns if they don't match the
|
||||
// block
|
||||
for (_, block) in &chain_segment {
|
||||
if let Err(indices) = block.match_block_and_blobs() {
|
||||
return Err(BlockError::InvalidBlobsSignature(indices));
|
||||
}
|
||||
if let Err(indices) = block.match_block_and_data_columns() {
|
||||
return Err(BlockError::InvalidDataColumnsSignature(indices));
|
||||
}
|
||||
}
|
||||
|
||||
// unzip chain segment and verify kzg in bulk
|
||||
let (roots, blocks): (Vec<_>, Vec<_>) = chain_segment.into_iter().unzip();
|
||||
let maybe_available_blocks = chain
|
||||
@@ -655,20 +688,6 @@ pub fn signature_verify_chain_segment<T: BeaconChainTypes>(
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// verify signatures
|
||||
let pubkey_cache = get_validator_pubkey_cache(chain)?;
|
||||
let mut signature_verifier = get_signature_verifier(&state, &pubkey_cache, &chain.spec);
|
||||
for svb in &mut signature_verified_blocks {
|
||||
signature_verifier
|
||||
.include_all_signatures(svb.block.as_block(), &mut svb.consensus_context)?;
|
||||
}
|
||||
|
||||
if signature_verifier.verify().is_err() {
|
||||
return Err(BlockError::InvalidSignature(InvalidSignature::Unknown));
|
||||
}
|
||||
|
||||
drop(pubkey_cache);
|
||||
|
||||
if let Some(signature_verified_block) = signature_verified_blocks.first_mut() {
|
||||
signature_verified_block.parent = Some(parent);
|
||||
}
|
||||
|
||||
@@ -9,8 +9,9 @@ use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
use types::blob_sidecar::BlobIdentifier;
|
||||
use types::{
|
||||
BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, ChainSpec, Epoch, EthSpec,
|
||||
Hash256, RuntimeVariableList, SignedBeaconBlock, SignedBeaconBlockHeader, Slot,
|
||||
BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, ChainSpec, ColumnIndex,
|
||||
DataColumnSidecar, Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock,
|
||||
SignedBeaconBlockHeader, Slot,
|
||||
};
|
||||
|
||||
/// A block that has been received over RPC. It has 2 internal variants:
|
||||
@@ -48,7 +49,7 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(block) => block,
|
||||
RpcBlockInner::BlockAndBlobs(block, _) => block,
|
||||
RpcBlockInner::BlockAndCustodyColumns(block, _) => block,
|
||||
RpcBlockInner::BlockAndCustodyColumns { block, .. } => block,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,7 +57,7 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(block) => block.clone(),
|
||||
RpcBlockInner::BlockAndBlobs(block, _) => block.clone(),
|
||||
RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(),
|
||||
RpcBlockInner::BlockAndCustodyColumns { block, .. } => block.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +65,7 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) => None,
|
||||
RpcBlockInner::BlockAndBlobs(_, blobs) => Some(blobs),
|
||||
RpcBlockInner::BlockAndCustodyColumns(_, _) => None,
|
||||
RpcBlockInner::BlockAndCustodyColumns { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +73,36 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) => None,
|
||||
RpcBlockInner::BlockAndBlobs(_, _) => None,
|
||||
RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => Some(data_columns),
|
||||
RpcBlockInner::BlockAndCustodyColumns { data_columns, .. } => Some(data_columns),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns Err if any of its inner BlobSidecar's signed_block_header does not match the inner
|
||||
/// block
|
||||
pub fn match_block_and_blobs(&self) -> Result<(), Vec<u64>> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) => Ok(()),
|
||||
RpcBlockInner::BlockAndBlobs(block, blobs) => match_block_and_blobs(block, blobs),
|
||||
RpcBlockInner::BlockAndCustodyColumns { .. } => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns Err if any of its inner DataColumnSidecar's signed_block_header does not match the
|
||||
/// inner block
|
||||
pub fn match_block_and_data_columns(&self) -> Result<(), Vec<ColumnIndex>> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) => Ok(()),
|
||||
RpcBlockInner::BlockAndBlobs(..) => Ok(()),
|
||||
RpcBlockInner::BlockAndCustodyColumns {
|
||||
block,
|
||||
data_columns,
|
||||
..
|
||||
} => match_block_and_data_columns(
|
||||
block,
|
||||
data_columns
|
||||
.iter()
|
||||
.map(|data_column| data_column.as_data_column()),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -83,14 +113,19 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
#[derive(Debug, Clone, Derivative)]
|
||||
#[derivative(Hash(bound = "E: EthSpec"))]
|
||||
enum RpcBlockInner<E: EthSpec> {
|
||||
/// Single block lookup response. This should potentially hit the data availability cache.
|
||||
/// **Range sync**: Variant for all pre-Deneb blocks
|
||||
/// **Lookup sync**: Variant used for all blocks of all forks, regardless of whether they have data or
|
||||
/// not
|
||||
Block(Arc<SignedBeaconBlock<E>>),
|
||||
/// This variant is used with parent lookups and by-range responses. It should have all blobs
|
||||
/// ordered, all block roots matching, and the correct number of blobs for this block.
|
||||
/// **Range sync**: Variant for all post-Deneb blocks regardless if they have data or not
|
||||
/// **Lookup sync**: Not used
|
||||
BlockAndBlobs(Arc<SignedBeaconBlock<E>>, BlobSidecarList<E>),
|
||||
/// This variant is used with parent lookups and by-range responses. It should have all
|
||||
/// requested data columns, all block roots matching for this block.
|
||||
BlockAndCustodyColumns(Arc<SignedBeaconBlock<E>>, CustodyDataColumnList<E>),
|
||||
/// **Range sync**: Variant for all post-Fulu blocks regardless if they have data or not
|
||||
/// **Lookup sync**: Not used
|
||||
BlockAndCustodyColumns {
|
||||
block: Arc<SignedBeaconBlock<E>>,
|
||||
data_columns: CustodyDataColumnList<E>,
|
||||
},
|
||||
}
|
||||
|
||||
impl<E: EthSpec> RpcBlock<E> {
|
||||
@@ -153,21 +188,16 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
block: Arc<SignedBeaconBlock<E>>,
|
||||
custody_columns: Vec<CustodyDataColumn<E>>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Self, AvailabilityCheckError> {
|
||||
) -> Result<Self, String> {
|
||||
let block_root = block_root.unwrap_or_else(|| get_block_root(&block));
|
||||
|
||||
if block.num_expected_blobs() > 0 && custody_columns.is_empty() {
|
||||
// The number of required custody columns is out of scope here.
|
||||
return Err(AvailabilityCheckError::MissingCustodyColumns);
|
||||
}
|
||||
// Treat empty data column lists as if they are missing.
|
||||
let inner = if !custody_columns.is_empty() {
|
||||
RpcBlockInner::BlockAndCustodyColumns(
|
||||
block,
|
||||
RuntimeVariableList::new(custody_columns, spec.number_of_columns as usize)?,
|
||||
let inner = RpcBlockInner::BlockAndCustodyColumns {
|
||||
block,
|
||||
data_columns: RuntimeVariableList::new(
|
||||
custody_columns,
|
||||
spec.number_of_columns as usize,
|
||||
)
|
||||
} else {
|
||||
RpcBlockInner::Block(block)
|
||||
.map_err(|e| format!("custody_columns len exceeds number_of_columns: {e:?}"))?,
|
||||
};
|
||||
Ok(Self {
|
||||
block_root,
|
||||
@@ -188,21 +218,22 @@ impl<E: EthSpec> RpcBlock<E> {
|
||||
match self.block {
|
||||
RpcBlockInner::Block(block) => (block_root, block, None, None),
|
||||
RpcBlockInner::BlockAndBlobs(block, blobs) => (block_root, block, Some(blobs), None),
|
||||
RpcBlockInner::BlockAndCustodyColumns(block, data_columns) => {
|
||||
(block_root, block, None, Some(data_columns))
|
||||
}
|
||||
RpcBlockInner::BlockAndCustodyColumns {
|
||||
block,
|
||||
data_columns,
|
||||
} => (block_root, block, None, Some(data_columns)),
|
||||
}
|
||||
}
|
||||
pub fn n_blobs(&self) -> usize {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) | RpcBlockInner::BlockAndCustodyColumns(_, _) => 0,
|
||||
RpcBlockInner::Block(_) | RpcBlockInner::BlockAndCustodyColumns { .. } => 0,
|
||||
RpcBlockInner::BlockAndBlobs(_, blobs) => blobs.len(),
|
||||
}
|
||||
}
|
||||
pub fn n_data_columns(&self) -> usize {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(_) | RpcBlockInner::BlockAndBlobs(_, _) => 0,
|
||||
RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => data_columns.len(),
|
||||
RpcBlockInner::BlockAndCustodyColumns { data_columns, .. } => data_columns.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -512,17 +543,50 @@ impl<E: EthSpec> AsBlock<E> for RpcBlock<E> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(block) => block,
|
||||
RpcBlockInner::BlockAndBlobs(block, _) => block,
|
||||
RpcBlockInner::BlockAndCustodyColumns(block, _) => block,
|
||||
RpcBlockInner::BlockAndCustodyColumns { block, .. } => block,
|
||||
}
|
||||
}
|
||||
fn block_cloned(&self) -> Arc<SignedBeaconBlock<E>> {
|
||||
match &self.block {
|
||||
RpcBlockInner::Block(block) => block.clone(),
|
||||
RpcBlockInner::BlockAndBlobs(block, _) => block.clone(),
|
||||
RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(),
|
||||
RpcBlockInner::BlockAndCustodyColumns { block, .. } => block.clone(),
|
||||
}
|
||||
}
|
||||
fn canonical_root(&self) -> Hash256 {
|
||||
self.as_block().canonical_root()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns Err if any of `blobs` BlobSidecar's signed_block_header does not match
|
||||
/// block
|
||||
pub fn match_block_and_blobs<E: EthSpec>(
|
||||
block: &SignedBeaconBlock<E>,
|
||||
blobs: &BlobSidecarList<E>,
|
||||
) -> Result<(), Vec<u64>> {
|
||||
let indices = blobs
|
||||
.iter()
|
||||
.filter(|blob| &blob.signed_block_header.signature != block.signature())
|
||||
.map(|blob| blob.index)
|
||||
.collect::<Vec<_>>();
|
||||
if indices.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(indices)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_block_and_data_columns<'a, E: EthSpec>(
|
||||
block: &SignedBeaconBlock<E>,
|
||||
data_columns: impl Iterator<Item = &'a Arc<DataColumnSidecar<E>>>,
|
||||
) -> Result<(), Vec<ColumnIndex>> {
|
||||
let indices = data_columns
|
||||
.filter(|column| &column.signed_block_header.signature != block.signature())
|
||||
.map(|column| column.index)
|
||||
.collect::<Vec<_>>();
|
||||
if indices.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(indices)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use crate::blob_verification::{verify_kzg_for_blob_list, GossipVerifiedBlob, KzgVerifiedBlobList};
|
||||
use crate::block_verification_types::{
|
||||
AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock,
|
||||
match_block_and_blobs, match_block_and_data_columns, AsBlock, AvailabilityPendingExecutedBlock,
|
||||
AvailableExecutedBlock, RpcBlock,
|
||||
};
|
||||
use crate::data_availability_checker::overflow_lru_cache::{
|
||||
DataAvailabilityCheckerInner, ReconstructColumnsDecision,
|
||||
@@ -8,6 +9,7 @@ use crate::data_availability_checker::overflow_lru_cache::{
|
||||
use crate::{metrics, BeaconChain, BeaconChainTypes, BeaconStore, CustodyContext};
|
||||
use kzg::Kzg;
|
||||
use slot_clock::SlotClock;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::fmt::Debug;
|
||||
use std::num::NonZeroUsize;
|
||||
@@ -17,8 +19,8 @@ use task_executor::TaskExecutor;
|
||||
use tracing::{debug, error, info_span, Instrument};
|
||||
use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList};
|
||||
use types::{
|
||||
BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256,
|
||||
RuntimeVariableList, SignedBeaconBlock,
|
||||
BlobSidecarList, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch,
|
||||
EthSpec, Hash256, SignedBeaconBlock,
|
||||
};
|
||||
|
||||
mod error;
|
||||
@@ -387,14 +389,15 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
|
||||
|
||||
let all_data_columns = blocks
|
||||
.iter()
|
||||
// TODO(das): we may want to remove this line. If columns are present they should be
|
||||
// verified. The outcome of `data_columns_required_for_block` is time-dependent, so we
|
||||
// may end up importing data columns that are not verified.
|
||||
.filter(|block| self.data_columns_required_for_block(block.as_block()))
|
||||
// this clone is cheap as it's cloning an Arc
|
||||
.filter_map(|block| block.custody_columns().cloned())
|
||||
.flatten()
|
||||
.map(CustodyDataColumn::into_inner)
|
||||
.collect::<Vec<_>>();
|
||||
let all_data_columns =
|
||||
RuntimeVariableList::from_vec(all_data_columns, self.spec.number_of_columns as usize);
|
||||
|
||||
// verify kzg for all data columns at once
|
||||
if !all_data_columns.is_empty() {
|
||||
@@ -403,6 +406,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
|
||||
.map_err(AvailabilityCheckError::InvalidColumn)?;
|
||||
}
|
||||
|
||||
// TODO(das): we could do the matching first before spending CPU cycles on KZG verification
|
||||
for block in blocks {
|
||||
let (block_root, block, blobs, data_columns) = block.deconstruct();
|
||||
|
||||
@@ -420,6 +424,21 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
|
||||
}
|
||||
} else if self.data_columns_required_for_block(&block) {
|
||||
if let Some(data_columns) = data_columns {
|
||||
let received_indices =
|
||||
HashSet::<ColumnIndex>::from_iter(data_columns.iter().map(|d| d.index()));
|
||||
|
||||
let expected_custody_indices = self
|
||||
.custody_context
|
||||
.sampling_size(Some(block.epoch()), &self.spec);
|
||||
|
||||
if expected_custody_indices != received_indices.len() as u64 {
|
||||
// FIXME: da checker does not have the exact columns
|
||||
// Maybe we can move this logic to network?
|
||||
return Err(AvailabilityCheckError::MissingCustodyColumns(
|
||||
received_indices.into_iter().collect::<Vec<_>>(),
|
||||
));
|
||||
}
|
||||
|
||||
MaybeAvailableBlock::Available(AvailableBlock {
|
||||
block_root,
|
||||
block,
|
||||
@@ -430,7 +449,12 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
|
||||
spec: self.spec.clone(),
|
||||
})
|
||||
} else {
|
||||
MaybeAvailableBlock::AvailabilityPending { block_root, block }
|
||||
// This is unreachable. If a block returns true for
|
||||
// `data_columns_required_for_block` it must be a Fulu block. All Fulu RpcBlocks
|
||||
// are constructed with the `BlockAndCustodyColumns` variant, so `data_columns` must be Some
|
||||
return Err(AvailabilityCheckError::Unexpected(
|
||||
"Data columns should be Some for a Fulu block".to_string(),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
MaybeAvailableBlock::Available(AvailableBlock {
|
||||
@@ -539,7 +563,7 @@ impl<T: BeaconChainTypes> DataAvailabilityChecker<T> {
|
||||
self.availability_cache
|
||||
.handle_reconstruction_failure(block_root);
|
||||
metrics::inc_counter(&KZG_DATA_COLUMN_RECONSTRUCTION_FAILURES);
|
||||
AvailabilityCheckError::ReconstructColumnsError(e)
|
||||
AvailabilityCheckError::Unexpected(format!("Error reconstructing columns: {e:?}"))
|
||||
})?;
|
||||
|
||||
// Check indices from cache again to make sure we don't publish components we've already received.
|
||||
@@ -681,7 +705,7 @@ async fn availability_cache_maintenance_service<T: BeaconChainTypes>(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum AvailableBlockData<E: EthSpec> {
|
||||
/// Block is pre-Deneb or has zero blobs
|
||||
NoData,
|
||||
@@ -692,7 +716,7 @@ pub enum AvailableBlockData<E: EthSpec> {
|
||||
}
|
||||
|
||||
/// A fully available block that is ready to be imported into fork choice.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AvailableBlock<E: EthSpec> {
|
||||
block_root: Hash256,
|
||||
block: Arc<SignedBeaconBlock<E>>,
|
||||
@@ -752,21 +776,26 @@ impl<E: EthSpec> AvailableBlock<E> {
|
||||
(block_root, block, blob_data)
|
||||
}
|
||||
|
||||
/// Only used for testing
|
||||
pub fn __clone_without_recv(&self) -> Result<Self, String> {
|
||||
Ok(Self {
|
||||
block_root: self.block_root,
|
||||
block: self.block.clone(),
|
||||
blob_data: match &self.blob_data {
|
||||
AvailableBlockData::NoData => AvailableBlockData::NoData,
|
||||
AvailableBlockData::Blobs(blobs) => AvailableBlockData::Blobs(blobs.clone()),
|
||||
AvailableBlockData::DataColumns(data_columns) => {
|
||||
AvailableBlockData::DataColumns(data_columns.clone())
|
||||
}
|
||||
},
|
||||
blobs_available_timestamp: self.blobs_available_timestamp,
|
||||
spec: self.spec.clone(),
|
||||
})
|
||||
/// Returns Err if any of its inner BlobSidecar's signed_block_header does not match the inner
|
||||
/// block
|
||||
pub fn match_block_and_blobs(&self) -> Result<(), Vec<u64>> {
|
||||
match &self.blob_data {
|
||||
AvailableBlockData::NoData => Ok(()),
|
||||
AvailableBlockData::Blobs(blobs) => match_block_and_blobs(&self.block, blobs),
|
||||
AvailableBlockData::DataColumns(_) => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns Err if any of its inner DataColumnSidecar's signed_block_header does not match the
|
||||
/// inner block
|
||||
pub fn match_block_and_data_columns(&self) -> Result<(), Vec<ColumnIndex>> {
|
||||
match &self.blob_data {
|
||||
AvailableBlockData::NoData => Ok(()),
|
||||
AvailableBlockData::Blobs(_) => Ok(()),
|
||||
AvailableBlockData::DataColumns(data_columns) => {
|
||||
match_block_and_data_columns(&self.block, data_columns.iter())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,24 +1,20 @@
|
||||
use kzg::{Error as KzgError, KzgCommitment};
|
||||
use types::{BeaconStateError, ColumnIndex, Hash256};
|
||||
use types::{BeaconStateError, ColumnIndex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
InvalidBlobs(KzgError),
|
||||
InvalidColumn(Vec<(ColumnIndex, KzgError)>),
|
||||
ReconstructColumnsError(KzgError),
|
||||
KzgCommitmentMismatch {
|
||||
blob_commitment: KzgCommitment,
|
||||
block_commitment: KzgCommitment,
|
||||
},
|
||||
Unexpected(String),
|
||||
SszTypes(ssz_types::Error),
|
||||
MissingBlobs,
|
||||
MissingCustodyColumns,
|
||||
MissingCustodyColumns(Vec<ColumnIndex>),
|
||||
BlobIndexInvalid(u64),
|
||||
DataColumnIndexInvalid(u64),
|
||||
StoreError(store::Error),
|
||||
DecodeError(ssz::DecodeError),
|
||||
ParentStateMissing(Hash256),
|
||||
BlockReplayError(state_processing::BlockReplayError),
|
||||
RebuildingStateCaches(BeaconStateError),
|
||||
SlotClockError,
|
||||
@@ -35,19 +31,15 @@ pub enum ErrorCategory {
|
||||
impl Error {
|
||||
pub fn category(&self) -> ErrorCategory {
|
||||
match self {
|
||||
Error::SszTypes(_)
|
||||
| Error::MissingBlobs
|
||||
| Error::MissingCustodyColumns
|
||||
| Error::StoreError(_)
|
||||
| Error::DecodeError(_)
|
||||
Error::StoreError(_)
|
||||
| Error::Unexpected(_)
|
||||
| Error::ParentStateMissing(_)
|
||||
| Error::BlockReplayError(_)
|
||||
| Error::RebuildingStateCaches(_)
|
||||
| Error::SlotClockError => ErrorCategory::Internal,
|
||||
Error::InvalidBlobs { .. }
|
||||
Error::MissingBlobs
|
||||
| Error::MissingCustodyColumns(_)
|
||||
| Error::InvalidBlobs { .. }
|
||||
| Error::InvalidColumn { .. }
|
||||
| Error::ReconstructColumnsError { .. }
|
||||
| Error::BlobIndexInvalid(_)
|
||||
| Error::DataColumnIndexInvalid(_)
|
||||
| Error::KzgCommitmentMismatch { .. } => ErrorCategory::Malicious,
|
||||
@@ -55,24 +47,12 @@ impl Error {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ssz_types::Error> for Error {
|
||||
fn from(value: ssz_types::Error) -> Self {
|
||||
Self::SszTypes(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<store::Error> for Error {
|
||||
fn from(value: store::Error) -> Self {
|
||||
Self::StoreError(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ssz::DecodeError> for Error {
|
||||
fn from(value: ssz::DecodeError) -> Self {
|
||||
Self::DecodeError(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<state_processing::BlockReplayError> for Error {
|
||||
fn from(value: state_processing::BlockReplayError) -> Self {
|
||||
Self::BlockReplayError(value)
|
||||
|
||||
@@ -150,9 +150,9 @@ impl<T: BeaconChainTypes> StateLRUCache<T> {
|
||||
parent_block_state_root,
|
||||
)
|
||||
.map_err(AvailabilityCheckError::StoreError)?
|
||||
.ok_or(AvailabilityCheckError::ParentStateMissing(
|
||||
parent_block_state_root,
|
||||
))?;
|
||||
.ok_or(AvailabilityCheckError::Unexpected(format!(
|
||||
"Parent state missing {parent_block_state_root:?}"
|
||||
)))?;
|
||||
|
||||
let state_roots = vec![
|
||||
Ok((parent_state_root, diet_executed_block.parent_block.slot())),
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use crate::data_availability_checker::{AvailableBlock, AvailableBlockData};
|
||||
use crate::block_verification_types::{MaybeAvailableBlock, RpcBlock};
|
||||
use crate::data_availability_checker::{
|
||||
AvailabilityCheckError, AvailableBlock, AvailableBlockData,
|
||||
};
|
||||
use crate::{metrics, BeaconChain, BeaconChainTypes};
|
||||
use itertools::Itertools;
|
||||
use state_processing::{
|
||||
@@ -12,7 +15,7 @@ use store::metadata::DataColumnInfo;
|
||||
use store::{AnchorInfo, BlobInfo, DBColumn, Error as StoreError, KeyValueStore, KeyValueStoreOp};
|
||||
use strum::IntoStaticStr;
|
||||
use tracing::debug;
|
||||
use types::{FixedBytesExtended, Hash256, Slot};
|
||||
use types::{ColumnIndex, FixedBytesExtended, Hash256, Slot};
|
||||
|
||||
/// Use a longer timeout on the pubkey cache.
|
||||
///
|
||||
@@ -23,19 +26,27 @@ const PUBKEY_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
pub enum HistoricalBlockError {
|
||||
/// Block root mismatch, caller should retry with different blocks.
|
||||
MismatchedBlockRoot {
|
||||
block_slot: Slot,
|
||||
block_root: Hash256,
|
||||
expected_block_root: Hash256,
|
||||
oldest_block_parent: Hash256,
|
||||
},
|
||||
/// Bad signature, caller should retry with different blocks.
|
||||
SignatureSet(SignatureSetError),
|
||||
/// Bad signature, caller should retry with different blocks.
|
||||
InvalidSignature,
|
||||
InvalidSignature(String),
|
||||
/// One or more signatures in a BlobSidecar of an RpcBlock are invalid
|
||||
InvalidBlobsSignature(Vec<u64>),
|
||||
/// One or more signatures in a DataColumnSidecar of an RpcBlock are invalid
|
||||
InvalidDataColumnsSignature(Vec<ColumnIndex>),
|
||||
/// Unexpected error
|
||||
Unexpected(String),
|
||||
/// Transitory error, caller should retry with the same blocks.
|
||||
ValidatorPubkeyCacheTimeout,
|
||||
/// Logic error: should never occur.
|
||||
IndexOutOfBounds,
|
||||
/// Internal store error
|
||||
StoreError(StoreError),
|
||||
/// Faulty and internal AvailabilityCheckError
|
||||
AvailabilityCheckError(AvailabilityCheckError),
|
||||
}
|
||||
|
||||
impl From<StoreError> for HistoricalBlockError {
|
||||
@@ -44,7 +55,100 @@ impl From<StoreError> for HistoricalBlockError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SignatureSetError> for HistoricalBlockError {
|
||||
fn from(err: SignatureSetError) -> Self {
|
||||
match err {
|
||||
// The encoding of the signature is invalid, peer fault
|
||||
e
|
||||
@ (SignatureSetError::SignatureInvalid(_) | SignatureSetError::BadBlsBytes { .. }) => {
|
||||
Self::InvalidSignature(format!("{e:?}"))
|
||||
}
|
||||
// All these variants are internal errors or unreachable for historical block paths,
|
||||
// which only check the proposer signature.
|
||||
// BadBlsBytes = Unreachable
|
||||
e @ (SignatureSetError::BeaconStateError(_)
|
||||
| SignatureSetError::ValidatorUnknown(_)
|
||||
| SignatureSetError::ValidatorPubkeyUnknown(_)
|
||||
| SignatureSetError::IncorrectBlockProposer { .. }
|
||||
| SignatureSetError::PublicKeyDecompressionFailed
|
||||
| SignatureSetError::InconsistentBlockFork(_)) => Self::Unexpected(format!("{e:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AvailabilityCheckError> for HistoricalBlockError {
|
||||
fn from(e: AvailabilityCheckError) -> Self {
|
||||
Self::AvailabilityCheckError(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
pub fn assert_correct_historical_block_chain(
|
||||
&self,
|
||||
blocks: &[RpcBlock<T::EthSpec>],
|
||||
) -> Result<(), HistoricalBlockError> {
|
||||
let anchor_info = self.store.get_anchor_info();
|
||||
let mut expected_block_root = anchor_info.oldest_block_parent;
|
||||
|
||||
for block in blocks.iter().rev() {
|
||||
if block.as_block().slot() >= anchor_info.oldest_block_slot {
|
||||
continue;
|
||||
}
|
||||
|
||||
if block.block_root() != expected_block_root {
|
||||
return Err(HistoricalBlockError::MismatchedBlockRoot {
|
||||
block_slot: block.as_block().slot(),
|
||||
block_root: block.block_root(),
|
||||
expected_block_root,
|
||||
oldest_block_parent: anchor_info.oldest_block_parent,
|
||||
});
|
||||
}
|
||||
|
||||
expected_block_root = block.as_block().message().parent_root();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn verify_and_import_historical_block_batch(
|
||||
&self,
|
||||
blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||
) -> Result<usize, HistoricalBlockError> {
|
||||
let anchor_info = self.store.get_anchor_info();
|
||||
|
||||
// Take all blocks with slots less than the oldest block slot.
|
||||
let blocks_to_import = blocks
|
||||
.into_iter()
|
||||
.filter(|block| block.as_block().slot() < anchor_info.oldest_block_slot)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// First check that chain of blocks is correct
|
||||
self.assert_correct_historical_block_chain(&blocks_to_import)?;
|
||||
|
||||
// Check that all data columns are present <- faulty failure if missing because we have
|
||||
// checked the block root is correct first.
|
||||
let available_blocks_to_import = self
|
||||
.data_availability_checker
|
||||
.verify_kzg_for_rpc_blocks(blocks_to_import)
|
||||
.and_then(|blocks| {
|
||||
blocks
|
||||
.into_iter()
|
||||
// RpcBlocks must always be Available, otherwise a data peer is faulty or
|
||||
// malicious. `verify_kzg_for_rpc_blocks` returns errors for those cases, but we
|
||||
// haven't updated its function signature. This code block can be deleted later
|
||||
// in a bigger refactor.
|
||||
.map(|maybe_available| match maybe_available {
|
||||
MaybeAvailableBlock::Available(block) => Ok(block),
|
||||
MaybeAvailableBlock::AvailabilityPending { .. } => Err(
|
||||
AvailabilityCheckError::Unexpected("block not available".to_string()),
|
||||
),
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
})?;
|
||||
|
||||
self.import_historical_block_batch(available_blocks_to_import)
|
||||
}
|
||||
|
||||
/// Store a batch of historical blocks in the database.
|
||||
///
|
||||
/// The `blocks` should be given in slot-ascending order. One of the blocks should have a block
|
||||
@@ -103,16 +207,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
let mut hot_batch = Vec::with_capacity(blocks_to_import.len());
|
||||
let mut signed_blocks = Vec::with_capacity(blocks_to_import.len());
|
||||
|
||||
for available_block in blocks_to_import.into_iter().rev() {
|
||||
for available_block in blocks_to_import.iter().cloned().rev() {
|
||||
let (block_root, block, block_data) = available_block.deconstruct();
|
||||
|
||||
if block_root != expected_block_root {
|
||||
return Err(HistoricalBlockError::MismatchedBlockRoot {
|
||||
block_root,
|
||||
expected_block_root,
|
||||
});
|
||||
}
|
||||
|
||||
if !self.store.get_config().prune_payloads {
|
||||
// If prune-payloads is set to false, store the block which includes the execution payload
|
||||
self.store
|
||||
@@ -213,18 +310,32 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(HistoricalBlockError::SignatureSet)
|
||||
.map(ParallelSignatureSets::from)?;
|
||||
drop(pubkey_cache);
|
||||
drop(setup_timer);
|
||||
|
||||
let verify_timer = metrics::start_timer(&metrics::BACKFILL_SIGNATURE_VERIFY_TIMES);
|
||||
if !signature_set.verify() {
|
||||
return Err(HistoricalBlockError::InvalidSignature);
|
||||
return Err(HistoricalBlockError::InvalidSignature("invalid".to_owned()));
|
||||
}
|
||||
drop(verify_timer);
|
||||
drop(sig_timer);
|
||||
|
||||
// Check that the proposer signature in the blobs and data columns is the same as the
|
||||
// correct signature in the block.
|
||||
blocks_to_import
|
||||
.iter()
|
||||
.map(|block| {
|
||||
if let Err(indices) = block.match_block_and_blobs() {
|
||||
return Err(HistoricalBlockError::InvalidBlobsSignature(indices));
|
||||
}
|
||||
if let Err(indices) = block.match_block_and_data_columns() {
|
||||
return Err(HistoricalBlockError::InvalidDataColumnsSignature(indices));
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// Write the I/O batches to disk, writing the blocks themselves first, as it's better
|
||||
// for the hot DB to contain extra blocks than for the cold DB to point to blocks that
|
||||
// do not exist.
|
||||
|
||||
@@ -2392,7 +2392,8 @@ where
|
||||
.take(sampling_column_count)
|
||||
.map(CustodyDataColumn::from_asserted_custody)
|
||||
.collect::<Vec<_>>();
|
||||
RpcBlock::new_with_custody_columns(Some(block_root), block, columns, &self.spec)?
|
||||
RpcBlock::new_with_custody_columns(Some(block_root), block, columns, &self.spec)
|
||||
.map_err(BlockError::InternalError)?
|
||||
} else {
|
||||
RpcBlock::new_without_blobs(Some(block_root), block)
|
||||
}
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
use beacon_chain::attestation_verification::Error as AttnError;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::builder::BeaconChainBuilder;
|
||||
use beacon_chain::data_availability_checker::AvailableBlock;
|
||||
use beacon_chain::data_availability_checker::{AvailableBlock, AvailableBlockData};
|
||||
use beacon_chain::data_column_verification::CustodyDataColumn;
|
||||
use beacon_chain::schema_change::migrate_schema;
|
||||
use beacon_chain::test_utils::SyncCommitteeStrategy;
|
||||
use beacon_chain::test_utils::{
|
||||
@@ -11,9 +12,11 @@ use beacon_chain::test_utils::{
|
||||
BlockStrategy, DiskHarnessType,
|
||||
};
|
||||
use beacon_chain::{
|
||||
data_availability_checker::MaybeAvailableBlock, historical_blocks::HistoricalBlockError,
|
||||
migrate::MigratorConfig, BeaconChain, BeaconChainError, BeaconChainTypes, BeaconSnapshot,
|
||||
BlockError, ChainConfig, NotifyExecutionLayer, ServerSentEventHandler, WhenSlotSkipped,
|
||||
data_availability_checker::{AvailabilityCheckError, MaybeAvailableBlock},
|
||||
historical_blocks::HistoricalBlockError,
|
||||
migrate::MigratorConfig,
|
||||
BeaconChain, BeaconChainError, BeaconChainTypes, BeaconSnapshot, BlockError, ChainConfig,
|
||||
NotifyExecutionLayer, ServerSentEventHandler, WhenSlotSkipped,
|
||||
};
|
||||
use logging::create_test_tracing_subscriber;
|
||||
use maplit::hashset;
|
||||
@@ -33,6 +36,7 @@ use store::{
|
||||
BlobInfo, DBColumn, HotColdDB, StoreConfig,
|
||||
};
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use tracing::info;
|
||||
use types::test_utils::{SeedableRng, XorShiftRng};
|
||||
use types::*;
|
||||
|
||||
@@ -2339,6 +2343,7 @@ async fn weak_subjectivity_sync_test(slots: Vec<Slot>, checkpoint_slot: Slot) {
|
||||
let store = get_store(&temp2);
|
||||
let spec = test_spec::<E>();
|
||||
let seconds_per_slot = spec.seconds_per_slot;
|
||||
let wss_fork = harness.spec.fork_name_at_slot::<E>(checkpoint_slot);
|
||||
|
||||
let kzg = get_kzg(&spec);
|
||||
|
||||
@@ -2499,12 +2504,153 @@ async fn weak_subjectivity_sync_test(slots: Vec<Slot>, checkpoint_slot: Slot) {
|
||||
};
|
||||
|
||||
// Importing the invalid batch should error.
|
||||
assert!(matches!(
|
||||
beacon_chain
|
||||
.import_historical_block_batch(batch_with_invalid_first_block)
|
||||
.unwrap_err(),
|
||||
HistoricalBlockError::InvalidSignature
|
||||
));
|
||||
let err = beacon_chain
|
||||
.import_historical_block_batch(batch_with_invalid_first_block)
|
||||
.unwrap_err();
|
||||
match err {
|
||||
HistoricalBlockError::InvalidSignature(_) => {} // ok
|
||||
e => panic!("Unexpected error {e:?}"),
|
||||
}
|
||||
|
||||
if wss_fork.deneb_enabled() {
|
||||
// Currently ExecutionBlockGenerator::build_new_execution_payload doesn't accept a parameter
|
||||
// to generate a fixed number of blob TXs, so it's random. Given the large number of blocks
|
||||
// in this batch it's very unlikely that no block has data, but it's probable that it's
|
||||
// not index 0, so we need to find the first block with data.
|
||||
let first_block_with_data = available_blocks
|
||||
.iter()
|
||||
.position(|block| block.block().num_expected_blobs() > 0)
|
||||
.expect("No blocks have data, try different RNG");
|
||||
|
||||
// Test 1: Invalidate sidecar header signature
|
||||
|
||||
let mut batch_with_invalid_header = available_blocks.to_vec();
|
||||
batch_with_invalid_header[first_block_with_data] = {
|
||||
let (block_root, block, block_data) = batch_with_invalid_header[first_block_with_data]
|
||||
.clone()
|
||||
.deconstruct();
|
||||
if wss_fork.fulu_enabled() {
|
||||
info!(block_slot = %block.slot(), ?block_root, "Corrupting data column header signature");
|
||||
let AvailableBlockData::DataColumns(mut data_columns) = block_data else {
|
||||
panic!("no columns")
|
||||
};
|
||||
assert!(
|
||||
!data_columns.is_empty(),
|
||||
"data column sidecars shouldn't be empty"
|
||||
);
|
||||
let mut data_column = (*data_columns[0]).clone();
|
||||
data_column.signed_block_header.signature = Signature::empty();
|
||||
data_columns[0] = data_column.into();
|
||||
AvailableBlock::__new_for_testing(
|
||||
block_root,
|
||||
block,
|
||||
AvailableBlockData::DataColumns(data_columns),
|
||||
beacon_chain.spec.clone(),
|
||||
)
|
||||
} else {
|
||||
info!(block_slot = %block.slot(), ?block_root, "Corrupting blob header signature");
|
||||
let AvailableBlockData::Blobs(mut blobs) = block_data else {
|
||||
let blocks_have_blobs = available_blocks
|
||||
.into_iter()
|
||||
.map(|block| (block.block().slot(), block.has_blobs()))
|
||||
.collect::<Vec<_>>();
|
||||
panic!(
|
||||
"no blobs at block {:?} {}. blocks_have_blobs {:?}",
|
||||
block_root,
|
||||
block.slot(),
|
||||
blocks_have_blobs
|
||||
);
|
||||
};
|
||||
assert!(!blobs.is_empty(), "blob sidecars shouldn't be empty");
|
||||
let mut blob = (*blobs[0]).clone();
|
||||
blob.signed_block_header.signature = Signature::empty();
|
||||
blobs[0] = blob.into();
|
||||
AvailableBlock::__new_for_testing(
|
||||
block_root,
|
||||
block,
|
||||
AvailableBlockData::Blobs(blobs),
|
||||
beacon_chain.spec.clone(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
// Importing the invalid batch should error.
|
||||
let err = beacon_chain
|
||||
.import_historical_block_batch(batch_with_invalid_header)
|
||||
.unwrap_err();
|
||||
if wss_fork.fulu_enabled() {
|
||||
match err {
|
||||
HistoricalBlockError::InvalidDataColumnsSignature(_) => {} // ok
|
||||
e => panic!("Unexpected error {e:?}"),
|
||||
}
|
||||
} else {
|
||||
match err {
|
||||
HistoricalBlockError::InvalidBlobsSignature(_) => {} // ok
|
||||
e => panic!("Unexpected error {e:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// Test 2: invalidate KZG proof
|
||||
|
||||
let mut batch_with_invalid_kzg = available_blocks
|
||||
.iter()
|
||||
.map(|block| available_to_rpc_block(block.clone(), &harness.spec))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
batch_with_invalid_kzg[first_block_with_data] = {
|
||||
let (block_root, block, blobs, cols) = batch_with_invalid_kzg[first_block_with_data]
|
||||
.clone()
|
||||
.deconstruct();
|
||||
if wss_fork.fulu_enabled() {
|
||||
info!(block_slot = %block.slot(), ?block_root, "Corrupting data column KZG proof");
|
||||
let mut data_columns = cols.unwrap();
|
||||
assert!(
|
||||
!data_columns.is_empty(),
|
||||
"data column sidecars shouldn't be empty"
|
||||
);
|
||||
let mut data_column = (*(data_columns[0]).clone_arc()).clone();
|
||||
if data_column.kzg_proofs[0] == KzgProof::empty() {
|
||||
panic!("kzg_proof is already G1_POINT_AT_INFINITY")
|
||||
}
|
||||
data_column.kzg_proofs[0] = KzgProof::empty();
|
||||
data_columns[0] = CustodyDataColumn::from_asserted_custody(data_column.into());
|
||||
RpcBlock::new_with_custody_columns(
|
||||
Some(block_root),
|
||||
block,
|
||||
data_columns.to_vec(),
|
||||
&harness.spec,
|
||||
)
|
||||
.unwrap()
|
||||
} else {
|
||||
info!(block_slot = %block.slot(), ?block_root, "Corrupting blob KZG proof");
|
||||
let mut blobs = blobs.unwrap();
|
||||
assert!(!blobs.is_empty(), "blob sidecars shouldn't be empty");
|
||||
let mut blob = (*blobs[0]).clone();
|
||||
blob.kzg_proof = KzgProof::empty();
|
||||
blobs[0] = blob.into();
|
||||
RpcBlock::new(Some(block_root), block, Some(blobs)).unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
let err = beacon_chain
|
||||
.verify_and_import_historical_block_batch(batch_with_invalid_kzg)
|
||||
.unwrap_err();
|
||||
if wss_fork.fulu_enabled() {
|
||||
match err {
|
||||
HistoricalBlockError::AvailabilityCheckError(
|
||||
AvailabilityCheckError::InvalidColumn(_),
|
||||
) => {} // ok
|
||||
e => panic!("Unexpected error {e:?}"),
|
||||
}
|
||||
} else {
|
||||
match err {
|
||||
HistoricalBlockError::AvailabilityCheckError(
|
||||
AvailabilityCheckError::InvalidBlobs(_),
|
||||
) => {} // ok
|
||||
e => panic!("Unexpected error {e:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Importing the batch with valid signatures should succeed.
|
||||
let available_blocks_dup = available_blocks.iter().map(clone_block).collect::<Vec<_>>();
|
||||
@@ -3654,5 +3800,26 @@ fn get_blocks(
|
||||
}
|
||||
|
||||
fn clone_block<E: EthSpec>(block: &AvailableBlock<E>) -> AvailableBlock<E> {
|
||||
block.__clone_without_recv().unwrap()
|
||||
block.clone()
|
||||
}
|
||||
|
||||
fn available_to_rpc_block<E: EthSpec>(block: AvailableBlock<E>, spec: &ChainSpec) -> RpcBlock<E> {
|
||||
let (block_root, block, block_data) = block.deconstruct();
|
||||
|
||||
match block_data {
|
||||
AvailableBlockData::NoData => RpcBlock::new(Some(block_root), block, None).unwrap(),
|
||||
AvailableBlockData::Blobs(blobs) => {
|
||||
RpcBlock::new(Some(block_root), block, Some(blobs)).unwrap()
|
||||
}
|
||||
AvailableBlockData::DataColumns(data_columns) => RpcBlock::new_with_custody_columns(
|
||||
Some(block_root),
|
||||
block,
|
||||
data_columns
|
||||
.into_iter()
|
||||
.map(|d| CustodyDataColumn::from_asserted_custody(d))
|
||||
.collect(),
|
||||
spec,
|
||||
)
|
||||
.unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ const GOSSIPSUB_POSITIVE_SCORE_WEIGHT: f64 = GOSSIPSUB_NEGATIVE_SCORE_WEIGHT;
|
||||
/// Each variant has an associated score change.
|
||||
// To easily assess the behaviour of scores changes the number of variants should stay low, and
|
||||
// somewhat generic.
|
||||
#[derive(Debug, Clone, Copy, AsRefStr)]
|
||||
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, AsRefStr)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum PeerAction {
|
||||
/// We should not communicate more with this peer.
|
||||
|
||||
@@ -59,6 +59,14 @@ pub struct BlobsByRangeRequestId {
|
||||
pub struct DataColumnsByRangeRequestId {
|
||||
/// Id to identify this attempt at a data_columns_by_range request for `parent_request_id`
|
||||
pub id: Id,
|
||||
/// The Id of the parent custody by range request that issued this data_columns_by_range request
|
||||
pub parent_request_id: CustodyByRangeRequestId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct CustodyByRangeRequestId {
|
||||
/// Id to identify this attempt at a meta custody by range request for `parent_request_id`
|
||||
pub id: Id,
|
||||
/// The Id of the overall By Range request for block components.
|
||||
pub parent_request_id: ComponentsByRangeRequestId,
|
||||
}
|
||||
@@ -221,6 +229,7 @@ macro_rules! impl_display {
|
||||
impl_display!(BlocksByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||
impl_display!(BlobsByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||
impl_display!(DataColumnsByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||
impl_display!(CustodyByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||
impl_display!(ComponentsByRangeRequestId, "{}/{}", id, requester);
|
||||
impl_display!(DataColumnsByRootRequestId, "{}/{}", id, requester);
|
||||
impl_display!(SingleLookupReqId, "{}/Lookup/{}", req_id, lookup_id);
|
||||
@@ -299,14 +308,17 @@ mod tests {
|
||||
fn display_id_data_columns_by_range() {
|
||||
let id = DataColumnsByRangeRequestId {
|
||||
id: 123,
|
||||
parent_request_id: ComponentsByRangeRequestId {
|
||||
parent_request_id: CustodyByRangeRequestId {
|
||||
id: 122,
|
||||
requester: RangeRequestId::RangeSync {
|
||||
chain_id: 54,
|
||||
batch_id: Epoch::new(0),
|
||||
parent_request_id: ComponentsByRangeRequestId {
|
||||
id: 121,
|
||||
requester: RangeRequestId::RangeSync {
|
||||
chain_id: 54,
|
||||
batch_id: Epoch::new(0),
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
assert_eq!(format!("{id}"), "123/122/RangeSync/0/54");
|
||||
assert_eq!(format!("{id}"), "123/122/121/RangeSync/0/54");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,6 +248,10 @@ impl<E: EthSpec> NetworkGlobals<E> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sampling_columns_count(&self) -> usize {
|
||||
self.sampling_columns.read().len()
|
||||
}
|
||||
|
||||
pub fn sampling_columns(&self) -> HashSet<ColumnIndex> {
|
||||
self.sampling_columns.read().clone()
|
||||
}
|
||||
@@ -271,6 +275,25 @@ impl<E: EthSpec> NetworkGlobals<E> {
|
||||
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
|
||||
}
|
||||
|
||||
pub fn new_test_globals_as_supernode(
|
||||
trusted_peers: Vec<PeerId>,
|
||||
config: Arc<NetworkConfig>,
|
||||
spec: Arc<ChainSpec>,
|
||||
is_supernode: bool,
|
||||
) -> NetworkGlobals<E> {
|
||||
let metadata = MetaData::V3(MetaDataV3 {
|
||||
seq_number: 0,
|
||||
attnets: Default::default(),
|
||||
syncnets: Default::default(),
|
||||
custody_group_count: if is_supernode {
|
||||
spec.number_of_custody_groups
|
||||
} else {
|
||||
spec.custody_requirement
|
||||
},
|
||||
});
|
||||
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
|
||||
}
|
||||
|
||||
pub(crate) fn new_test_globals_with_metadata(
|
||||
trusted_peers: Vec<PeerId>,
|
||||
metadata: MetaData<E>,
|
||||
|
||||
@@ -1436,7 +1436,12 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
return None;
|
||||
}
|
||||
// BlobNotRequired is unreachable. Only constructed in `process_gossip_blob`
|
||||
Err(e @ BlockError::InternalError(_)) | Err(e @ BlockError::BlobNotRequired(_)) => {
|
||||
// InvalidBlobsSignature is unreachable. Only constructed in `process_chain_segment`
|
||||
// InvalidDataColumnsSignature is unreachable. Only constructed in `process_chain_segment`
|
||||
Err(e @ BlockError::InternalError(_))
|
||||
| Err(e @ BlockError::BlobNotRequired(_))
|
||||
| Err(e @ BlockError::InvalidBlobsSignature(_))
|
||||
| Err(e @ BlockError::InvalidDataColumnsSignature(_)) => {
|
||||
error!(error = %e, "Internal block gossip validation error");
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ use tokio::sync::mpsc::{self, error::TrySendError};
|
||||
use tracing::{debug, error, trace, warn, Instrument};
|
||||
use types::*;
|
||||
|
||||
pub use sync_methods::ChainSegmentProcessId;
|
||||
pub use sync_methods::{ChainSegmentProcessId, PeerGroupAction};
|
||||
use types::blob_sidecar::FixedBlobSidecarList;
|
||||
|
||||
pub type Error<T> = TrySendError<BeaconWorkEvent<T>>;
|
||||
@@ -554,7 +554,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
pub fn send_rpc_validate_data_columns(
|
||||
self: &Arc<Self>,
|
||||
block_root: Hash256,
|
||||
data_columns: Vec<Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||
data_columns: DataColumnSidecarList<T::EthSpec>,
|
||||
seen_timestamp: Duration,
|
||||
id: SamplingId,
|
||||
) -> Result<(), Error<T::EthSpec>> {
|
||||
|
||||
@@ -7,7 +7,6 @@ use crate::sync::{
|
||||
};
|
||||
use beacon_chain::block_verification_types::{AsBlock, RpcBlock};
|
||||
use beacon_chain::data_availability_checker::AvailabilityCheckError;
|
||||
use beacon_chain::data_availability_checker::MaybeAvailableBlock;
|
||||
use beacon_chain::data_column_verification::verify_kzg_for_data_column_list;
|
||||
use beacon_chain::{
|
||||
validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainTypes,
|
||||
@@ -18,6 +17,7 @@ use beacon_processor::{
|
||||
AsyncFn, BlockingFn, DuplicateCache,
|
||||
};
|
||||
use lighthouse_network::PeerAction;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use store::KzgCommitment;
|
||||
@@ -25,7 +25,9 @@ use tokio::sync::mpsc;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use types::beacon_block_body::format_kzg_commitments;
|
||||
use types::blob_sidecar::FixedBlobSidecarList;
|
||||
use types::{BlockImportSource, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256};
|
||||
use types::{
|
||||
BlockImportSource, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256,
|
||||
};
|
||||
|
||||
/// Id associated to a batch processing request, either a sync batch or a parent lookup.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
@@ -37,11 +39,65 @@ pub enum ChainSegmentProcessId {
|
||||
}
|
||||
|
||||
/// Returned when a chain segment import fails.
|
||||
struct ChainSegmentFailed {
|
||||
#[derive(Debug)]
|
||||
pub struct ChainSegmentFailed {
|
||||
/// To be displayed in logs.
|
||||
message: String,
|
||||
pub message: String,
|
||||
/// Used to penalize peers.
|
||||
peer_action: Option<PeerAction>,
|
||||
pub peer_action: Option<PeerGroupAction>,
|
||||
}
|
||||
|
||||
/// Tracks which block component(s) caused the block to be invalid. Used to attribute fault in sync.
|
||||
#[derive(Debug)]
|
||||
pub struct PeerGroupAction {
|
||||
pub block_peer: Option<PeerAction>,
|
||||
pub column_peer: HashMap<ColumnIndex, PeerAction>,
|
||||
}
|
||||
|
||||
impl PeerGroupAction {
|
||||
fn block_peer(action: PeerAction) -> Self {
|
||||
Self {
|
||||
block_peer: Some(action),
|
||||
column_peer: <_>::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn column_peers(columns: &[ColumnIndex], action: PeerAction) -> Self {
|
||||
Self {
|
||||
block_peer: None,
|
||||
column_peer: HashMap::from_iter(columns.iter().map(|index| (*index, action))),
|
||||
}
|
||||
}
|
||||
|
||||
fn from_availability_check_error(e: &AvailabilityCheckError) -> Option<Self> {
|
||||
match e {
|
||||
AvailabilityCheckError::InvalidBlobs(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
AvailabilityCheckError::InvalidColumn(errors) => Some(PeerGroupAction::column_peers(
|
||||
&errors.iter().map(|(index, _)| *index).collect::<Vec<_>>(),
|
||||
PeerAction::LowToleranceError,
|
||||
)),
|
||||
AvailabilityCheckError::KzgCommitmentMismatch { .. } => None, // should never happen after checking inclusion proof
|
||||
AvailabilityCheckError::Unexpected(_) => None, // internal
|
||||
AvailabilityCheckError::MissingBlobs => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::HighToleranceError))
|
||||
}
|
||||
// TODO(das): PeerAction::High may be too soft of a penalty. Also may be deprecated
|
||||
// with https://github.com/sigp/lighthouse/issues/6258
|
||||
AvailabilityCheckError::MissingCustodyColumns(columns) => Some(
|
||||
PeerGroupAction::column_peers(columns, PeerAction::HighToleranceError),
|
||||
),
|
||||
AvailabilityCheckError::BlobIndexInvalid(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
AvailabilityCheckError::DataColumnIndexInvalid(_) => None, // unreachable
|
||||
AvailabilityCheckError::StoreError(_) => None, // unreachable
|
||||
AvailabilityCheckError::BlockReplayError(_) => None, // internal error
|
||||
AvailabilityCheckError::RebuildingStateCaches(_) => None, // internal error
|
||||
AvailabilityCheckError::SlotClockError => None, // internal error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
@@ -480,7 +536,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
match e.peer_action {
|
||||
Some(penalty) => BatchProcessResult::FaultyFailure {
|
||||
imported_blocks,
|
||||
penalty,
|
||||
peer_action: penalty,
|
||||
error: e.message,
|
||||
},
|
||||
None => BatchProcessResult::NonFaultyFailure,
|
||||
}
|
||||
@@ -502,7 +559,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
.sum::<usize>();
|
||||
|
||||
match self.process_backfill_blocks(downloaded_blocks) {
|
||||
(imported_blocks, Ok(_)) => {
|
||||
Ok(imported_blocks) => {
|
||||
debug!(
|
||||
batch_epoch = %epoch,
|
||||
first_block_slot = start_slot,
|
||||
@@ -518,7 +575,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
imported_blocks,
|
||||
}
|
||||
}
|
||||
(_, Err(e)) => {
|
||||
Err(e) => {
|
||||
debug!(
|
||||
batch_epoch = %epoch,
|
||||
first_block_slot = start_slot,
|
||||
@@ -529,9 +586,10 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
"Backfill batch processing failed"
|
||||
);
|
||||
match e.peer_action {
|
||||
Some(penalty) => BatchProcessResult::FaultyFailure {
|
||||
Some(peer_action) => BatchProcessResult::FaultyFailure {
|
||||
imported_blocks: 0,
|
||||
penalty,
|
||||
peer_action,
|
||||
error: e.message,
|
||||
},
|
||||
None => BatchProcessResult::NonFaultyFailure,
|
||||
}
|
||||
@@ -589,148 +647,77 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
fn process_backfill_blocks(
|
||||
&self,
|
||||
downloaded_blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||
) -> (usize, Result<(), ChainSegmentFailed>) {
|
||||
let total_blocks = downloaded_blocks.len();
|
||||
let available_blocks = match self
|
||||
) -> Result<usize, ChainSegmentFailed> {
|
||||
match self
|
||||
.chain
|
||||
.data_availability_checker
|
||||
.verify_kzg_for_rpc_blocks(downloaded_blocks)
|
||||
.verify_and_import_historical_block_batch(downloaded_blocks)
|
||||
{
|
||||
Ok(blocks) => blocks
|
||||
.into_iter()
|
||||
.filter_map(|maybe_available| match maybe_available {
|
||||
MaybeAvailableBlock::Available(block) => Some(block),
|
||||
MaybeAvailableBlock::AvailabilityPending { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
Err(e) => match e {
|
||||
AvailabilityCheckError::StoreError(_) => {
|
||||
return (
|
||||
0,
|
||||
Err(ChainSegmentFailed {
|
||||
peer_action: None,
|
||||
message: "Failed to check block availability".into(),
|
||||
}),
|
||||
);
|
||||
}
|
||||
e => {
|
||||
return (
|
||||
0,
|
||||
Err(ChainSegmentFailed {
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
message: format!("Failed to check block availability : {:?}", e),
|
||||
}),
|
||||
)
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
if available_blocks.len() != total_blocks {
|
||||
return (
|
||||
0,
|
||||
Err(ChainSegmentFailed {
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
message: format!(
|
||||
"{} out of {} blocks were unavailable",
|
||||
(total_blocks - available_blocks.len()),
|
||||
total_blocks
|
||||
),
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
match self.chain.import_historical_block_batch(available_blocks) {
|
||||
Ok(imported_blocks) => {
|
||||
metrics::inc_counter(
|
||||
&metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_SUCCESS_TOTAL,
|
||||
);
|
||||
(imported_blocks, Ok(()))
|
||||
Ok(imported_blocks)
|
||||
}
|
||||
Err(e) => {
|
||||
metrics::inc_counter(
|
||||
&metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL,
|
||||
);
|
||||
let peer_action = match &e {
|
||||
HistoricalBlockError::MismatchedBlockRoot {
|
||||
block_root,
|
||||
expected_block_root,
|
||||
} => {
|
||||
debug!(
|
||||
error = "mismatched_block_root",
|
||||
?block_root,
|
||||
expected_root = ?expected_block_root,
|
||||
"Backfill batch processing error"
|
||||
);
|
||||
// The peer is faulty if they send blocks with bad roots.
|
||||
Some(PeerAction::LowToleranceError)
|
||||
HistoricalBlockError::AvailabilityCheckError(e) => {
|
||||
PeerGroupAction::from_availability_check_error(e)
|
||||
}
|
||||
HistoricalBlockError::InvalidSignature
|
||||
| HistoricalBlockError::SignatureSet(_) => {
|
||||
warn!(
|
||||
error = ?e,
|
||||
"Backfill batch processing error"
|
||||
);
|
||||
// The peer is faulty if they send bad signatures.
|
||||
Some(PeerAction::LowToleranceError)
|
||||
// The peer is faulty if they send blocks with bad roots or invalid signatures
|
||||
HistoricalBlockError::MismatchedBlockRoot { .. }
|
||||
| HistoricalBlockError::InvalidSignature(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
HistoricalBlockError::ValidatorPubkeyCacheTimeout => {
|
||||
warn!(
|
||||
error = "pubkey_cache_timeout",
|
||||
"Backfill batch processing error"
|
||||
);
|
||||
// Blobs are served by the block_peer
|
||||
HistoricalBlockError::InvalidBlobsSignature(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
HistoricalBlockError::InvalidDataColumnsSignature(indices) => Some(
|
||||
PeerGroupAction::column_peers(indices, PeerAction::LowToleranceError),
|
||||
),
|
||||
HistoricalBlockError::ValidatorPubkeyCacheTimeout
|
||||
| HistoricalBlockError::IndexOutOfBounds
|
||||
| HistoricalBlockError::StoreError(_)
|
||||
| HistoricalBlockError::Unexpected(_) => {
|
||||
// This is an internal error, do not penalize the peer.
|
||||
None
|
||||
}
|
||||
HistoricalBlockError::IndexOutOfBounds => {
|
||||
error!(
|
||||
error = ?e,
|
||||
"Backfill batch OOB error"
|
||||
);
|
||||
// This should never occur, don't penalize the peer.
|
||||
None
|
||||
}
|
||||
HistoricalBlockError::StoreError(e) => {
|
||||
warn!(error = ?e, "Backfill batch processing error");
|
||||
// This is an internal error, don't penalize the peer.
|
||||
None
|
||||
} //
|
||||
// Do not use a fallback match, handle all errors explicitly
|
||||
} // Do not use a fallback match, handle all errors explicitly
|
||||
};
|
||||
let err_str: &'static str = e.into();
|
||||
(
|
||||
0,
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("{:?}", err_str),
|
||||
// This is an internal error, don't penalize the peer.
|
||||
peer_action,
|
||||
}),
|
||||
)
|
||||
|
||||
if peer_action.is_some() {
|
||||
// All errors that result in a peer penalty are "expected" external faults the
|
||||
// node runner can't do anything about
|
||||
debug!(?e, "Backfill sync processing error");
|
||||
} else {
|
||||
// All others are some type of internal error worth surfacing?
|
||||
warn!(?e, "Unexpected backfill sync processing error");
|
||||
}
|
||||
|
||||
Err(ChainSegmentFailed {
|
||||
// Render the full error in debug for full details
|
||||
message: format!("{:?}", e),
|
||||
peer_action,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to handle a `BlockError` from `process_chain_segment`
|
||||
fn handle_failed_chain_segment(&self, error: BlockError) -> Result<(), ChainSegmentFailed> {
|
||||
match error {
|
||||
BlockError::ParentUnknown { parent_root, .. } => {
|
||||
let peer_action = match &error {
|
||||
BlockError::ParentUnknown { .. } => {
|
||||
// blocks should be sequential and all parents should exist
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Block has an unknown parent: {}", parent_root),
|
||||
// Peers are faulty if they send non-sequential blocks.
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
})
|
||||
}
|
||||
BlockError::DuplicateFullyImported(_)
|
||||
| BlockError::DuplicateImportStatusUnknown(..) => {
|
||||
// This can happen for many reasons. Head syncs can download multiples and parent
|
||||
// lookups can download blocks before range sync
|
||||
Ok(())
|
||||
// Peers are faulty if they send non-sequential blocks.
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
BlockError::FutureSlot {
|
||||
present_slot,
|
||||
block_slot,
|
||||
} => {
|
||||
if present_slot + FUTURE_SLOT_TOLERANCE >= block_slot {
|
||||
if *present_slot + FUTURE_SLOT_TOLERANCE >= *block_slot {
|
||||
// The block is too far in the future, drop it.
|
||||
warn!(
|
||||
msg = "block for future slot rejected, check your time",
|
||||
@@ -739,121 +726,90 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
FUTURE_SLOT_TOLERANCE,
|
||||
"Block is ahead of our slot clock"
|
||||
);
|
||||
} else {
|
||||
// The block is in the future, but not too far.
|
||||
debug!(
|
||||
%present_slot,
|
||||
%block_slot,
|
||||
FUTURE_SLOT_TOLERANCE,
|
||||
"Block is slightly ahead of our slot clock. Ignoring."
|
||||
);
|
||||
}
|
||||
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!(
|
||||
"Block with slot {} is higher than the current slot {}",
|
||||
block_slot, present_slot
|
||||
),
|
||||
// Peers are faulty if they send blocks from the future.
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
})
|
||||
// Peers are faulty if they send blocks from the future.
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
BlockError::WouldRevertFinalizedSlot { .. } => {
|
||||
debug!("Finalized or earlier block processed");
|
||||
Ok(())
|
||||
// Block is invalid
|
||||
BlockError::StateRootMismatch { .. }
|
||||
| BlockError::BlockSlotLimitReached
|
||||
| BlockError::IncorrectBlockProposer { .. }
|
||||
| BlockError::UnknownValidator { .. }
|
||||
| BlockError::BlockIsNotLaterThanParent { .. }
|
||||
| BlockError::NonLinearParentRoots
|
||||
| BlockError::NonLinearSlots
|
||||
| BlockError::PerBlockProcessingError(_)
|
||||
| BlockError::InconsistentFork(_)
|
||||
| BlockError::InvalidSignature(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
BlockError::NotFinalizedDescendant { block_parent_root } => {
|
||||
debug!(
|
||||
"Not syncing to a chain that conflicts with the canonical or manual finalized checkpoint"
|
||||
);
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!(
|
||||
"Block with parent_root {} conflicts with our checkpoint state",
|
||||
block_parent_root
|
||||
),
|
||||
peer_action: Some(PeerAction::Fatal),
|
||||
})
|
||||
// Currently blobs are served by the block peer
|
||||
BlockError::InvalidBlobsSignature(_) => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
BlockError::GenesisBlock => {
|
||||
debug!("Genesis block was processed");
|
||||
Ok(())
|
||||
BlockError::InvalidDataColumnsSignature(indices) => Some(
|
||||
PeerGroupAction::column_peers(indices, PeerAction::LowToleranceError),
|
||||
),
|
||||
BlockError::GenesisBlock
|
||||
| BlockError::WouldRevertFinalizedSlot { .. }
|
||||
| BlockError::DuplicateFullyImported(_)
|
||||
| BlockError::DuplicateImportStatusUnknown(..) => {
|
||||
// This can happen for many reasons. Head syncs can download multiples and parent
|
||||
// lookups can download blocks before range sync
|
||||
return Ok(());
|
||||
}
|
||||
BlockError::BeaconChainError(e) => {
|
||||
warn!(
|
||||
msg = "unexpected condition in processing block.",
|
||||
outcome = ?e,
|
||||
"BlockProcessingFailure"
|
||||
);
|
||||
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Internal error whilst processing block: {:?}", e),
|
||||
// Do not penalize peers for internal errors.
|
||||
peer_action: None,
|
||||
})
|
||||
// Not syncing to a chain that conflicts with the canonical or manual finalized checkpoint
|
||||
BlockError::NotFinalizedDescendant { .. } | BlockError::WeakSubjectivityConflict => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::Fatal))
|
||||
}
|
||||
ref err @ BlockError::ExecutionPayloadError(ref epe) => {
|
||||
if !epe.penalize_peer() {
|
||||
BlockError::AvailabilityCheck(e) => PeerGroupAction::from_availability_check_error(e),
|
||||
BlockError::ExecutionPayloadError(e) => {
|
||||
if !e.penalize_peer() {
|
||||
// These errors indicate an issue with the EL and not the `ChainSegment`.
|
||||
// Pause the syncing while the EL recovers
|
||||
debug!(
|
||||
outcome = "pausing sync",
|
||||
?err,
|
||||
"Execution layer verification failed"
|
||||
);
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Execution layer offline. Reason: {:?}", err),
|
||||
// Do not penalize peers for internal errors.
|
||||
peer_action: None,
|
||||
})
|
||||
None
|
||||
} else {
|
||||
debug!(
|
||||
error = ?err,
|
||||
"Invalid execution payload"
|
||||
);
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!(
|
||||
"Peer sent a block containing invalid execution payload. Reason: {:?}",
|
||||
err
|
||||
),
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
})
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
}
|
||||
ref err @ BlockError::ParentExecutionPayloadInvalid { ref parent_root } => {
|
||||
// We need to penalise harshly in case this represents an actual attack. In case
|
||||
// of a faulty EL it will usually require manual intervention to fix anyway, so
|
||||
// it's not too bad if we drop most of our peers.
|
||||
BlockError::ParentExecutionPayloadInvalid { parent_root } => {
|
||||
warn!(
|
||||
?parent_root,
|
||||
advice = "check execution node for corruption then restart it and Lighthouse",
|
||||
"Failed to sync chain built on invalid parent"
|
||||
);
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Peer sent invalid block. Reason: {err:?}"),
|
||||
// We need to penalise harshly in case this represents an actual attack. In case
|
||||
// of a faulty EL it will usually require manual intervention to fix anyway, so
|
||||
// it's not too bad if we drop most of our peers.
|
||||
peer_action: Some(PeerAction::LowToleranceError),
|
||||
})
|
||||
Some(PeerGroupAction::block_peer(PeerAction::LowToleranceError))
|
||||
}
|
||||
// Penalise peers for sending us banned blocks.
|
||||
BlockError::KnownInvalidExecutionPayload(block_root) => {
|
||||
warn!(?block_root, "Received block known to be invalid",);
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Banned block: {block_root:?}"),
|
||||
peer_action: Some(PeerAction::Fatal),
|
||||
})
|
||||
warn!(?block_root, "Received block known to be invalid");
|
||||
Some(PeerGroupAction::block_peer(PeerAction::Fatal))
|
||||
}
|
||||
other => {
|
||||
debug!(
|
||||
msg = "peer sent invalid block",
|
||||
outcome = %other,
|
||||
"Invalid block received"
|
||||
);
|
||||
BlockError::Slashable => {
|
||||
Some(PeerGroupAction::block_peer(PeerAction::MidToleranceError))
|
||||
}
|
||||
// Do not penalize peers for internal errors.
|
||||
// BlobNotRequired is never constructed on this path
|
||||
// TODO(sync): Double check that all `BeaconChainError` variants are actually internal
|
||||
// errors in this code path
|
||||
BlockError::BeaconChainError(_)
|
||||
| BlockError::InternalError(_)
|
||||
| BlockError::BlobNotRequired(_) => None,
|
||||
// Do not use a fallback match, handle all errors explicitly
|
||||
};
|
||||
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("Peer sent invalid block. Reason: {:?}", other),
|
||||
// Do not penalize peers for internal errors.
|
||||
peer_action: None,
|
||||
})
|
||||
}
|
||||
if peer_action.is_some() {
|
||||
debug!(?error, "Range sync processing error");
|
||||
} else {
|
||||
warn!(?error, "Unexpected range sync processing error");
|
||||
}
|
||||
|
||||
Err(ChainSegmentFailed {
|
||||
message: format!("{error:?}"),
|
||||
peer_action,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,12 @@ use crate::sync::range_sync::{
|
||||
};
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use itertools::Itertools;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::types::{BackFillState, NetworkGlobals};
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::{
|
||||
btree_map::{BTreeMap, Entry},
|
||||
HashSet,
|
||||
@@ -30,6 +32,8 @@ use std::sync::Arc;
|
||||
use tracing::{debug, error, info, instrument, warn};
|
||||
use types::{Epoch, EthSpec};
|
||||
|
||||
use super::range_sync::BatchPeers;
|
||||
|
||||
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs worth of
|
||||
/// blocks per batch are requested _at most_. A batch may request fewer blocks to account for
|
||||
/// already requested slots. There is a timeout for each batch request. If this value is too high,
|
||||
@@ -128,16 +132,12 @@ pub struct BackFillSync<T: BeaconChainTypes> {
|
||||
/// Batches validated by this chain.
|
||||
validated_batches: u64,
|
||||
|
||||
/// We keep track of peers that are participating in the backfill sync. Unlike RangeSync,
|
||||
/// BackFillSync uses all synced peers to download the chain from. If BackFillSync fails, we don't
|
||||
/// want to penalize all our synced peers, so we use this variable to keep track of peers that
|
||||
/// have participated and only penalize these peers if backfill sync fails.
|
||||
participating_peers: HashSet<PeerId>,
|
||||
|
||||
/// When a backfill sync fails, we keep track of whether a new fully synced peer has joined.
|
||||
/// This signifies that we are able to attempt to restart a failed chain.
|
||||
restart_failed_sync: bool,
|
||||
|
||||
peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
|
||||
/// Reference to the beacon chain to obtain initial starting points for the backfill sync.
|
||||
beacon_chain: Arc<BeaconChain<T>>,
|
||||
|
||||
@@ -181,8 +181,8 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
network_globals,
|
||||
current_processing_batch: None,
|
||||
validated_batches: 0,
|
||||
participating_peers: HashSet::new(),
|
||||
restart_failed_sync: false,
|
||||
peers: <_>::default(),
|
||||
beacon_chain,
|
||||
};
|
||||
|
||||
@@ -222,14 +222,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
match self.state() {
|
||||
BackFillState::Syncing => {} // already syncing ignore.
|
||||
BackFillState::Paused => {
|
||||
if self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.synced_peers()
|
||||
.next()
|
||||
.is_some()
|
||||
{
|
||||
if !self.peers.read().is_empty() {
|
||||
// If there are peers to resume with, begin the resume.
|
||||
debug!(start_epoch = ?self.current_start, awaiting_batches = self.batches.len(), processing_target = ?self.processing_target, "Resuming backfill sync");
|
||||
self.set_state(BackFillState::Syncing);
|
||||
@@ -302,23 +295,20 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A peer has disconnected.
|
||||
/// If the peer has active batches, those are considered failed and re-requested.
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(service = "backfill_sync"),
|
||||
name = "backfill_sync",
|
||||
skip_all
|
||||
)]
|
||||
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
|
||||
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), BackFillError> {
|
||||
if matches!(self.state(), BackFillState::Failed) {
|
||||
return Ok(());
|
||||
}
|
||||
pub fn add_peer(&mut self, peer_id: PeerId) {
|
||||
self.peers.write().insert(peer_id);
|
||||
}
|
||||
|
||||
// Remove the peer from the participation list
|
||||
self.participating_peers.remove(peer_id);
|
||||
Ok(())
|
||||
pub fn peer_disconnected(&mut self, peer_id: &PeerId) {
|
||||
self.peers.write().remove(peer_id);
|
||||
|
||||
if self.peers.read().is_empty() {
|
||||
info!(
|
||||
"reason" = "insufficient_synced_peers",
|
||||
"Backfill sync paused"
|
||||
);
|
||||
self.set_state(BackFillState::Paused);
|
||||
}
|
||||
}
|
||||
|
||||
/// An RPC error has occurred.
|
||||
@@ -335,7 +325,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
batch_id: BatchId,
|
||||
peer_id: &PeerId,
|
||||
request_id: Id,
|
||||
err: RpcResponseError,
|
||||
) -> Result<(), BackFillError> {
|
||||
@@ -349,11 +338,16 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
return Ok(());
|
||||
}
|
||||
debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed");
|
||||
match batch.download_failed(Some(*peer_id)) {
|
||||
match batch.download_failed() {
|
||||
Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)),
|
||||
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
|
||||
self.fail_sync(BackFillError::BatchDownloadFailed(batch_id))
|
||||
}
|
||||
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => self.fail_sync(match err {
|
||||
RpcResponseError::RpcError(_)
|
||||
| RpcResponseError::VerifyError(_)
|
||||
| RpcResponseError::InternalError(_) => {
|
||||
BackFillError::BatchDownloadFailed(batch_id)
|
||||
}
|
||||
RpcResponseError::RequestExpired(_) => BackFillError::Paused,
|
||||
}),
|
||||
Ok(BatchOperationOutcome::Continue) => self.send_batch(network, batch_id),
|
||||
}
|
||||
} else {
|
||||
@@ -378,7 +372,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
batch_id: BatchId,
|
||||
peer_id: &PeerId,
|
||||
batch_peers: BatchPeers,
|
||||
request_id: Id,
|
||||
blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||
) -> Result<ProcessResult, BackFillError> {
|
||||
@@ -399,7 +393,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
return Ok(ProcessResult::Successful);
|
||||
}
|
||||
|
||||
match batch.download_completed(blocks, *peer_id) {
|
||||
match batch.download_completed(blocks, batch_peers) {
|
||||
Ok(received) => {
|
||||
let awaiting_batches =
|
||||
self.processing_target.saturating_sub(batch_id) / BACKFILL_EPOCHS_PER_BATCH;
|
||||
@@ -440,7 +434,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
self.set_state(BackFillState::Failed);
|
||||
// Remove all batches and active requests and participating peers.
|
||||
self.batches.clear();
|
||||
self.participating_peers.clear();
|
||||
self.restart_failed_sync = false;
|
||||
|
||||
// Reset all downloading and processing targets
|
||||
@@ -573,7 +566,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
};
|
||||
|
||||
let Some(peer) = batch.processing_peer() else {
|
||||
let Some(batch_peers) = batch.processing_peers() else {
|
||||
self.fail_sync(BackFillError::BatchInvalidState(
|
||||
batch_id,
|
||||
String::from("Peer does not exist"),
|
||||
@@ -585,8 +578,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
?result,
|
||||
%batch,
|
||||
batch_epoch = %batch_id,
|
||||
%peer,
|
||||
client = %network.client_type(peer),
|
||||
"Backfill batch processed"
|
||||
);
|
||||
|
||||
@@ -628,31 +619,57 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
}
|
||||
BatchProcessResult::FaultyFailure {
|
||||
imported_blocks,
|
||||
penalty,
|
||||
peer_action,
|
||||
error,
|
||||
} => {
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure) {
|
||||
// TODO(sync): De-dup between back and forwards sync
|
||||
let mut failed_peers = vec![];
|
||||
|
||||
if let Some(penalty) = peer_action.block_peer {
|
||||
// Penalize the peer appropriately.
|
||||
network.report_peer(batch_peers.block(), penalty, "faulty_batch");
|
||||
failed_peers.push(batch_peers.block());
|
||||
}
|
||||
|
||||
// Penalize each peer only once. Currently a peer_action does not mix different
|
||||
// PeerAction levels.
|
||||
for (peer, penalty) in peer_action
|
||||
.column_peer
|
||||
.iter()
|
||||
.filter_map(|(column_index, penalty)| {
|
||||
batch_peers
|
||||
.column(column_index)
|
||||
.map(|peer| (*peer, *penalty))
|
||||
})
|
||||
.unique()
|
||||
{
|
||||
network.report_peer(peer, penalty, "faulty_batch_column");
|
||||
failed_peers.push(peer);
|
||||
}
|
||||
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure(failed_peers))
|
||||
{
|
||||
Err(e) => {
|
||||
// Batch was in the wrong state
|
||||
self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))
|
||||
.map(|_| ProcessResult::Successful)
|
||||
}
|
||||
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
|
||||
// check that we have not exceeded the re-process retry counter
|
||||
// If a batch has exceeded the invalid batch lookup attempts limit, it means
|
||||
// that it is likely all peers are sending invalid batches
|
||||
// repeatedly and are either malicious or faulty. We stop the backfill sync and
|
||||
// report all synced peers that have participated.
|
||||
Ok(BatchOperationOutcome::Failed { .. }) => {
|
||||
// When backfill syncing post-PeerDAS we can't attribute fault to previous
|
||||
// peers if a batch fails to process too many times. We have strict peer
|
||||
// scoring for faulty errors, so participating peers that sent invalid
|
||||
// data are already downscored.
|
||||
//
|
||||
// Because backfill sync deals with historical data that we can assert
|
||||
// to be correct, once we import a batch that contains at least one
|
||||
// block we are sure we got the right data. There's no need to penalize
|
||||
// all participating peers in backfill sync if a batch fails
|
||||
warn!(
|
||||
score_adjustment = %penalty,
|
||||
batch_epoch = %batch_id,
|
||||
"Backfill batch failed to download. Penalizing peers"
|
||||
error,
|
||||
"Backfill sync failed after attempting to process batch too many times"
|
||||
);
|
||||
|
||||
for peer in self.participating_peers.drain() {
|
||||
// TODO(das): `participating_peers` only includes block peers. Should we
|
||||
// penalize the custody column peers too?
|
||||
network.report_peer(peer, *penalty, "backfill_batch_failed");
|
||||
}
|
||||
self.fail_sync(BackFillError::BatchProcessingFailed(batch_id))
|
||||
.map(|_| ProcessResult::Successful)
|
||||
}
|
||||
@@ -781,37 +798,38 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
// The validated batch has been re-processed
|
||||
if attempt.hash != processed_attempt.hash {
|
||||
// The re-downloaded version was different.
|
||||
if processed_attempt.peer_id != attempt.peer_id {
|
||||
// TODO(das): should penalize other peers?
|
||||
let valid_attempt_peer = processed_attempt.block_peer();
|
||||
let bad_attempt_peer = attempt.block_peer();
|
||||
if valid_attempt_peer != bad_attempt_peer {
|
||||
// A different peer sent the correct batch, the previous peer did not
|
||||
// We negatively score the original peer.
|
||||
let action = PeerAction::LowToleranceError;
|
||||
debug!(
|
||||
batch_epoch = ?id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
batch_epoch = %id, score_adjustment = %action,
|
||||
original_peer = %bad_attempt_peer, new_peer = %valid_attempt_peer,
|
||||
"Re-processed batch validated. Scoring original peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
bad_attempt_peer,
|
||||
action,
|
||||
"backfill_reprocessed_original_peer",
|
||||
"batch_reprocessed_original_peer",
|
||||
);
|
||||
} else {
|
||||
// The same peer corrected its previous mistake. There was an error, so we
|
||||
// negatively score the original peer.
|
||||
let action = PeerAction::MidToleranceError;
|
||||
debug!(
|
||||
batch_epoch = ?id,
|
||||
batch_epoch = %id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
original_peer = %bad_attempt_peer,
|
||||
new_peer = %valid_attempt_peer,
|
||||
"Re-processed batch validated by the same peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
bad_attempt_peer,
|
||||
action,
|
||||
"backfill_reprocessed_same_peer",
|
||||
"batch_reprocessed_same_peer",
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -918,22 +936,13 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
batch_id: BatchId,
|
||||
) -> Result<(), BackFillError> {
|
||||
if let Some(batch) = self.batches.get_mut(&batch_id) {
|
||||
let synced_peers = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.synced_peers()
|
||||
.cloned()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let (request, is_blob_batch) = batch.to_blocks_by_range_request();
|
||||
let request = batch.to_blocks_by_range_request();
|
||||
let failed_peers = batch.failed_peers();
|
||||
match network.block_components_by_range_request(
|
||||
is_blob_batch,
|
||||
request,
|
||||
RangeRequestId::BackfillSync { batch_id },
|
||||
&synced_peers,
|
||||
&failed_peers,
|
||||
self.peers.clone(),
|
||||
failed_peers,
|
||||
) {
|
||||
Ok(request_id) => {
|
||||
// inform the batch about the new request
|
||||
@@ -945,10 +954,10 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => match e {
|
||||
RpcRequestSendError::NoPeer(no_peer) => {
|
||||
RpcRequestSendError::NoPeers => {
|
||||
// If we are here the chain has no more synced peers
|
||||
info!(
|
||||
"reason" = format!("insufficient_synced_peers({no_peer:?})"),
|
||||
"reason" = "insufficient_synced_peers",
|
||||
"Backfill sync paused"
|
||||
);
|
||||
self.set_state(BackFillState::Paused);
|
||||
@@ -962,7 +971,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
return self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0));
|
||||
}
|
||||
|
||||
match batch.download_failed(None) {
|
||||
match batch.download_failed() {
|
||||
Err(e) => {
|
||||
self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))?
|
||||
}
|
||||
@@ -1089,12 +1098,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
||||
self.include_next_batch(network)
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let batch_type = network.batch_type(batch_id);
|
||||
entry.insert(BatchInfo::new(
|
||||
&batch_id,
|
||||
BACKFILL_EPOCHS_PER_BATCH,
|
||||
batch_type,
|
||||
));
|
||||
entry.insert(BatchInfo::new(&batch_id, BACKFILL_EPOCHS_PER_BATCH));
|
||||
if self.would_complete(batch_id) {
|
||||
self.last_batch_downloaded = true;
|
||||
}
|
||||
|
||||
@@ -503,7 +503,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
let Some(lookup) = self.single_block_lookups.get_mut(&id.lookup_id) else {
|
||||
// We don't have the ability to cancel in-flight RPC requests. So this can happen
|
||||
// if we started this RPC request, and later saw the block/blobs via gossip.
|
||||
debug!(?id, "Block returned for single block lookup not present");
|
||||
debug!(%id, "Block returned for single block lookup not present");
|
||||
return Err(LookupRequestError::UnknownLookup);
|
||||
};
|
||||
|
||||
@@ -516,7 +516,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
Ok((response, peer_group, seen_timestamp)) => {
|
||||
debug!(
|
||||
?block_root,
|
||||
?id,
|
||||
%id,
|
||||
?peer_group,
|
||||
?response_type,
|
||||
"Received lookup download success"
|
||||
@@ -549,7 +549,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
// the peer and the request ID which is linked to this `id` value here.
|
||||
debug!(
|
||||
?block_root,
|
||||
?id,
|
||||
%id,
|
||||
?response_type,
|
||||
error = ?e,
|
||||
"Received lookup download failure"
|
||||
@@ -733,7 +733,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
// Collect all peers that sent a column that was invalid. Must
|
||||
// run .unique as a single peer can send multiple invalid
|
||||
// columns. Penalize once to avoid insta-bans
|
||||
.flat_map(|(index, _)| peer_group.of_index((*index) as usize))
|
||||
.flat_map(|(index, _)| peer_group.of_index(&(*index as usize)))
|
||||
.unique()
|
||||
.collect(),
|
||||
_ => peer_group.all().collect(),
|
||||
|
||||
@@ -1,15 +1,23 @@
|
||||
use beacon_chain::{
|
||||
block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root,
|
||||
};
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId,
|
||||
use lighthouse_network::{
|
||||
service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId,
|
||||
},
|
||||
PeerId,
|
||||
};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use types::{
|
||||
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec,
|
||||
Hash256, RuntimeVariableList, SignedBeaconBlock,
|
||||
Hash256, RuntimeVariableList, SignedBeaconBlock, Slot,
|
||||
};
|
||||
|
||||
use super::range_sync::BatchPeers;
|
||||
|
||||
pub struct RangeBlockComponentsRequest<E: EthSpec> {
|
||||
/// Blocks we have received that are awaiting their corresponding sidecar.
|
||||
blocks_request: ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
@@ -19,18 +27,21 @@ pub struct RangeBlockComponentsRequest<E: EthSpec> {
|
||||
|
||||
enum ByRangeRequest<I: PartialEq + std::fmt::Display, T> {
|
||||
Active(I),
|
||||
Complete(T),
|
||||
Complete(T, PeerId),
|
||||
}
|
||||
|
||||
enum RangeBlockDataRequest<E: EthSpec> {
|
||||
/// All pre-deneb blocks
|
||||
NoData,
|
||||
/// All post-Deneb blocks, regardless of if they have data or not
|
||||
Blobs(ByRangeRequest<BlobsByRangeRequestId, Vec<Arc<BlobSidecar<E>>>>),
|
||||
/// All post-Fulu blocks, regardless of if they have data or not
|
||||
DataColumns {
|
||||
requests: HashMap<
|
||||
DataColumnsByRangeRequestId,
|
||||
ByRangeRequest<DataColumnsByRangeRequestId, DataColumnSidecarList<E>>,
|
||||
>,
|
||||
expected_custody_columns: Vec<ColumnIndex>,
|
||||
expected_column_to_peer: HashMap<ColumnIndex, PeerId>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -38,17 +49,20 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
pub fn new(
|
||||
blocks_req_id: BlocksByRangeRequestId,
|
||||
blobs_req_id: Option<BlobsByRangeRequestId>,
|
||||
data_columns: Option<(Vec<DataColumnsByRangeRequestId>, Vec<ColumnIndex>)>,
|
||||
data_columns: Option<(
|
||||
Vec<DataColumnsByRangeRequestId>,
|
||||
HashMap<ColumnIndex, PeerId>,
|
||||
)>,
|
||||
) -> Self {
|
||||
let block_data_request = if let Some(blobs_req_id) = blobs_req_id {
|
||||
RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id))
|
||||
} else if let Some((requests, expected_custody_columns)) = data_columns {
|
||||
} else if let Some((requests, expected_column_to_peer)) = data_columns {
|
||||
RangeBlockDataRequest::DataColumns {
|
||||
requests: requests
|
||||
.into_iter()
|
||||
.map(|id| (id, ByRangeRequest::Active(id)))
|
||||
.collect(),
|
||||
expected_custody_columns,
|
||||
expected_column_to_peer,
|
||||
}
|
||||
} else {
|
||||
RangeBlockDataRequest::NoData
|
||||
@@ -64,18 +78,20 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
&mut self,
|
||||
req_id: BlocksByRangeRequestId,
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
peer_id: PeerId,
|
||||
) -> Result<(), String> {
|
||||
self.blocks_request.finish(req_id, blocks)
|
||||
self.blocks_request.finish(req_id, blocks, peer_id)
|
||||
}
|
||||
|
||||
pub fn add_blobs(
|
||||
&mut self,
|
||||
req_id: BlobsByRangeRequestId,
|
||||
blobs: Vec<Arc<BlobSidecar<E>>>,
|
||||
peer_id: PeerId,
|
||||
) -> Result<(), String> {
|
||||
match &mut self.block_data_request {
|
||||
RangeBlockDataRequest::NoData => Err("received blobs but expected no data".to_owned()),
|
||||
RangeBlockDataRequest::Blobs(ref mut req) => req.finish(req_id, blobs),
|
||||
RangeBlockDataRequest::Blobs(ref mut req) => req.finish(req_id, blobs, peer_id),
|
||||
RangeBlockDataRequest::DataColumns { .. } => {
|
||||
Err("received blobs but expected data columns".to_owned())
|
||||
}
|
||||
@@ -86,6 +102,7 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
&mut self,
|
||||
req_id: DataColumnsByRangeRequestId,
|
||||
columns: Vec<Arc<DataColumnSidecar<E>>>,
|
||||
peer_id: PeerId,
|
||||
) -> Result<(), String> {
|
||||
match &mut self.block_data_request {
|
||||
RangeBlockDataRequest::NoData => {
|
||||
@@ -100,48 +117,60 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
let req = requests
|
||||
.get_mut(&req_id)
|
||||
.ok_or(format!("unknown data columns by range req_id {req_id}"))?;
|
||||
req.finish(req_id, columns)
|
||||
req.finish(req_id, columns, peer_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn responses(&self, spec: &ChainSpec) -> Option<Result<Vec<RpcBlock<E>>, String>> {
|
||||
let Some(blocks) = self.blocks_request.to_finished() else {
|
||||
/// If all internal requests are complete returns a Vec of coupled RpcBlocks
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn responses(
|
||||
&self,
|
||||
spec: &ChainSpec,
|
||||
) -> Option<Result<(Vec<RpcBlock<E>>, BatchPeers), String>> {
|
||||
let Some((blocks, &block_peer)) = self.blocks_request.to_finished() else {
|
||||
return None;
|
||||
};
|
||||
|
||||
match &self.block_data_request {
|
||||
RangeBlockDataRequest::NoData => {
|
||||
Some(Self::responses_with_blobs(blocks.to_vec(), vec![], spec))
|
||||
}
|
||||
RangeBlockDataRequest::NoData => Some(
|
||||
Self::responses_with_blobs(blocks.to_vec(), vec![], spec)
|
||||
.map(|blocks| (blocks, BatchPeers::new_from_block_peer(block_peer))),
|
||||
),
|
||||
RangeBlockDataRequest::Blobs(request) => {
|
||||
let Some(blobs) = request.to_finished() else {
|
||||
let Some((blobs, _blob_peer)) = request.to_finished() else {
|
||||
return None;
|
||||
};
|
||||
Some(Self::responses_with_blobs(
|
||||
blocks.to_vec(),
|
||||
blobs.to_vec(),
|
||||
spec,
|
||||
))
|
||||
Some(
|
||||
Self::responses_with_blobs(blocks.to_vec(), blobs.to_vec(), spec)
|
||||
.map(|blocks| (blocks, BatchPeers::new_from_block_peer(block_peer))),
|
||||
)
|
||||
}
|
||||
RangeBlockDataRequest::DataColumns {
|
||||
requests,
|
||||
expected_custody_columns,
|
||||
expected_column_to_peer,
|
||||
} => {
|
||||
let mut data_columns = vec![];
|
||||
let mut column_peers = HashMap::new();
|
||||
for req in requests.values() {
|
||||
let Some(data) = req.to_finished() else {
|
||||
let Some((resp_columns, column_peer)) = req.to_finished() else {
|
||||
return None;
|
||||
};
|
||||
data_columns.extend(data.clone())
|
||||
data_columns.extend(resp_columns.clone());
|
||||
for column in resp_columns {
|
||||
column_peers.insert(column.index, *column_peer);
|
||||
}
|
||||
}
|
||||
|
||||
Some(Self::responses_with_custody_columns(
|
||||
blocks.to_vec(),
|
||||
data_columns,
|
||||
expected_custody_columns,
|
||||
spec,
|
||||
))
|
||||
Some(
|
||||
Self::responses_with_custody_columns(
|
||||
blocks.to_vec(),
|
||||
data_columns,
|
||||
expected_column_to_peer.clone(),
|
||||
spec,
|
||||
)
|
||||
.map(|blocks| (blocks, BatchPeers::new(block_peer, column_peers))),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -199,100 +228,91 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
fn responses_with_custody_columns(
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
data_columns: DataColumnSidecarList<E>,
|
||||
expects_custody_columns: &[ColumnIndex],
|
||||
expected_custody_columns: HashMap<ColumnIndex, PeerId>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Vec<RpcBlock<E>>, String> {
|
||||
// Group data columns by block_root and index
|
||||
let mut data_columns_by_block =
|
||||
HashMap::<Hash256, HashMap<ColumnIndex, Arc<DataColumnSidecar<E>>>>::new();
|
||||
let mut custody_columns_by_block = HashMap::<Hash256, Vec<CustodyDataColumn<E>>>::new();
|
||||
let mut block_roots_by_slot = HashMap::<Slot, HashSet<Hash256>>::new();
|
||||
|
||||
for column in data_columns {
|
||||
let block_root = column.block_root();
|
||||
let index = column.index;
|
||||
if data_columns_by_block
|
||||
.entry(block_root)
|
||||
|
||||
block_roots_by_slot
|
||||
.entry(column.slot())
|
||||
.or_default()
|
||||
.insert(index, column)
|
||||
.is_some()
|
||||
{
|
||||
.insert(block_root);
|
||||
|
||||
// Sanity check before casting to `CustodyDataColumn`. But this should never happen
|
||||
if !expected_custody_columns.contains_key(&index) {
|
||||
return Err(format!(
|
||||
"Repeated column block_root {block_root:?} index {index}"
|
||||
"Received column not in expected custody indices {index}"
|
||||
));
|
||||
}
|
||||
|
||||
custody_columns_by_block
|
||||
.entry(block_root)
|
||||
.or_default()
|
||||
.push(CustodyDataColumn::from_asserted_custody(column));
|
||||
}
|
||||
|
||||
// Now iterate all blocks ensuring that the block roots of each block and data column match,
|
||||
// plus we have columns for our custody requirements
|
||||
let mut rpc_blocks = Vec::with_capacity(blocks.len());
|
||||
let rpc_blocks = blocks
|
||||
.into_iter()
|
||||
.map(|block| {
|
||||
let block_root = get_block_root(&block);
|
||||
block_roots_by_slot
|
||||
.entry(block.slot())
|
||||
.or_default()
|
||||
.insert(block_root);
|
||||
|
||||
for block in blocks {
|
||||
let block_root = get_block_root(&block);
|
||||
rpc_blocks.push(if block.num_expected_blobs() > 0 {
|
||||
let Some(mut data_columns_by_index) = data_columns_by_block.remove(&block_root)
|
||||
else {
|
||||
// This PR ignores the fix from https://github.com/sigp/lighthouse/pull/5675
|
||||
// which allows blobs to not match blocks.
|
||||
// TODO(das): on the initial version of PeerDAS the beacon chain does not check
|
||||
// rpc custody requirements and dropping this check can allow the block to have
|
||||
// an inconsistent DB.
|
||||
return Err(format!("No columns for block {block_root:?} with data"));
|
||||
};
|
||||
|
||||
let mut custody_columns = vec![];
|
||||
for index in expects_custody_columns {
|
||||
let Some(data_column) = data_columns_by_index.remove(index) else {
|
||||
return Err(format!("No column for block {block_root:?} index {index}"));
|
||||
};
|
||||
// Safe to convert to `CustodyDataColumn`: we have asserted that the index of
|
||||
// this column is in the set of `expects_custody_columns` and with the expected
|
||||
// block root, so for the expected epoch of this batch.
|
||||
custody_columns.push(CustodyDataColumn::from_asserted_custody(data_column));
|
||||
}
|
||||
|
||||
// Assert that there are no columns left
|
||||
if !data_columns_by_index.is_empty() {
|
||||
let remaining_indices = data_columns_by_index.keys().collect::<Vec<_>>();
|
||||
return Err(format!(
|
||||
"Not all columns consumed for block {block_root:?}: {remaining_indices:?}"
|
||||
));
|
||||
}
|
||||
let custody_columns = custody_columns_by_block
|
||||
.remove(&block_root)
|
||||
.unwrap_or_default();
|
||||
|
||||
RpcBlock::new_with_custody_columns(Some(block_root), block, custody_columns, spec)
|
||||
.map_err(|e| format!("{e:?}"))?
|
||||
} else {
|
||||
// Block has no data, expects zero columns
|
||||
RpcBlock::new_without_blobs(Some(block_root), block)
|
||||
});
|
||||
}
|
||||
.map_err(|e| format!("{e:?}"))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// Assert that there are no columns left for other blocks
|
||||
if !data_columns_by_block.is_empty() {
|
||||
let remaining_roots = data_columns_by_block.keys().collect::<Vec<_>>();
|
||||
if !custody_columns_by_block.is_empty() {
|
||||
let remaining_roots = custody_columns_by_block.keys().collect::<Vec<_>>();
|
||||
return Err(format!("Not all columns consumed: {remaining_roots:?}"));
|
||||
}
|
||||
|
||||
for (_slot, block_roots) in block_roots_by_slot {
|
||||
if block_roots.len() > 1 {
|
||||
// TODO: Some peer(s) are faulty or malicious. This batch will fail processing but
|
||||
// we want to send it to the process to better attribute fault. Maybe warn log for
|
||||
// now and track it in a metric?
|
||||
}
|
||||
}
|
||||
|
||||
Ok(rpc_blocks)
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: PartialEq + std::fmt::Display, T> ByRangeRequest<I, T> {
|
||||
fn finish(&mut self, id: I, data: T) -> Result<(), String> {
|
||||
fn finish(&mut self, id: I, data: T, peer_id: PeerId) -> Result<(), String> {
|
||||
match self {
|
||||
Self::Active(expected_id) => {
|
||||
if expected_id != &id {
|
||||
return Err(format!("unexpected req_id expected {expected_id} got {id}"));
|
||||
}
|
||||
*self = Self::Complete(data);
|
||||
*self = Self::Complete(data, peer_id);
|
||||
Ok(())
|
||||
}
|
||||
Self::Complete(_) => Err("request already complete".to_owned()),
|
||||
Self::Complete(_, _) => Err("request already complete".to_owned()),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_finished(&self) -> Option<&T> {
|
||||
fn to_finished(&self) -> Option<(&T, &PeerId)> {
|
||||
match self {
|
||||
Self::Active(_) => None,
|
||||
Self::Complete(data) => Some(data),
|
||||
Self::Complete(data, peer_id) => Some((data, peer_id)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -303,12 +323,15 @@ mod tests {
|
||||
use beacon_chain::test_utils::{
|
||||
generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, NumBlobs,
|
||||
};
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
DataColumnsByRangeRequestId, Id, RangeRequestId,
|
||||
use lighthouse_network::{
|
||||
service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
DataColumnsByRangeRequestId, Id, RangeRequestId,
|
||||
},
|
||||
PeerId,
|
||||
};
|
||||
use rand::SeedableRng;
|
||||
use std::sync::Arc;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use types::{test_utils::XorShiftRng, Epoch, ForkName, MinimalEthSpec as E, SignedBeaconBlock};
|
||||
|
||||
fn components_id() -> ComponentsByRangeRequestId {
|
||||
@@ -353,6 +376,7 @@ mod tests {
|
||||
#[test]
|
||||
fn no_blobs_into_responses() {
|
||||
let spec = test_spec::<E>();
|
||||
let peer = PeerId::random();
|
||||
let mut rng = XorShiftRng::from_seed([42; 16]);
|
||||
let blocks = (0..4)
|
||||
.map(|_| {
|
||||
@@ -366,7 +390,7 @@ mod tests {
|
||||
let mut info = RangeBlockComponentsRequest::<E>::new(blocks_req_id, None, None);
|
||||
|
||||
// Send blocks and complete terminate response
|
||||
info.add_blocks(blocks_req_id, blocks).unwrap();
|
||||
info.add_blocks(blocks_req_id, blocks, peer).unwrap();
|
||||
|
||||
// Assert response is finished and RpcBlocks can be constructed
|
||||
info.responses(&test_spec::<E>()).unwrap().unwrap();
|
||||
@@ -375,6 +399,7 @@ mod tests {
|
||||
#[test]
|
||||
fn empty_blobs_into_responses() {
|
||||
let spec = test_spec::<E>();
|
||||
let peer = PeerId::random();
|
||||
let mut rng = XorShiftRng::from_seed([42; 16]);
|
||||
let blocks = (0..4)
|
||||
.map(|_| {
|
||||
@@ -397,9 +422,9 @@ mod tests {
|
||||
RangeBlockComponentsRequest::<E>::new(blocks_req_id, Some(blobs_req_id), None);
|
||||
|
||||
// Send blocks and complete terminate response
|
||||
info.add_blocks(blocks_req_id, blocks).unwrap();
|
||||
info.add_blocks(blocks_req_id, blocks, peer).unwrap();
|
||||
// Expect no blobs returned
|
||||
info.add_blobs(blobs_req_id, vec![]).unwrap();
|
||||
info.add_blobs(blobs_req_id, vec![], peer).unwrap();
|
||||
|
||||
// Assert response is finished and RpcBlocks can be constructed, even if blobs weren't returned.
|
||||
// This makes sure we don't expect blobs here when they have expired. Checking this logic should
|
||||
@@ -410,7 +435,8 @@ mod tests {
|
||||
#[test]
|
||||
fn rpc_block_with_custody_columns() {
|
||||
let spec = test_spec::<E>();
|
||||
let expects_custody_columns = vec![1, 2, 3, 4];
|
||||
let peer = PeerId::random();
|
||||
let expects_custody_columns = [1, 2, 3, 4];
|
||||
let mut rng = XorShiftRng::from_seed([42; 16]);
|
||||
let blocks = (0..4)
|
||||
.map(|_| {
|
||||
@@ -430,15 +456,22 @@ mod tests {
|
||||
.enumerate()
|
||||
.map(|(i, _)| columns_id(i as Id, components_id))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let column_to_peer = expects_custody_columns
|
||||
.iter()
|
||||
.map(|index| (*index, peer))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let mut info = RangeBlockComponentsRequest::<E>::new(
|
||||
blocks_req_id,
|
||||
None,
|
||||
Some((columns_req_id.clone(), expects_custody_columns.clone())),
|
||||
Some((columns_req_id.clone(), column_to_peer)),
|
||||
);
|
||||
// Send blocks and complete terminate response
|
||||
info.add_blocks(
|
||||
blocks_req_id,
|
||||
blocks.iter().map(|b| b.0.clone().into()).collect(),
|
||||
peer,
|
||||
)
|
||||
.unwrap();
|
||||
// Assert response is not finished
|
||||
@@ -452,6 +485,7 @@ mod tests {
|
||||
.iter()
|
||||
.flat_map(|b| b.1.iter().filter(|d| d.index == column_index).cloned())
|
||||
.collect(),
|
||||
peer,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -470,12 +504,13 @@ mod tests {
|
||||
#[test]
|
||||
fn rpc_block_with_custody_columns_batched() {
|
||||
let spec = test_spec::<E>();
|
||||
let peer = PeerId::random();
|
||||
let batched_column_requests = [vec![1_u64, 2], vec![3, 4]];
|
||||
let expects_custody_columns = batched_column_requests
|
||||
.iter()
|
||||
.flatten()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
.map(|index| (*index, peer))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let custody_column_request_ids =
|
||||
(0..batched_column_requests.len() as u32).collect::<Vec<_>>();
|
||||
let num_of_data_column_requests = custody_column_request_ids.len();
|
||||
@@ -510,6 +545,7 @@ mod tests {
|
||||
info.add_blocks(
|
||||
blocks_req_id,
|
||||
blocks.iter().map(|b| b.0.clone().into()).collect(),
|
||||
peer,
|
||||
)
|
||||
.unwrap();
|
||||
// Assert response is not finished
|
||||
@@ -527,6 +563,7 @@ mod tests {
|
||||
.cloned()
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
peer,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -36,18 +36,21 @@
|
||||
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
|
||||
use super::block_lookups::BlockLookups;
|
||||
use super::network_context::{
|
||||
CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent, SyncNetworkContext,
|
||||
CustodyRequestResult, RangeBlockComponent, RangeRequestId, RpcEvent, SyncNetworkContext,
|
||||
};
|
||||
use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
|
||||
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
|
||||
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
|
||||
use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor};
|
||||
use crate::network_beacon_processor::{
|
||||
ChainSegmentProcessId, NetworkBeaconProcessor, PeerGroupAction,
|
||||
};
|
||||
use crate::service::NetworkMessage;
|
||||
use crate::status::ToStatusMessage;
|
||||
use crate::sync::block_lookups::{
|
||||
BlobRequestState, BlockComponent, BlockRequestState, CustodyRequestState, DownloadResult,
|
||||
};
|
||||
use crate::sync::network_context::PeerGroup;
|
||||
use crate::sync::range_sync::BATCH_BUFFER_SIZE;
|
||||
use beacon_chain::block_verification_types::AsBlock;
|
||||
use beacon_chain::validator_monitor::timestamp_now;
|
||||
use beacon_chain::{
|
||||
@@ -56,13 +59,14 @@ use beacon_chain::{
|
||||
use futures::StreamExt;
|
||||
use lighthouse_network::rpc::RPCError;
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId, CustodyRequester,
|
||||
DataColumnsByRangeRequestId, DataColumnsByRootRequestId, DataColumnsByRootRequester, Id,
|
||||
SamplingId, SamplingRequester, SingleLookupReqId, SyncRequestId,
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
CustodyByRangeRequestId, CustodyRequester, DataColumnsByRangeRequestId,
|
||||
DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SamplingId, SamplingRequester,
|
||||
SingleLookupReqId, SyncRequestId,
|
||||
};
|
||||
use lighthouse_network::types::{NetworkGlobals, SyncState};
|
||||
use lighthouse_network::PeerId;
|
||||
use lighthouse_network::SyncInfo;
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use lru_cache::LRUTimeCache;
|
||||
use std::ops::Sub;
|
||||
@@ -218,7 +222,8 @@ pub enum BatchProcessResult {
|
||||
/// The batch processing failed. It carries whether the processing imported any block.
|
||||
FaultyFailure {
|
||||
imported_blocks: usize,
|
||||
penalty: PeerAction,
|
||||
peer_action: PeerGroupAction,
|
||||
error: String,
|
||||
},
|
||||
NonFaultyFailure,
|
||||
}
|
||||
@@ -276,6 +281,7 @@ pub fn spawn<T: BeaconChainTypes>(
|
||||
sync_recv,
|
||||
SamplingConfig::Default,
|
||||
fork_context,
|
||||
BATCH_BUFFER_SIZE,
|
||||
);
|
||||
|
||||
// spawn the sync manager thread
|
||||
@@ -298,6 +304,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
sync_recv: mpsc::UnboundedReceiver<SyncMessage<T::EthSpec>>,
|
||||
sampling_config: SamplingConfig,
|
||||
fork_context: Arc<ForkContext>,
|
||||
batch_buffer_size: usize,
|
||||
) -> Self {
|
||||
let network_globals = beacon_processor.network_globals.clone();
|
||||
Self {
|
||||
@@ -309,7 +316,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
beacon_chain.clone(),
|
||||
fork_context.clone(),
|
||||
),
|
||||
range_sync: RangeSync::new(beacon_chain.clone()),
|
||||
range_sync: RangeSync::new(beacon_chain.clone(), batch_buffer_size),
|
||||
backfill_sync: BackFillSync::new(beacon_chain.clone(), network_globals),
|
||||
block_lookups: BlockLookups::new(),
|
||||
notified_unknown_roots: LRUTimeCache::new(Duration::from_secs(
|
||||
@@ -333,23 +340,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn get_range_sync_chains(
|
||||
&self,
|
||||
) -> Result<Option<(RangeSyncType, Slot, Slot)>, &'static str> {
|
||||
self.range_sync.state()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn range_sync_state(&self) -> super::range_sync::SyncChainStatus {
|
||||
self.range_sync.state()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn __range_failed_chains(&mut self) -> Vec<Hash256> {
|
||||
self.range_sync.__failed_chains()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
|
||||
self.block_lookups.get_failed_chains()
|
||||
@@ -374,6 +364,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
self.sampling.get_request_status(block_root, index)
|
||||
}
|
||||
|
||||
// Leak the full network context to prevent having to add many cfg(test) methods here
|
||||
#[cfg(test)]
|
||||
pub(crate) fn network(&mut self) -> &mut SyncNetworkContext<T> {
|
||||
&mut self.network
|
||||
}
|
||||
|
||||
// Leak the full range_sync to prevent having to add many cfg(test) methods here
|
||||
#[cfg(test)]
|
||||
pub(crate) fn range_sync(&mut self) -> &mut RangeSync<T> {
|
||||
&mut self.range_sync
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn update_execution_engine_state(&mut self, state: EngineState) {
|
||||
self.handle_new_execution_engine_state(state);
|
||||
@@ -431,6 +433,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match sync_type {
|
||||
PeerSyncType::Behind => {}
|
||||
PeerSyncType::Advanced | PeerSyncType::FullySynced => {
|
||||
self.backfill_sync.add_peer(peer_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.update_sync_state();
|
||||
@@ -439,6 +448,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
for (id, result) in self.network.continue_custody_by_root_requests() {
|
||||
self.on_custody_by_root_result(id, result);
|
||||
}
|
||||
for (id, result) in self.network.continue_custody_by_range_requests() {
|
||||
self.on_custody_by_range_result(id, result);
|
||||
}
|
||||
}
|
||||
|
||||
/// Trigger range sync for a set of peers that claim to have imported a head unknown to us.
|
||||
@@ -528,7 +540,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
|
||||
// Remove peer from all data structures
|
||||
self.range_sync.peer_disconnect(&mut self.network, peer_id);
|
||||
let _ = self.backfill_sync.peer_disconnected(peer_id);
|
||||
self.backfill_sync.peer_disconnected(peer_id);
|
||||
self.block_lookups.peer_disconnected(peer_id);
|
||||
|
||||
// Regardless of the outcome, we update the sync status.
|
||||
@@ -543,6 +555,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
for (id, result) in self.network.continue_custody_by_root_requests() {
|
||||
self.on_custody_by_root_result(id, result);
|
||||
}
|
||||
for (id, result) in self.network.continue_custody_by_range_requests() {
|
||||
self.on_custody_by_range_result(id, result);
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the syncing state of a peer.
|
||||
@@ -1199,10 +1214,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
|
||||
) {
|
||||
if let Some(resp) = self.network.on_blocks_by_range_response(id, peer_id, block) {
|
||||
self.on_range_components_response(
|
||||
self.on_block_components_by_range_response(
|
||||
id.parent_request_id,
|
||||
peer_id,
|
||||
RangeBlockComponent::Block(id, resp),
|
||||
RangeBlockComponent::Block(id, resp, peer_id),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1214,10 +1228,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
|
||||
) {
|
||||
if let Some(resp) = self.network.on_blobs_by_range_response(id, peer_id, blob) {
|
||||
self.on_range_components_response(
|
||||
self.on_block_components_by_range_response(
|
||||
id.parent_request_id,
|
||||
peer_id,
|
||||
RangeBlockComponent::Blob(id, resp),
|
||||
RangeBlockComponent::Blob(id, resp, peer_id),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1228,22 +1241,35 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
peer_id: PeerId,
|
||||
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||
) {
|
||||
// data_columns_by_range returns either an Ok list of data columns, or an RpcResponseError
|
||||
if let Some(resp) = self
|
||||
.network
|
||||
.on_data_columns_by_range_response(id, peer_id, data_column)
|
||||
{
|
||||
self.on_range_components_response(
|
||||
id.parent_request_id,
|
||||
peer_id,
|
||||
RangeBlockComponent::CustodyColumns(id, resp),
|
||||
);
|
||||
// custody_by_range accumulates the results of multiple data_columns_by_range requests
|
||||
// returning a bigger list of data columns across all the column indices this node has
|
||||
// to custody
|
||||
if let Some(result) = self.network.on_custody_by_range_response(id, peer_id, resp) {
|
||||
self.on_custody_by_range_result(id.parent_request_id, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_custody_by_range_result(
|
||||
&mut self,
|
||||
id: CustodyByRangeRequestId,
|
||||
result: CustodyRequestResult<T::EthSpec>,
|
||||
) {
|
||||
self.on_block_components_by_range_response(
|
||||
id.parent_request_id,
|
||||
RangeBlockComponent::CustodyColumns(id, result),
|
||||
);
|
||||
}
|
||||
|
||||
fn on_custody_by_root_result(
|
||||
&mut self,
|
||||
requester: CustodyRequester,
|
||||
response: CustodyByRootResult<T::EthSpec>,
|
||||
response: CustodyRequestResult<T::EthSpec>,
|
||||
) {
|
||||
self.block_lookups
|
||||
.on_download_response::<CustodyRequestState<T::EthSpec>>(
|
||||
@@ -1280,23 +1306,22 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
|
||||
/// Handles receiving a response for a range sync request that should have both blocks and
|
||||
/// blobs.
|
||||
fn on_range_components_response(
|
||||
fn on_block_components_by_range_response(
|
||||
&mut self,
|
||||
range_request_id: ComponentsByRangeRequestId,
|
||||
peer_id: PeerId,
|
||||
range_block_component: RangeBlockComponent<T::EthSpec>,
|
||||
) {
|
||||
if let Some(resp) = self
|
||||
if let Some(result) = self
|
||||
.network
|
||||
.range_block_component_response(range_request_id, range_block_component)
|
||||
.on_block_components_by_range_response(range_request_id, range_block_component)
|
||||
{
|
||||
match resp {
|
||||
Ok(blocks) => {
|
||||
match result {
|
||||
Ok((blocks, batch_peers)) => {
|
||||
match range_request_id.requester {
|
||||
RangeRequestId::RangeSync { chain_id, batch_id } => {
|
||||
self.range_sync.blocks_by_range_response(
|
||||
&mut self.network,
|
||||
peer_id,
|
||||
batch_peers,
|
||||
chain_id,
|
||||
batch_id,
|
||||
range_request_id.id,
|
||||
@@ -1308,7 +1333,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
match self.backfill_sync.on_block_response(
|
||||
&mut self.network,
|
||||
batch_id,
|
||||
&peer_id,
|
||||
batch_peers,
|
||||
range_request_id.id,
|
||||
blocks,
|
||||
) {
|
||||
@@ -1327,7 +1352,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
RangeRequestId::RangeSync { chain_id, batch_id } => {
|
||||
self.range_sync.inject_error(
|
||||
&mut self.network,
|
||||
peer_id,
|
||||
batch_id,
|
||||
chain_id,
|
||||
range_request_id.id,
|
||||
@@ -1339,7 +1363,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
match self.backfill_sync.inject_error(
|
||||
&mut self.network,
|
||||
batch_id,
|
||||
&peer_id,
|
||||
range_request_id.id,
|
||||
e,
|
||||
) {
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
//! Stores the various syncing methods for the beacon chain.
|
||||
mod backfill_sync;
|
||||
mod block_lookups;
|
||||
mod block_sidecar_coupling;
|
||||
pub mod manager;
|
||||
mod network_context;
|
||||
mod peer_sampling;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,534 @@
|
||||
use crate::sync::network_context::{
|
||||
PeerGroup, RpcRequestSendError, RpcResponseError, SyncNetworkContext,
|
||||
};
|
||||
use crate::sync::range_sync::BatchPeers;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::data_column_verification::CustodyDataColumn;
|
||||
use beacon_chain::{get_block_root, BeaconChainTypes};
|
||||
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, BlocksByRangeRequest};
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
CustodyByRangeRequestId,
|
||||
};
|
||||
use lighthouse_network::PeerId;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use types::{
|
||||
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecarList, EthSpec, Hash256,
|
||||
RuntimeVariableList, SignedBeaconBlock, Slot,
|
||||
};
|
||||
|
||||
/// Given a `BlocksByRangeRequest` (a range of slots) fetches all necessary data to return
|
||||
/// potentially available RpcBlocks.
|
||||
///
|
||||
/// See [`State`] for the set of `*_by_range` it may issue depending on the fork.
|
||||
pub struct BlockComponentsByRangeRequest<T: BeaconChainTypes> {
|
||||
id: ComponentsByRangeRequestId,
|
||||
peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
request: BlocksByRangeRequest,
|
||||
state: State<T::EthSpec>,
|
||||
}
|
||||
|
||||
enum State<E: EthSpec> {
|
||||
Base {
|
||||
blocks_by_range_request:
|
||||
ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
},
|
||||
// Two single concurrent requests for block + blobs. As of now we request blocks and blobs from
|
||||
// the same peer, so we can attribute coupling errors to the same unique peer.
|
||||
DenebEnabled {
|
||||
blocks_by_range_request:
|
||||
ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
blobs_by_range_request: ByRangeRequest<BlobsByRangeRequestId, Vec<Arc<BlobSidecar<E>>>>,
|
||||
},
|
||||
// Request blocks first, then columns. Assuming the block peer is honest we can attribute
|
||||
// custody failures to the peers serving us columns. We want to get rid of the honest block
|
||||
// peer assumption in the future, see https://github.com/sigp/lighthouse/issues/6258
|
||||
FuluEnabled(FuluEnabledState<E>),
|
||||
}
|
||||
|
||||
enum FuluEnabledState<E: EthSpec> {
|
||||
BlockRequest {
|
||||
blocks_by_range_request:
|
||||
ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
},
|
||||
CustodyRequest {
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
block_peer: PeerId,
|
||||
custody_by_range_request:
|
||||
ByRangeRequest<CustodyByRangeRequestId, DataColumnSidecarList<E>, PeerGroup>,
|
||||
},
|
||||
}
|
||||
|
||||
enum ByRangeRequest<I: PartialEq + std::fmt::Display, T, P = PeerId> {
|
||||
/// Active(RequestIndex)
|
||||
Active(I),
|
||||
/// Complete(DownloadedData, Peers)
|
||||
Complete(T, P),
|
||||
}
|
||||
|
||||
pub type BlockComponentsByRangeRequestResult<E> =
|
||||
Result<Option<(Vec<RpcBlock<E>>, BatchPeers)>, Error>;
|
||||
|
||||
pub enum Error {
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
impl From<Error> for RpcResponseError {
|
||||
fn from(e: Error) -> Self {
|
||||
match e {
|
||||
Error::InternalError(e) => RpcResponseError::InternalError(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Error> for RpcRequestSendError {
|
||||
fn from(e: Error) -> Self {
|
||||
match e {
|
||||
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Used for type-safe assertions of state in range sync tests
|
||||
#[cfg(test)]
|
||||
#[derive(Debug)]
|
||||
pub enum BlockComponentsByRangeRequestStep {
|
||||
BlocksRequest,
|
||||
CustodyRequest,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> BlockComponentsByRangeRequest<T> {
|
||||
pub fn new(
|
||||
id: ComponentsByRangeRequestId,
|
||||
request: BlocksByRangeRequest,
|
||||
peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
peers_to_deprioritize: &HashSet<PeerId>,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> Result<Self, RpcRequestSendError> {
|
||||
// Induces a compile time panic if this doesn't hold true.
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
const _: () = assert!(
|
||||
super::super::backfill_sync::BACKFILL_EPOCHS_PER_BATCH == 1
|
||||
&& super::super::range_sync::EPOCHS_PER_BATCH == 1,
|
||||
"To deal with alignment with deneb boundaries, batches need to be of just one epoch"
|
||||
);
|
||||
// The assertion above ensures each batch is in one single epoch
|
||||
let batch_epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch());
|
||||
let batch_fork = cx.spec().fork_name_at_epoch(batch_epoch);
|
||||
|
||||
// TODO(das): a change of behaviour here is that if the SyncingChain has a single peer we
|
||||
// will request all blocks for the first 5 epochs from that same single peer. Before we would
|
||||
// query only idle peers in the syncing chain.
|
||||
let Some(block_peer) = peers
|
||||
.read()
|
||||
.iter()
|
||||
.map(|peer| {
|
||||
(
|
||||
// If contains -> 1 (order after), not contains -> 0 (order first)
|
||||
peers_to_deprioritize.contains(peer),
|
||||
// Random factor to break ties, otherwise the PeerID breaks ties
|
||||
rand::random::<u32>(),
|
||||
peer,
|
||||
)
|
||||
})
|
||||
.min()
|
||||
.map(|(_, _, peer)| *peer)
|
||||
else {
|
||||
// When a peer disconnects and is removed from the SyncingChain peer set, if the set
|
||||
// reaches zero the SyncingChain is removed.
|
||||
return Err(RpcRequestSendError::NoPeers);
|
||||
};
|
||||
|
||||
let blocks_req_id = cx.send_blocks_by_range_request(block_peer, request.clone(), id)?;
|
||||
|
||||
let state = if batch_fork.fulu_enabled() {
|
||||
State::FuluEnabled(FuluEnabledState::BlockRequest {
|
||||
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||
})
|
||||
} else if batch_fork.deneb_enabled() {
|
||||
// TODO(deneb): is it okay to send blobs_by_range requests outside the DA window? I
|
||||
// would like the beacon processor / da_checker to be the one that decides if an
|
||||
// RpcBlock is valid or not with respect to containing blobs. Having sync not even
|
||||
// attempt a request seems like an added limitation.
|
||||
let blobs_req_id = cx.send_blobs_by_range_request(
|
||||
block_peer,
|
||||
BlobsByRangeRequest {
|
||||
start_slot: *request.start_slot(),
|
||||
count: *request.count(),
|
||||
},
|
||||
id,
|
||||
)?;
|
||||
State::DenebEnabled {
|
||||
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||
blobs_by_range_request: ByRangeRequest::Active(blobs_req_id),
|
||||
}
|
||||
} else {
|
||||
State::Base {
|
||||
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
id,
|
||||
peers,
|
||||
request,
|
||||
state,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn continue_requests(
|
||||
&mut self,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||
match &mut self.state {
|
||||
State::Base {
|
||||
blocks_by_range_request,
|
||||
} => {
|
||||
if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
|
||||
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||
let rpc_blocks = couple_blocks_base(blocks.to_vec());
|
||||
Ok(Some((rpc_blocks, peer_group)))
|
||||
} else {
|
||||
// Wait for blocks_by_range requests to complete
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
State::DenebEnabled {
|
||||
blocks_by_range_request,
|
||||
blobs_by_range_request,
|
||||
} => {
|
||||
if let (Some((blocks, block_peer)), Some((blobs, _))) = (
|
||||
blocks_by_range_request.to_finished(),
|
||||
blobs_by_range_request.to_finished(),
|
||||
) {
|
||||
// We use the same block_peer for the blobs request
|
||||
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||
let rpc_blocks =
|
||||
couple_blocks_deneb(blocks.to_vec(), blobs.to_vec(), cx.spec())?;
|
||||
Ok(Some((rpc_blocks, peer_group)))
|
||||
} else {
|
||||
// Wait for blocks_by_range and blobs_by_range requests to complete
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
State::FuluEnabled(state) => match state {
|
||||
FuluEnabledState::BlockRequest {
|
||||
blocks_by_range_request,
|
||||
} => {
|
||||
if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
|
||||
let blocks_with_data = blocks
|
||||
.iter()
|
||||
.filter(|block| block.has_data())
|
||||
.map(|block| block.signed_block_header())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if blocks_with_data.is_empty() {
|
||||
let custody_column_indices = cx
|
||||
.network_globals()
|
||||
.sampling_columns()
|
||||
.iter()
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
// Done, we got blocks and no columns needed
|
||||
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||
let rpc_blocks = couple_blocks_fulu(
|
||||
blocks.to_vec(),
|
||||
vec![],
|
||||
custody_column_indices,
|
||||
cx.spec(),
|
||||
)?;
|
||||
Ok(Some((rpc_blocks, peer_group)))
|
||||
} else {
|
||||
let mut column_indices = cx
|
||||
.network_globals()
|
||||
.sampling_columns()
|
||||
.iter()
|
||||
.copied()
|
||||
.collect::<Vec<_>>();
|
||||
column_indices.sort_unstable();
|
||||
|
||||
let req_id = cx
|
||||
.send_custody_by_range_request(
|
||||
self.id,
|
||||
blocks_with_data,
|
||||
self.request.clone(),
|
||||
column_indices,
|
||||
self.peers.clone(),
|
||||
)
|
||||
.map_err(|e| match e {
|
||||
RpcRequestSendError::InternalError(e) => {
|
||||
Error::InternalError(e)
|
||||
}
|
||||
RpcRequestSendError::NoPeers => Error::InternalError(
|
||||
"send_custody_by_range_request does not error with NoPeers"
|
||||
.to_owned(),
|
||||
),
|
||||
})?;
|
||||
|
||||
*state = FuluEnabledState::CustodyRequest {
|
||||
blocks: blocks.to_vec(),
|
||||
block_peer: *block_peer,
|
||||
custody_by_range_request: ByRangeRequest::Active(req_id),
|
||||
};
|
||||
|
||||
// Wait for the new custody_by_range request to complete
|
||||
Ok(None)
|
||||
}
|
||||
} else {
|
||||
// Wait for the block request to complete
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
FuluEnabledState::CustodyRequest {
|
||||
blocks,
|
||||
block_peer,
|
||||
custody_by_range_request,
|
||||
} => {
|
||||
if let Some((columns, column_peers)) = custody_by_range_request.to_finished() {
|
||||
let custody_column_indices = cx
|
||||
.network_globals()
|
||||
.sampling_columns()
|
||||
.iter()
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
let peer_group = BatchPeers::new(*block_peer, column_peers.clone());
|
||||
let rpc_blocks = couple_blocks_fulu(
|
||||
blocks.to_vec(),
|
||||
columns.to_vec(),
|
||||
custody_column_indices,
|
||||
cx.spec(),
|
||||
)?;
|
||||
Ok(Some((rpc_blocks, peer_group)))
|
||||
} else {
|
||||
// Wait for the custody_by_range request to complete
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_blocks_by_range_result(
|
||||
&mut self,
|
||||
id: BlocksByRangeRequestId,
|
||||
data: Vec<Arc<SignedBeaconBlock<T::EthSpec>>>,
|
||||
peer_id: PeerId,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||
match &mut self.state {
|
||||
State::Base {
|
||||
blocks_by_range_request,
|
||||
}
|
||||
| State::DenebEnabled {
|
||||
blocks_by_range_request,
|
||||
..
|
||||
}
|
||||
| State::FuluEnabled(FuluEnabledState::BlockRequest {
|
||||
blocks_by_range_request,
|
||||
}) => {
|
||||
blocks_by_range_request.finish(id, data, peer_id)?;
|
||||
}
|
||||
State::FuluEnabled(FuluEnabledState::CustodyRequest { .. }) => {
|
||||
return Err(Error::InternalError(
|
||||
"Received blocks_by_range response expecting custody_by_range".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
self.continue_requests(cx)
|
||||
}
|
||||
|
||||
pub fn on_blobs_by_range_result(
|
||||
&mut self,
|
||||
id: BlobsByRangeRequestId,
|
||||
data: Vec<Arc<BlobSidecar<T::EthSpec>>>,
|
||||
peer_id: PeerId,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||
match &mut self.state {
|
||||
State::Base { .. } => {
|
||||
return Err(Error::InternalError(
|
||||
"Received blobs_by_range response before Deneb".to_string(),
|
||||
))
|
||||
}
|
||||
State::DenebEnabled {
|
||||
blobs_by_range_request,
|
||||
..
|
||||
} => {
|
||||
blobs_by_range_request.finish(id, data, peer_id)?;
|
||||
}
|
||||
State::FuluEnabled(_) => {
|
||||
return Err(Error::InternalError(
|
||||
"Received blobs_by_range response after PeerDAS".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
self.continue_requests(cx)
|
||||
}
|
||||
|
||||
pub fn on_custody_by_range_result(
|
||||
&mut self,
|
||||
id: CustodyByRangeRequestId,
|
||||
data: DataColumnSidecarList<T::EthSpec>,
|
||||
peers: PeerGroup,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||
match &mut self.state {
|
||||
State::Base { .. } | State::DenebEnabled { .. } => {
|
||||
return Err(Error::InternalError(
|
||||
"Received custody_by_range response before PeerDAS".to_string(),
|
||||
))
|
||||
}
|
||||
State::FuluEnabled(state) => match state {
|
||||
FuluEnabledState::BlockRequest { .. } => {
|
||||
return Err(Error::InternalError(
|
||||
"Received custody_by_range expecting blocks_by_range".to_string(),
|
||||
));
|
||||
}
|
||||
FuluEnabledState::CustodyRequest {
|
||||
custody_by_range_request,
|
||||
..
|
||||
} => {
|
||||
custody_by_range_request.finish(id, data, peers)?;
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
self.continue_requests(cx)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn state_step(&self) -> BlockComponentsByRangeRequestStep {
|
||||
match &self.state {
|
||||
State::Base { .. } => BlockComponentsByRangeRequestStep::BlocksRequest,
|
||||
State::DenebEnabled { .. } => BlockComponentsByRangeRequestStep::BlocksRequest,
|
||||
State::FuluEnabled(state) => match state {
|
||||
FuluEnabledState::BlockRequest { .. } => {
|
||||
BlockComponentsByRangeRequestStep::BlocksRequest
|
||||
}
|
||||
FuluEnabledState::CustodyRequest { .. } => {
|
||||
BlockComponentsByRangeRequestStep::CustodyRequest
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn couple_blocks_base<E: EthSpec>(blocks: Vec<Arc<SignedBeaconBlock<E>>>) -> Vec<RpcBlock<E>> {
|
||||
blocks
|
||||
.into_iter()
|
||||
.map(|block| RpcBlock::new_without_blobs(None, block))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn couple_blocks_deneb<E: EthSpec>(
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
blobs: Vec<Arc<BlobSidecar<E>>>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Vec<RpcBlock<E>>, Error> {
|
||||
let mut blobs_by_block = HashMap::<Hash256, Vec<Arc<BlobSidecar<E>>>>::new();
|
||||
for blob in blobs {
|
||||
let block_root = blob.block_root();
|
||||
blobs_by_block.entry(block_root).or_default().push(blob);
|
||||
}
|
||||
|
||||
// Now collect all blobs that match to the block by block root. BlobsByRange request checks
|
||||
// the inclusion proof so we know that the commitment is the expected one.
|
||||
//
|
||||
// BlobsByRange request handler ensures that we don't receive more blobs than possible.
|
||||
// If the peer serving the request sends us blobs that don't pair well we'll send to the
|
||||
// processor blocks without expected blobs, resulting in a downscoring event. A serving peer
|
||||
// could serve fake blobs for blocks that don't have data, but it would gain nothing by it
|
||||
// wasting their bandwidth and ours 1:1. Therefore blobs that don't pair well are just ignored.
|
||||
//
|
||||
// RpcBlock::new ensures that the count of blobs is consistent with the block
|
||||
blocks
|
||||
.into_iter()
|
||||
.map(|block| {
|
||||
let block_root = get_block_root(&block);
|
||||
let max_blobs_per_block = spec.max_blobs_per_block(block.epoch()) as usize;
|
||||
let blobs = blobs_by_block.remove(&block_root).unwrap_or_default();
|
||||
// BlobsByRange request handler enforces that blobs are sorted by index
|
||||
let blobs = RuntimeVariableList::new(blobs, max_blobs_per_block).map_err(|_| {
|
||||
Error::InternalError("Blobs returned exceeds max length".to_string())
|
||||
})?;
|
||||
Ok(RpcBlock::new(Some(block_root), block, Some(blobs))
|
||||
.expect("TODO: don't do matching here"))
|
||||
})
|
||||
.collect::<Result<Vec<RpcBlock<E>>, Error>>()
|
||||
}
|
||||
|
||||
fn couple_blocks_fulu<E: EthSpec>(
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
data_columns: DataColumnSidecarList<E>,
|
||||
custody_column_indices: Vec<ColumnIndex>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Vec<RpcBlock<E>>, Error> {
|
||||
// Group data columns by block_root and index
|
||||
let mut custody_columns_by_block = HashMap::<Hash256, Vec<CustodyDataColumn<E>>>::new();
|
||||
|
||||
for column in data_columns {
|
||||
let block_root = column.block_root();
|
||||
|
||||
if custody_column_indices.contains(&column.index) {
|
||||
custody_columns_by_block
|
||||
.entry(block_root)
|
||||
.or_default()
|
||||
// Safe to convert to `CustodyDataColumn`: we have asserted that the index of
|
||||
// this column is in the set of `expects_custody_columns` and with the expected
|
||||
// block root, so for the expected epoch of this batch.
|
||||
.push(CustodyDataColumn::from_asserted_custody(column));
|
||||
}
|
||||
}
|
||||
|
||||
// Now iterate all blocks ensuring that the block roots of each block and data column match,
|
||||
blocks
|
||||
.into_iter()
|
||||
.map(|block| {
|
||||
let block_root = get_block_root(&block);
|
||||
let data_columns_with_block_root = custody_columns_by_block
|
||||
// Remove to only use columns once
|
||||
.remove(&block_root)
|
||||
.unwrap_or_default();
|
||||
|
||||
RpcBlock::new_with_custody_columns(
|
||||
Some(block_root),
|
||||
block,
|
||||
data_columns_with_block_root,
|
||||
spec,
|
||||
)
|
||||
.map_err(Error::InternalError)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
impl<I: PartialEq + std::fmt::Display, T, P> ByRangeRequest<I, T, P> {
|
||||
fn finish(&mut self, id: I, data: T, peer_id: P) -> Result<(), Error> {
|
||||
match self {
|
||||
Self::Active(expected_id) => {
|
||||
if expected_id != &id {
|
||||
return Err(Error::InternalError(format!(
|
||||
"unexpected req_id expected {expected_id} got {id}"
|
||||
)));
|
||||
}
|
||||
*self = Self::Complete(data, peer_id);
|
||||
Ok(())
|
||||
}
|
||||
Self::Complete(_, _) => Err(Error::InternalError(format!(
|
||||
"request already complete {id}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_finished(&self) -> Option<(&T, &P)> {
|
||||
match self {
|
||||
Self::Active(_) => None,
|
||||
Self::Complete(data, peer_id) => Some((data, peer_id)),
|
||||
}
|
||||
}
|
||||
}
|
||||
429
beacon_node/network/src/sync/network_context/custody_by_range.rs
Normal file
429
beacon_node/network/src/sync/network_context/custody_by_range.rs
Normal file
@@ -0,0 +1,429 @@
|
||||
use super::custody_by_root::{ColumnRequest, Error};
|
||||
use beacon_chain::validator_monitor::timestamp_now;
|
||||
use beacon_chain::BeaconChainTypes;
|
||||
use fnv::FnvHashMap;
|
||||
use lighthouse_network::rpc::{methods::DataColumnsByRangeRequest, BlocksByRangeRequest};
|
||||
use lighthouse_network::service::api_types::{
|
||||
CustodyByRangeRequestId, DataColumnsByRangeRequestId,
|
||||
};
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use lru_cache::LRUTimeCache;
|
||||
use parking_lot::RwLock;
|
||||
use rand::Rng;
|
||||
use std::collections::HashSet;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
|
||||
use tracing::{debug, warn};
|
||||
use types::{
|
||||
data_column_sidecar::ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Hash256,
|
||||
SignedBeaconBlockHeader, Slot,
|
||||
};
|
||||
|
||||
use super::{PeerGroup, RpcResponseResult, SyncNetworkContext};
|
||||
|
||||
/// Expiry, in seconds, of the entries of the `failed_peers` cache: a peer that failed to serve
/// a request is not selected again until its entry expires.
const FAILED_PEERS_EXPIRY_SECONDS: u64 = 15;
|
||||
/// Age, in seconds, after which a custody by range request with no in-flight downloads is
/// failed with `Error::ExpiredNoCustodyPeers`.
const REQUEST_EXPIRY_SECONDS: u64 = 300;
|
||||
|
||||
/// State of a single custody by range request: downloads every column index in
/// `column_requests` for the blocks in `blocks_with_data`, issuing one or more
/// `DataColumnsByRange` requests to custody peers.
pub struct ActiveCustodyByRangeRequest<T: BeaconChainTypes> {
|
||||
/// Creation time of this request, compared against `REQUEST_EXPIRY_SECONDS` to expire it
start_time: Instant,
|
||||
/// Id of this request, attached to every `DataColumnsByRange` request it sends and to its logs
id: CustodyByRangeRequestId,
|
||||
/// Provides the `start_slot` and `count` of every `DataColumnsByRange` request sent
request: BlocksByRangeRequest,
|
||||
/// Blocks that we expect peers to serve data columns for
blocks_with_data: Vec<SignedBeaconBlockHeader>,
|
||||
/// List of column indices this request needs to download to complete successfully
column_requests: FnvHashMap<
|
||||
ColumnIndex,
|
||||
ColumnRequest<DataColumnsByRangeRequestId, DataColumnSidecarList<T::EthSpec>>,
|
||||
>,
|
||||
/// Active requests for 1 or more columns each
active_batch_columns_requests:
|
||||
FnvHashMap<DataColumnsByRangeRequestId, ActiveBatchColumnsRequest>,
|
||||
/// Peers that have recently failed to successfully respond to a columns by range request.
|
||||
/// Having a LRUTimeCache allows this request to not have to track disconnecting peers.
failed_peers: LRUTimeCache<PeerId>,
|
||||
/// Shared set of peers that are prioritized when selecting which custody peer to request
/// a column from. NOTE(review): presumably the peers that claim to have imported the
/// requested blocks and their custody columns; confirm against the caller.
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
/// Bookkeeping for one `DataColumnsByRange` request sent to a single peer.
struct ActiveBatchColumnsRequest {
|
||||
/// Column indices that were requested in this batch
indices: Vec<ColumnIndex>,
|
||||
}
|
||||
|
||||
/// Outcome of making progress on a custody by range request:
/// - `Err`: the request has failed
/// - `Ok(Some((columns, peer_group, seen_timestamp)))`: the request has completed
/// - `Ok(None)`: the request is still active
pub type CustodyByRangeRequestResult<E> =
|
||||
Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
|
||||
|
||||
/// Reason why the columns a peer returned for one column index could not be accepted.
enum ColumnResponseError {
|
||||
/// The column returned for `slot` references a block header whose root differs from the
/// root of the expected block at that slot.
NonMatchingColumn {
|
||||
slot: Slot,
|
||||
actual_block_root: Hash256,
|
||||
expected_block_root: Hash256,
|
||||
},
|
||||
/// The peer returned no column for the expected block at this slot.
MissingColumn(Slot),
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> ActiveCustodyByRangeRequest<T> {
|
||||
pub(crate) fn new(
|
||||
id: CustodyByRangeRequestId,
|
||||
request: BlocksByRangeRequest,
|
||||
blocks_with_data: Vec<SignedBeaconBlockHeader>,
|
||||
column_indices: &[ColumnIndex],
|
||||
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
start_time: Instant::now(),
|
||||
id,
|
||||
request,
|
||||
blocks_with_data,
|
||||
column_requests: HashMap::from_iter(
|
||||
column_indices
|
||||
.iter()
|
||||
.map(|index| (*index, ColumnRequest::new())),
|
||||
),
|
||||
active_batch_columns_requests: <_>::default(),
|
||||
failed_peers: LRUTimeCache::new(Duration::from_secs(FAILED_PEERS_EXPIRY_SECONDS)),
|
||||
lookup_peers,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a downloaded column into an active custody request. Then make progress on the
|
||||
/// entire request.
|
||||
///
|
||||
/// ### Returns
|
||||
///
|
||||
/// - `Err`: Custody request has failed and will be dropped
|
||||
/// - `Ok(Some)`: Custody request has successfully completed and will be dropped
|
||||
/// - `Ok(None)`: Custody request still active
|
||||
pub(crate) fn on_data_column_downloaded(
|
||||
&mut self,
|
||||
peer_id: PeerId,
|
||||
req_id: DataColumnsByRangeRequestId,
|
||||
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> CustodyByRangeRequestResult<T::EthSpec> {
|
||||
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
|
||||
warn!(
|
||||
id = %self.id,
|
||||
%req_id,
|
||||
"Received custody by range response for unrequested index"
|
||||
);
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
match resp {
|
||||
Ok((data_columns, seen_timestamp)) => {
|
||||
// Map columns by index as an optimization to not loop the returned list on each
|
||||
// requested index. The worse case is 128 loops over a 128 item vec + mutation to
|
||||
// drop the consumed columns.
|
||||
let mut data_columns_by_index =
|
||||
HashMap::<(ColumnIndex, Slot), Arc<DataColumnSidecar<T::EthSpec>>>::new();
|
||||
for data_column in data_columns {
|
||||
data_columns_by_index
|
||||
.insert((data_column.index, data_column.slot()), data_column);
|
||||
}
|
||||
|
||||
// Accumulate columns that the peer does not have to issue a single log per request
|
||||
let mut missing_column_indices = vec![];
|
||||
let mut incorrect_column_indices = vec![];
|
||||
let mut imported_column_indices = vec![];
|
||||
|
||||
for index in &batch_request.indices {
|
||||
let column_request =
|
||||
self.column_requests
|
||||
.get_mut(index)
|
||||
.ok_or(Error::InternalError(format!(
|
||||
"unknown column_index {index}"
|
||||
)))?;
|
||||
|
||||
let columns_at_index = self
|
||||
.blocks_with_data
|
||||
.iter()
|
||||
.map(|block| {
|
||||
let slot = block.message.slot;
|
||||
if let Some(data_column) = data_columns_by_index.remove(&(*index, slot))
|
||||
{
|
||||
let actual_block_root =
|
||||
data_column.signed_block_header.message.canonical_root();
|
||||
let expected_block_root = block.message.canonical_root();
|
||||
if actual_block_root != expected_block_root {
|
||||
Err(ColumnResponseError::NonMatchingColumn {
|
||||
slot,
|
||||
actual_block_root: data_column
|
||||
.signed_block_header
|
||||
.message
|
||||
.canonical_root(),
|
||||
expected_block_root: block.message.canonical_root(),
|
||||
})
|
||||
} else {
|
||||
Ok(data_column)
|
||||
}
|
||||
} else {
|
||||
// The following three statements are true:
|
||||
// - block at `slot` is not missed, and has data
|
||||
// - peer custodies this column `index`
|
||||
// - peer claims to be synced to at least `slot`
|
||||
//
|
||||
// Then we penalize the faulty peer, mark it as failed and try with
|
||||
// another.
|
||||
Err(ColumnResponseError::MissingColumn(slot))
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>();
|
||||
|
||||
match columns_at_index {
|
||||
Ok(columns_at_index) => {
|
||||
column_request.on_download_success(
|
||||
req_id,
|
||||
peer_id,
|
||||
columns_at_index,
|
||||
seen_timestamp,
|
||||
)?;
|
||||
|
||||
imported_column_indices.push(index);
|
||||
}
|
||||
Err(e) => {
|
||||
column_request.on_download_error(req_id)?;
|
||||
|
||||
match e {
|
||||
ColumnResponseError::NonMatchingColumn {
|
||||
slot,
|
||||
actual_block_root,
|
||||
expected_block_root,
|
||||
} => {
|
||||
incorrect_column_indices.push((
|
||||
index,
|
||||
slot,
|
||||
actual_block_root,
|
||||
expected_block_root,
|
||||
));
|
||||
}
|
||||
ColumnResponseError::MissingColumn(slot) => {
|
||||
missing_column_indices.push((index, slot));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Log `imported_column_indices`, `missing_column_indexes` and
|
||||
// `incorrect_column_indices` once per request to make the logs less noisy.
|
||||
if !imported_column_indices.is_empty() {
|
||||
// TODO(das): this log may be redundant. We already log on DataColumnsByRange
|
||||
// completed, and on DataColumnsByRange sent we log the column indices
|
||||
// ```
|
||||
// Sync RPC request sent method="DataColumnsByRange" slots=8 epoch=4 columns=[52] peer=16Uiu2HAmEooeoHzHDYS35TSHrJDSfmREecPyFskrLPYm9Gm1EURj id=493/399/10/RangeSync/4/1
|
||||
// Sync RPC request completed id=493/399/10/RangeSync/4/1 method="DataColumnsByRange" count=1
|
||||
// ```
|
||||
// Which can be traced to this custody by range request, and the initial log
|
||||
debug!(
|
||||
id = %self.id,
|
||||
data_columns_by_range_req_id = %req_id,
|
||||
%peer_id,
|
||||
count = imported_column_indices.len(),
|
||||
"Custody by range request download imported columns"
|
||||
);
|
||||
}
|
||||
|
||||
if !incorrect_column_indices.is_empty() {
|
||||
debug!(
|
||||
id = %self.id,
|
||||
data_columns_by_range_req_id = %req_id,
|
||||
%peer_id,
|
||||
?incorrect_column_indices,
|
||||
"Custody by range peer returned non-matching columns"
|
||||
);
|
||||
|
||||
// Returning a non-canonical column is not a permanent fault. We should not
|
||||
// retry the peer for some time but the peer may return a canonical column in
|
||||
// the future.
|
||||
self.failed_peers.insert(peer_id);
|
||||
cx.report_peer(
|
||||
peer_id,
|
||||
PeerAction::MidToleranceError,
|
||||
"non-matching data column",
|
||||
);
|
||||
}
|
||||
|
||||
if !missing_column_indices.is_empty() {
|
||||
debug!(
|
||||
id = %self.id,
|
||||
data_columns_by_range_req_id = %req_id,
|
||||
%peer_id,
|
||||
?missing_column_indices,
|
||||
"Custody by range peer claims to not have some data"
|
||||
);
|
||||
|
||||
// Not having columns is not a permanent fault. The peer may be backfilling.
|
||||
self.failed_peers.insert(peer_id);
|
||||
cx.report_peer(peer_id, PeerAction::MidToleranceError, "custody_failure");
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
debug!(
|
||||
id = %self.id,
|
||||
%req_id,
|
||||
%peer_id,
|
||||
error = ?err,
|
||||
"Custody by range download error"
|
||||
);
|
||||
|
||||
for column_index in &batch_request.indices {
|
||||
self.column_requests
|
||||
.get_mut(column_index)
|
||||
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
|
||||
.on_download_error_and_mark_failure(req_id, err.clone())?;
|
||||
}
|
||||
|
||||
// An RpcResponseError is already downscored in network_context
|
||||
self.failed_peers.insert(peer_id);
|
||||
}
|
||||
};
|
||||
|
||||
self.continue_requests(cx)
|
||||
}
|
||||
|
||||
pub(crate) fn continue_requests(
|
||||
&mut self,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> CustodyByRangeRequestResult<T::EthSpec> {
|
||||
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
||||
// All requests have completed successfully.
|
||||
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
||||
let mut seen_timestamps = vec![];
|
||||
let columns = std::mem::take(&mut self.column_requests)
|
||||
.into_values()
|
||||
.map(|request| {
|
||||
let (peer, data_columns, seen_timestamp) = request.complete()?;
|
||||
|
||||
for data_column in &data_columns {
|
||||
let columns_by_peer = peers.entry(peer).or_default();
|
||||
if !columns_by_peer.contains(&(data_column.index as usize)) {
|
||||
columns_by_peer.push(data_column.index as usize);
|
||||
}
|
||||
}
|
||||
|
||||
seen_timestamps.push(seen_timestamp);
|
||||
|
||||
Ok(data_columns)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
// Flatten Vec<Vec<Columns>> to Vec<Columns>
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
let peer_group = PeerGroup::from_set(peers);
|
||||
let max_seen_timestamp = seen_timestamps.into_iter().max().unwrap_or(timestamp_now());
|
||||
return Ok(Some((columns, peer_group, max_seen_timestamp)));
|
||||
}
|
||||
|
||||
let active_request_count_by_peer = cx.active_request_count_by_peer();
|
||||
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
||||
let lookup_peers = self.lookup_peers.read();
|
||||
|
||||
// Need to:
|
||||
// - track how many active requests a peer has for load balancing
|
||||
// - which peers have failures to attempt others
|
||||
// - which peer returned what to have PeerGroup attributability
|
||||
|
||||
for (column_index, request) in self.column_requests.iter_mut() {
|
||||
if request.is_awaiting_download() {
|
||||
if let Some(last_error) = request.too_many_failures() {
|
||||
return Err(Error::TooManyDownloadErrors(last_error));
|
||||
}
|
||||
|
||||
// TODO(das): We should only query peers that are likely to know about this block.
|
||||
// For by_range requests, only peers in the SyncingChain peer set. Else consider a
|
||||
// fallback to the peers that are synced up to the epoch we want to query.
|
||||
let custodial_peers = cx.get_custodial_peers(*column_index);
|
||||
|
||||
// We draw from the total set of peers, but prioritize those peers who we have
|
||||
// received an attestation / status / block message claiming to have imported the
|
||||
// lookup. The frequency of those messages is low, so drawing only from lookup_peers
|
||||
// could cause many lookups to take much longer or fail as they don't have enough
|
||||
// custody peers on a given column
|
||||
let mut priorized_peers = custodial_peers
|
||||
.iter()
|
||||
.filter(|peer| {
|
||||
// Do not request faulty peers for some time
|
||||
!self.failed_peers.contains(peer)
|
||||
})
|
||||
.map(|peer| {
|
||||
(
|
||||
// Prioritize peers that claim to know have imported this block
|
||||
if lookup_peers.contains(peer) { 0 } else { 1 },
|
||||
// Prefer peers with fewer requests to load balance across peers.
|
||||
// We batch requests to the same peer, so count existence in the
|
||||
// `columns_to_request_by_peer` as a single 1 request.
|
||||
active_request_count_by_peer.get(peer).copied().unwrap_or(0)
|
||||
+ columns_to_request_by_peer.get(peer).map(|_| 1).unwrap_or(0),
|
||||
// Random factor to break ties, otherwise the PeerID breaks ties
|
||||
rand::thread_rng().gen::<u32>(),
|
||||
*peer,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
priorized_peers.sort_unstable();
|
||||
|
||||
if let Some((_, _, _, peer_id)) = priorized_peers.first() {
|
||||
columns_to_request_by_peer
|
||||
.entry(*peer_id)
|
||||
.or_default()
|
||||
.push(*column_index);
|
||||
} else {
|
||||
// Do not issue requests if there is no custody peer on this column. The request
|
||||
// will sit idle without making progress. The only way to make to progress is:
|
||||
// - Add a new peer that custodies the missing columns
|
||||
// - Call `continue_requests`
|
||||
//
|
||||
// Otherwise this request will be dropped and failed after some time.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
|
||||
let req_id = cx
|
||||
.send_data_columns_by_range_request(
|
||||
peer_id,
|
||||
DataColumnsByRangeRequest {
|
||||
start_slot: *self.request.start_slot(),
|
||||
count: *self.request.count(),
|
||||
columns: indices.clone(),
|
||||
},
|
||||
self.id,
|
||||
)
|
||||
.map_err(|e| Error::InternalError(format!("send failed {e}")))?;
|
||||
|
||||
for column_index in &indices {
|
||||
let column_request = self
|
||||
.column_requests
|
||||
.get_mut(column_index)
|
||||
// Should never happen: column_index is iterated from column_requests
|
||||
.ok_or(Error::InternalError(format!(
|
||||
"Unknown column_request {column_index}"
|
||||
)))?;
|
||||
|
||||
column_request.on_download_start(req_id)?;
|
||||
}
|
||||
|
||||
self.active_batch_columns_requests
|
||||
.insert(req_id, ActiveBatchColumnsRequest { indices });
|
||||
}
|
||||
|
||||
if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
|
||||
&& !self.column_requests.values().any(|r| r.is_downloading())
|
||||
{
|
||||
let awaiting_peers_indicies = self
|
||||
.column_requests
|
||||
.iter()
|
||||
.filter(|(_, r)| r.is_awaiting_download())
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<Vec<_>>();
|
||||
return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::sync::network_context::{
|
||||
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest,
|
||||
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, RpcRequestSendError,
|
||||
RpcResponseError,
|
||||
};
|
||||
use beacon_chain::validator_monitor::timestamp_now;
|
||||
use beacon_chain::BeaconChainTypes;
|
||||
@@ -12,22 +13,28 @@ use rand::Rng;
|
||||
use std::collections::HashSet;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
|
||||
use strum::IntoStaticStr;
|
||||
use tracing::{debug, warn};
|
||||
use types::EthSpec;
|
||||
use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256};
|
||||
use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Hash256};
|
||||
|
||||
use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext};
|
||||
|
||||
const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5;
|
||||
const MAX_STALE_NO_PEERS_DURATION: Duration = Duration::from_secs(30);
|
||||
const REQUEST_EXPIRY_SECONDS: u64 = 300;
|
||||
/// TODO(das): Reconsider this retry count, it was chosen as a placeholder value. Each
|
||||
/// `custody_by_*` request is already retried multiple times inside of a lookup or batch
|
||||
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
|
||||
|
||||
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
|
||||
|
||||
pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
|
||||
pub struct ActiveCustodyByRootRequest<T: BeaconChainTypes> {
|
||||
start_time: Instant,
|
||||
block_root: Hash256,
|
||||
custody_id: CustodyId,
|
||||
/// List of column indices this request needs to download to complete successfully
|
||||
column_requests: FnvHashMap<ColumnIndex, ColumnRequest<T::EthSpec>>,
|
||||
#[allow(clippy::type_complexity)]
|
||||
column_requests: FnvHashMap<
|
||||
ColumnIndex,
|
||||
ColumnRequest<DataColumnsByRootRequestId, Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||
>,
|
||||
/// Active requests for 1 or more columns each
|
||||
active_batch_columns_requests:
|
||||
FnvHashMap<DataColumnsByRootRequestId, ActiveBatchColumnsRequest>,
|
||||
@@ -40,29 +47,47 @@ pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
SendFailed(&'static str),
|
||||
TooManyFailures,
|
||||
BadState(String),
|
||||
NoPeer(ColumnIndex),
|
||||
/// Received a download result for a different request id than the in-flight request.
|
||||
/// There should only exist a single request at a time. Having multiple requests is a bug and
|
||||
/// can result in undefined state, so it's treated as a hard error and the lookup is dropped.
|
||||
UnexpectedRequestId {
|
||||
expected_req_id: DataColumnsByRootRequestId,
|
||||
req_id: DataColumnsByRootRequestId,
|
||||
},
|
||||
InternalError(String),
|
||||
TooManyDownloadErrors(RpcResponseError),
|
||||
ExpiredNoCustodyPeers(Vec<ColumnIndex>),
|
||||
}
|
||||
|
||||
impl From<Error> for RpcResponseError {
|
||||
fn from(e: Error) -> Self {
|
||||
match e {
|
||||
Error::InternalError(e) => RpcResponseError::InternalError(e),
|
||||
Error::TooManyDownloadErrors(e) => e,
|
||||
Error::ExpiredNoCustodyPeers(indices) => RpcResponseError::RequestExpired(format!(
|
||||
"Expired waiting for custody peers {indices:?}"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Error> for RpcRequestSendError {
|
||||
fn from(e: Error) -> Self {
|
||||
match e {
|
||||
Error::TooManyDownloadErrors(_) => {
|
||||
RpcRequestSendError::InternalError("Download error in request send".to_string())
|
||||
}
|
||||
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
|
||||
Error::ExpiredNoCustodyPeers(_) => RpcRequestSendError::InternalError(
|
||||
"Request can not expire when requesting it".to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ActiveBatchColumnsRequest {
|
||||
indices: Vec<ColumnIndex>,
|
||||
}
|
||||
|
||||
pub type CustodyRequestResult<E> =
|
||||
pub type CustodyByRootRequestResult<E> =
|
||||
Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
|
||||
|
||||
impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
impl<T: BeaconChainTypes> ActiveCustodyByRootRequest<T> {
|
||||
pub(crate) fn new(
|
||||
block_root: Hash256,
|
||||
custody_id: CustodyId,
|
||||
@@ -70,6 +95,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
start_time: Instant::now(),
|
||||
block_root,
|
||||
custody_id,
|
||||
column_requests: HashMap::from_iter(
|
||||
@@ -98,7 +124,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
req_id: DataColumnsByRootRequestId,
|
||||
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> CustodyRequestResult<T::EthSpec> {
|
||||
) -> CustodyByRootRequestResult<T::EthSpec> {
|
||||
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
|
||||
warn!(
|
||||
block_root = ?self.block_root,
|
||||
@@ -131,7 +157,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
let column_request = self
|
||||
.column_requests
|
||||
.get_mut(column_index)
|
||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
|
||||
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
|
||||
|
||||
if let Some(data_column) = data_columns.remove(column_index) {
|
||||
column_request.on_download_success(
|
||||
@@ -182,8 +208,8 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
for column_index in &batch_request.indices {
|
||||
self.column_requests
|
||||
.get_mut(column_index)
|
||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?
|
||||
.on_download_error_and_mark_failure(req_id)?;
|
||||
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
|
||||
.on_download_error_and_mark_failure(req_id, err.clone())?;
|
||||
}
|
||||
|
||||
self.failed_peers.insert(peer_id);
|
||||
@@ -196,7 +222,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
pub(crate) fn continue_requests(
|
||||
&mut self,
|
||||
cx: &mut SyncNetworkContext<T>,
|
||||
) -> CustodyRequestResult<T::EthSpec> {
|
||||
) -> CustodyByRootRequestResult<T::EthSpec> {
|
||||
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
||||
// All requests have completed successfully.
|
||||
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
||||
@@ -229,9 +255,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
// - which peer returned what to have PeerGroup attributability
|
||||
|
||||
for (column_index, request) in self.column_requests.iter_mut() {
|
||||
if let Some(wait_duration) = request.is_awaiting_download() {
|
||||
if request.download_failures > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
|
||||
return Err(Error::TooManyFailures);
|
||||
if request.is_awaiting_download() {
|
||||
if let Some(last_error) = request.too_many_failures() {
|
||||
return Err(Error::TooManyDownloadErrors(last_error));
|
||||
}
|
||||
|
||||
// TODO(das): When there is a fork and only a subset of your peers know about a block, we should
|
||||
@@ -270,21 +296,20 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
.entry(*peer_id)
|
||||
.or_default()
|
||||
.push(*column_index);
|
||||
} else if wait_duration > MAX_STALE_NO_PEERS_DURATION {
|
||||
// Allow to request to sit stale in `NotStarted` state for at most
|
||||
// `MAX_STALE_NO_PEERS_DURATION`, else error and drop the request. Note that
|
||||
// lookup will naturally retry when other peers send us attestations for
|
||||
// descendants of this un-available lookup.
|
||||
return Err(Error::NoPeer(*column_index));
|
||||
} else {
|
||||
// Do not issue requests if there is no custody peer on this column
|
||||
// Do not issue requests if there is no custody peer on this column. The request
|
||||
// will sit idle without making progress. The only way to make progress is:
|
||||
// - Add a new peer that custodies the missing columns
|
||||
// - Call `continue_requests`
|
||||
//
|
||||
// Otherwise this request will be dropped and failed after some time.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
|
||||
let request_result = cx
|
||||
.data_column_lookup_request(
|
||||
.data_columns_by_root_request(
|
||||
DataColumnsByRootRequester::Custody(self.custody_id),
|
||||
peer_id,
|
||||
DataColumnsByRootSingleBlockRequest {
|
||||
@@ -297,7 +322,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
// columns. For the rest of peers, don't downscore if columns are missing.
|
||||
lookup_peers.contains(&peer_id),
|
||||
)
|
||||
.map_err(Error::SendFailed)?;
|
||||
.map_err(|e| {
|
||||
Error::InternalError(format!("Send failed data_columns_by_root {e:?}"))
|
||||
})?;
|
||||
|
||||
match request_result {
|
||||
LookupRequestResult::RequestSent(req_id) => {
|
||||
@@ -306,7 +333,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
.column_requests
|
||||
.get_mut(column_index)
|
||||
// Should never happen: column_index is iterated from column_requests
|
||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
|
||||
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
|
||||
|
||||
column_request.on_download_start(req_id)?;
|
||||
}
|
||||
@@ -319,117 +346,149 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
||||
}
|
||||
}
|
||||
|
||||
if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
|
||||
&& !self.column_requests.values().any(|r| r.is_downloading())
|
||||
{
|
||||
let awaiting_peers_indicies = self
|
||||
.column_requests
|
||||
.iter()
|
||||
.filter(|(_, r)| r.is_awaiting_download())
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<Vec<_>>();
|
||||
return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO(das): this attempt count is nested into the existing lookup request count.
|
||||
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
|
||||
|
||||
struct ColumnRequest<E: EthSpec> {
|
||||
status: Status<E>,
|
||||
download_failures: usize,
|
||||
pub struct ColumnRequest<I: std::fmt::Display + PartialEq, T> {
|
||||
status: Status<I, T>,
|
||||
download_failures: Vec<RpcResponseError>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum Status<E: EthSpec> {
|
||||
NotStarted(Instant),
|
||||
Downloading(DataColumnsByRootRequestId),
|
||||
Downloaded(PeerId, Arc<DataColumnSidecar<E>>, Duration),
|
||||
#[derive(Debug, Clone, IntoStaticStr)]
|
||||
pub enum Status<I, T> {
|
||||
NotStarted,
|
||||
Downloading(I),
|
||||
Downloaded(PeerId, T, Duration),
|
||||
}
|
||||
|
||||
impl<E: EthSpec> ColumnRequest<E> {
|
||||
fn new() -> Self {
|
||||
impl<I: std::fmt::Display + PartialEq, T> ColumnRequest<I, T> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
status: Status::NotStarted(Instant::now()),
|
||||
download_failures: 0,
|
||||
status: Status::NotStarted,
|
||||
download_failures: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn is_awaiting_download(&self) -> Option<Duration> {
|
||||
pub fn is_awaiting_download(&self) -> bool {
|
||||
match self.status {
|
||||
Status::NotStarted(start_time) => Some(start_time.elapsed()),
|
||||
Status::Downloading { .. } | Status::Downloaded { .. } => None,
|
||||
Status::NotStarted => true,
|
||||
Status::Downloading { .. } | Status::Downloaded { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_downloaded(&self) -> bool {
|
||||
pub fn is_downloading(&self) -> bool {
|
||||
match self.status {
|
||||
Status::NotStarted { .. } | Status::Downloading { .. } => false,
|
||||
Status::NotStarted => false,
|
||||
Status::Downloading { .. } => true,
|
||||
Status::Downloaded { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_downloaded(&self) -> bool {
|
||||
match self.status {
|
||||
Status::NotStarted | Status::Downloading { .. } => false,
|
||||
Status::Downloaded { .. } => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn on_download_start(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
|
||||
pub fn too_many_failures(&self) -> Option<RpcResponseError> {
|
||||
if self.download_failures.len() > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
|
||||
Some(
|
||||
self.download_failures
|
||||
.last()
|
||||
.cloned()
|
||||
.expect("download_failures is not empty"),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_download_start(&mut self, req_id: I) -> Result<(), Error> {
|
||||
match &self.status {
|
||||
Status::NotStarted { .. } => {
|
||||
Status::NotStarted => {
|
||||
self.status = Status::Downloading(req_id);
|
||||
Ok(())
|
||||
}
|
||||
other => Err(Error::BadState(format!(
|
||||
"bad state on_download_start expected NotStarted got {other:?}"
|
||||
other => Err(Error::InternalError(format!(
|
||||
"bad state on_download_start expected NotStarted got {}",
|
||||
Into::<&'static str>::into(other),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn on_download_error(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
|
||||
pub fn on_download_error(&mut self, req_id: I) -> Result<(), Error> {
|
||||
match &self.status {
|
||||
Status::Downloading(expected_req_id) => {
|
||||
if req_id != *expected_req_id {
|
||||
return Err(Error::UnexpectedRequestId {
|
||||
expected_req_id: *expected_req_id,
|
||||
req_id,
|
||||
});
|
||||
return Err(Error::InternalError(format!(
|
||||
"Received download result for req_id {req_id} expecting {expected_req_id}"
|
||||
)));
|
||||
}
|
||||
self.status = Status::NotStarted(Instant::now());
|
||||
self.status = Status::NotStarted;
|
||||
Ok(())
|
||||
}
|
||||
other => Err(Error::BadState(format!(
|
||||
"bad state on_download_error expected Downloading got {other:?}"
|
||||
other => Err(Error::InternalError(format!(
|
||||
"bad state on_download_error expected Downloading got {}",
|
||||
Into::<&'static str>::into(other),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn on_download_error_and_mark_failure(
|
||||
pub fn on_download_error_and_mark_failure(
|
||||
&mut self,
|
||||
req_id: DataColumnsByRootRequestId,
|
||||
req_id: I,
|
||||
e: RpcResponseError,
|
||||
) -> Result<(), Error> {
|
||||
// TODO(das): Should track which peers don't have data
|
||||
self.download_failures += 1;
|
||||
self.download_failures.push(e);
|
||||
self.on_download_error(req_id)
|
||||
}
|
||||
|
||||
fn on_download_success(
|
||||
pub fn on_download_success(
|
||||
&mut self,
|
||||
req_id: DataColumnsByRootRequestId,
|
||||
req_id: I,
|
||||
peer_id: PeerId,
|
||||
data_column: Arc<DataColumnSidecar<E>>,
|
||||
data_column: T,
|
||||
seen_timestamp: Duration,
|
||||
) -> Result<(), Error> {
|
||||
match &self.status {
|
||||
Status::Downloading(expected_req_id) => {
|
||||
if req_id != *expected_req_id {
|
||||
return Err(Error::UnexpectedRequestId {
|
||||
expected_req_id: *expected_req_id,
|
||||
req_id,
|
||||
});
|
||||
return Err(Error::InternalError(format!(
|
||||
"Received download result for req_id {req_id} expecting {expected_req_id}"
|
||||
)));
|
||||
}
|
||||
self.status = Status::Downloaded(peer_id, data_column, seen_timestamp);
|
||||
Ok(())
|
||||
}
|
||||
other => Err(Error::BadState(format!(
|
||||
"bad state on_download_success expected Downloading got {other:?}"
|
||||
other => Err(Error::InternalError(format!(
|
||||
"bad state on_download_success expected Downloading got {}",
|
||||
Into::<&'static str>::into(other),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn complete(self) -> Result<(PeerId, Arc<DataColumnSidecar<E>>, Duration), Error> {
|
||||
pub fn complete(self) -> Result<(PeerId, T, Duration), Error> {
|
||||
match self.status {
|
||||
Status::Downloaded(peer_id, data_column, seen_timestamp) => {
|
||||
Ok((peer_id, data_column, seen_timestamp))
|
||||
}
|
||||
other => Err(Error::BadState(format!(
|
||||
"bad state complete expected Downloaded got {other:?}"
|
||||
other => Err(Error::InternalError(format!(
|
||||
"bad state complete expected Downloaded got {}",
|
||||
Into::<&'static str>::into(other),
|
||||
))),
|
||||
}
|
||||
}
|
||||
@@ -26,13 +26,19 @@ mod blocks_by_root;
|
||||
mod data_columns_by_range;
|
||||
mod data_columns_by_root;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, IntoStaticStr)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, IntoStaticStr)]
|
||||
pub enum LookupVerifyError {
|
||||
NotEnoughResponsesReturned { actual: usize },
|
||||
NotEnoughResponsesReturned {
|
||||
actual: usize,
|
||||
},
|
||||
TooManyResponses,
|
||||
UnrequestedBlockRoot(Hash256),
|
||||
UnrequestedIndex(u64),
|
||||
UnrequestedSlot(Slot),
|
||||
UnrequestedSlot {
|
||||
slot: Slot,
|
||||
start_slot: Slot,
|
||||
end_slot: Slot,
|
||||
},
|
||||
InvalidInclusionProof,
|
||||
DuplicatedData(Slot, u64),
|
||||
InternalError(String),
|
||||
@@ -171,12 +177,10 @@ impl<K: Eq + Hash, T: ActiveRequestItems> ActiveRequests<K, T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn active_requests_of_peer(&self, peer_id: &PeerId) -> Vec<&K> {
|
||||
pub fn active_requests(&self) -> impl Iterator<Item = (&K, &PeerId)> {
|
||||
self.requests
|
||||
.iter()
|
||||
.filter(|(_, request)| &request.peer_id == peer_id)
|
||||
.map(|(id, _)| id)
|
||||
.collect()
|
||||
.map(|(id, request)| (id, &request.peer_id))
|
||||
}
|
||||
|
||||
pub fn iter_request_peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use super::{ActiveRequestItems, LookupVerifyError};
|
||||
use lighthouse_network::rpc::methods::BlobsByRangeRequest;
|
||||
use std::sync::Arc;
|
||||
use types::{BlobSidecar, EthSpec};
|
||||
use types::{BlobSidecar, EthSpec, Slot};
|
||||
|
||||
/// Accumulates results of a blobs_by_range request. Only returns items after receiving the
|
||||
/// stream termination.
|
||||
@@ -25,10 +25,15 @@ impl<E: EthSpec> ActiveRequestItems for BlobsByRangeRequestItems<E> {
|
||||
type Item = Arc<BlobSidecar<E>>;
|
||||
|
||||
fn add(&mut self, blob: Self::Item) -> Result<bool, LookupVerifyError> {
|
||||
if blob.slot() < self.request.start_slot
|
||||
|| blob.slot() >= self.request.start_slot + self.request.count
|
||||
{
|
||||
return Err(LookupVerifyError::UnrequestedSlot(blob.slot()));
|
||||
let start_slot = Slot::new(self.request.start_slot);
|
||||
let end_slot = start_slot + Slot::new(self.request.count);
|
||||
|
||||
if blob.slot() < start_slot || blob.slot() >= end_slot {
|
||||
return Err(LookupVerifyError::UnrequestedSlot {
|
||||
slot: blob.slot(),
|
||||
start_slot,
|
||||
end_slot,
|
||||
});
|
||||
}
|
||||
if blob.index >= self.max_blobs_per_block {
|
||||
return Err(LookupVerifyError::UnrequestedIndex(blob.index));
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use super::{ActiveRequestItems, LookupVerifyError};
|
||||
use lighthouse_network::rpc::BlocksByRangeRequest;
|
||||
use std::sync::Arc;
|
||||
use types::{EthSpec, SignedBeaconBlock};
|
||||
use types::{EthSpec, SignedBeaconBlock, Slot};
|
||||
|
||||
/// Accumulates results of a blocks_by_range request. Only returns items after receiving the
|
||||
/// stream termination.
|
||||
@@ -23,10 +23,15 @@ impl<E: EthSpec> ActiveRequestItems for BlocksByRangeRequestItems<E> {
|
||||
type Item = Arc<SignedBeaconBlock<E>>;
|
||||
|
||||
fn add(&mut self, block: Self::Item) -> Result<bool, LookupVerifyError> {
|
||||
if block.slot().as_u64() < *self.request.start_slot()
|
||||
|| block.slot().as_u64() >= self.request.start_slot() + self.request.count()
|
||||
{
|
||||
return Err(LookupVerifyError::UnrequestedSlot(block.slot()));
|
||||
let start_slot = Slot::new(*self.request.start_slot());
|
||||
let end_slot = start_slot + Slot::new(*self.request.count());
|
||||
|
||||
if block.slot() < start_slot || block.slot() >= end_slot {
|
||||
return Err(LookupVerifyError::UnrequestedSlot {
|
||||
slot: block.slot(),
|
||||
start_slot,
|
||||
end_slot,
|
||||
});
|
||||
}
|
||||
if self
|
||||
.items
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use super::{ActiveRequestItems, LookupVerifyError};
|
||||
use lighthouse_network::rpc::methods::DataColumnsByRangeRequest;
|
||||
use std::sync::Arc;
|
||||
use types::{DataColumnSidecar, EthSpec};
|
||||
use types::{DataColumnSidecar, DataColumnSidecarList, EthSpec, Slot};
|
||||
|
||||
/// Accumulates results of a data_columns_by_range request. Only returns items after receiving the
|
||||
/// stream termination.
|
||||
pub struct DataColumnsByRangeRequestItems<E: EthSpec> {
|
||||
request: DataColumnsByRangeRequest,
|
||||
items: Vec<Arc<DataColumnSidecar<E>>>,
|
||||
items: DataColumnSidecarList<E>,
|
||||
}
|
||||
|
||||
impl<E: EthSpec> DataColumnsByRangeRequestItems<E> {
|
||||
@@ -23,10 +23,15 @@ impl<E: EthSpec> ActiveRequestItems for DataColumnsByRangeRequestItems<E> {
|
||||
type Item = Arc<DataColumnSidecar<E>>;
|
||||
|
||||
fn add(&mut self, data_column: Self::Item) -> Result<bool, LookupVerifyError> {
|
||||
if data_column.slot() < self.request.start_slot
|
||||
|| data_column.slot() >= self.request.start_slot + self.request.count
|
||||
{
|
||||
return Err(LookupVerifyError::UnrequestedSlot(data_column.slot()));
|
||||
let start_slot = Slot::new(self.request.start_slot);
|
||||
let end_slot = start_slot + Slot::new(self.request.count);
|
||||
|
||||
if data_column.slot() < start_slot || data_column.slot() >= end_slot {
|
||||
return Err(LookupVerifyError::UnrequestedSlot {
|
||||
slot: data_column.slot(),
|
||||
start_slot,
|
||||
end_slot,
|
||||
});
|
||||
}
|
||||
if !self.request.columns.contains(&data_column.index) {
|
||||
return Err(LookupVerifyError::UnrequestedIndex(data_column.index));
|
||||
|
||||
@@ -98,13 +98,13 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
||||
// TODO(das): Should track failed sampling request for some time? Otherwise there's
|
||||
// a risk of a loop with multiple triggers creating the request, then failing,
|
||||
// and repeat.
|
||||
debug!(?id, "Ignoring duplicate sampling request");
|
||||
debug!(%id, "Ignoring duplicate sampling request");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
debug!(
|
||||
?id,
|
||||
%id,
|
||||
column_selection = ?request.column_selection(),
|
||||
"Created new sample request"
|
||||
);
|
||||
@@ -138,7 +138,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||
let Some(request) = self.requests.get_mut(&id.id) else {
|
||||
// TODO(das): This log can happen if the request is error'ed early and dropped
|
||||
debug!(?id, "Sample downloaded event for unknown request");
|
||||
debug!(%id, "Sample downloaded event for unknown request");
|
||||
return None;
|
||||
};
|
||||
|
||||
@@ -167,7 +167,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||
let Some(request) = self.requests.get_mut(&id.id) else {
|
||||
// TODO(das): This log can happen if the request is error'ed early and dropped
|
||||
debug!(?id, "Sample verified event for unknown request");
|
||||
debug!(%id, "Sample verified event for unknown request");
|
||||
return None;
|
||||
};
|
||||
|
||||
@@ -191,7 +191,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||
let result = result.transpose();
|
||||
if let Some(result) = result {
|
||||
debug!(?id, ?result, "Sampling request completed, removing");
|
||||
debug!(%id, ?result, "Sampling request completed, removing");
|
||||
metrics::inc_counter_vec(
|
||||
&metrics::SAMPLING_REQUEST_RESULT,
|
||||
&[metrics::from_result(&result)],
|
||||
@@ -570,7 +570,7 @@ impl<T: BeaconChainTypes> ActiveSamplingRequest<T> {
|
||||
// Send requests.
|
||||
let mut sent_request = false;
|
||||
for (peer_id, column_indexes) in column_indexes_to_request {
|
||||
cx.data_column_lookup_request(
|
||||
cx.data_columns_by_root_request(
|
||||
DataColumnsByRootRequester::Sampling(SamplingId {
|
||||
id: self.requester_id,
|
||||
sampling_request_id: self.current_sampling_request_id,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::sync::network_context::PeerGroup;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use lighthouse_network::rpc::methods::BlocksByRangeRequest;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
@@ -8,7 +9,7 @@ use std::hash::{Hash, Hasher};
|
||||
use std::ops::Sub;
|
||||
use std::time::{Duration, Instant};
|
||||
use strum::Display;
|
||||
use types::{Epoch, EthSpec, Slot};
|
||||
use types::{ColumnIndex, Epoch, EthSpec, Slot};
|
||||
|
||||
/// The number of times to retry a batch before it is considered failed.
|
||||
const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
|
||||
@@ -17,13 +18,33 @@ const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
|
||||
/// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
|
||||
const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;
|
||||
|
||||
/// Type of expected batch.
|
||||
#[derive(Debug, Copy, Clone, Display)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum ByRangeRequestType {
|
||||
BlocksAndColumns,
|
||||
BlocksAndBlobs,
|
||||
Blocks,
|
||||
/// Records which peers are associated with a batch: a single peer for blocks plus a
/// `PeerGroup` used to look up a peer per column index.
#[derive(Clone, Debug)]
|
||||
pub struct BatchPeers {
|
||||
block_peer: PeerId,
|
||||
column_peers: PeerGroup,
|
||||
}
|
||||
|
||||
impl BatchPeers {
|
||||
/// Builds a `BatchPeers` with only a block peer; the column peer group is `PeerGroup::empty()`.
pub fn new_from_block_peer(block_peer: PeerId) -> Self {
|
||||
Self {
|
||||
block_peer,
|
||||
column_peers: PeerGroup::empty(),
|
||||
}
|
||||
}
|
||||
/// Builds a `BatchPeers` from an explicit block peer and column peer group.
pub fn new(block_peer: PeerId, column_peers: PeerGroup) -> Self {
|
||||
Self {
|
||||
block_peer,
|
||||
column_peers,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the block peer.
pub fn block(&self) -> PeerId {
|
||||
self.block_peer
|
||||
}
|
||||
|
||||
/// Looks up `index` (cast to `usize`) in the column peer group via `PeerGroup::of_index`.
/// NOTE(review): presumably `None` means no peer is recorded for that column — confirm
/// against `PeerGroup::of_index`.
pub fn column(&self, index: &ColumnIndex) -> Option<&PeerId> {
|
||||
self.column_peers.of_index(&((*index) as usize))
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows customisation of the above constants used in other sync methods such as BackFillSync.
|
||||
@@ -91,7 +112,7 @@ pub enum BatchOperationOutcome {
|
||||
|
||||
pub enum BatchProcessingResult {
|
||||
Success,
|
||||
FaultyFailure,
|
||||
FaultyFailure(Vec<PeerId>),
|
||||
NonFaultyFailure,
|
||||
}
|
||||
|
||||
@@ -107,11 +128,11 @@ pub struct BatchInfo<E: EthSpec, B: BatchConfig = RangeSyncBatchConfig> {
|
||||
/// Number of processing attempts that have failed but we do not count.
|
||||
non_faulty_processing_attempts: u8,
|
||||
/// The number of download retries this batch has undergone due to a failed request.
|
||||
failed_download_attempts: Vec<Option<PeerId>>,
|
||||
failed_download_attempts: usize,
|
||||
/// Peers that returned bad data, and we want to de-prioritize
|
||||
failed_peers: HashSet<PeerId>,
|
||||
/// State of the batch.
|
||||
state: BatchState<E>,
|
||||
/// Whether this batch contains all blocks or all blocks and blobs.
|
||||
batch_type: ByRangeRequestType,
|
||||
/// Pin the generic
|
||||
marker: std::marker::PhantomData<B>,
|
||||
}
|
||||
@@ -134,7 +155,7 @@ pub enum BatchState<E: EthSpec> {
|
||||
/// The batch is being downloaded.
|
||||
Downloading(Id),
|
||||
/// The batch has been completely downloaded and is ready for processing.
|
||||
AwaitingProcessing(PeerId, Vec<RpcBlock<E>>, Instant),
|
||||
AwaitingProcessing(BatchPeers, Vec<RpcBlock<E>>, Instant),
|
||||
/// The batch is being processed.
|
||||
Processing(Attempt),
|
||||
/// The batch was successfully processed and is waiting to be validated.
|
||||
@@ -171,37 +192,25 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
/// fork boundary will be of mixed type (all blocks and one last blockblob), and I don't want to
|
||||
/// deal with this for now.
|
||||
/// This means finalization might be slower in deneb
|
||||
pub fn new(start_epoch: &Epoch, num_of_epochs: u64, batch_type: ByRangeRequestType) -> Self {
|
||||
pub fn new(start_epoch: &Epoch, num_of_epochs: u64) -> Self {
|
||||
let start_slot = start_epoch.start_slot(E::slots_per_epoch());
|
||||
let end_slot = start_slot + num_of_epochs * E::slots_per_epoch();
|
||||
BatchInfo {
|
||||
start_slot,
|
||||
end_slot,
|
||||
failed_processing_attempts: Vec::new(),
|
||||
failed_download_attempts: Vec::new(),
|
||||
failed_download_attempts: 0,
|
||||
failed_peers: <_>::default(),
|
||||
non_faulty_processing_attempts: 0,
|
||||
state: BatchState::AwaitingDownload,
|
||||
batch_type,
|
||||
marker: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gives a list of peers from which this batch has had a failed download or processing
|
||||
/// attempt.
|
||||
pub fn failed_peers(&self) -> HashSet<PeerId> {
|
||||
let mut peers = HashSet::with_capacity(
|
||||
self.failed_processing_attempts.len() + self.failed_download_attempts.len(),
|
||||
);
|
||||
|
||||
for attempt in &self.failed_processing_attempts {
|
||||
peers.insert(attempt.peer_id);
|
||||
}
|
||||
|
||||
for peer in self.failed_download_attempts.iter().flatten() {
|
||||
peers.insert(*peer);
|
||||
}
|
||||
|
||||
peers
|
||||
pub fn failed_peers(&self) -> &HashSet<PeerId> {
|
||||
&self.failed_peers
|
||||
}
|
||||
|
||||
/// Verifies if an incoming block belongs to this batch.
|
||||
@@ -212,13 +221,13 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns the peer that is currently responsible for progressing the state of the batch.
|
||||
pub fn processing_peer(&self) -> Option<&PeerId> {
|
||||
/// Returns the peers that provided this batch's downloaded contents
|
||||
pub fn processing_peers(&self) -> Option<&BatchPeers> {
|
||||
match &self.state {
|
||||
BatchState::AwaitingDownload | BatchState::Failed | BatchState::Downloading(..) => None,
|
||||
BatchState::AwaitingProcessing(peer_id, _, _)
|
||||
| BatchState::Processing(Attempt { peer_id, .. })
|
||||
| BatchState::AwaitingValidation(Attempt { peer_id, .. }) => Some(peer_id),
|
||||
BatchState::AwaitingProcessing(peers, _, _)
|
||||
| BatchState::Processing(Attempt { peers, .. })
|
||||
| BatchState::AwaitingValidation(Attempt { peers, .. }) => Some(peers),
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
}
|
||||
}
|
||||
@@ -237,13 +246,10 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
}
|
||||
|
||||
/// Returns a BlocksByRange request associated with the batch.
|
||||
pub fn to_blocks_by_range_request(&self) -> (BlocksByRangeRequest, ByRangeRequestType) {
|
||||
(
|
||||
BlocksByRangeRequest::new(
|
||||
self.start_slot.into(),
|
||||
self.end_slot.sub(self.start_slot).into(),
|
||||
),
|
||||
self.batch_type,
|
||||
pub fn to_blocks_by_range_request(&self) -> BlocksByRangeRequest {
|
||||
BlocksByRangeRequest::new(
|
||||
self.start_slot.into(),
|
||||
self.end_slot.sub(self.start_slot).into(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -254,8 +260,7 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
match self.state {
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
BatchState::Failed => BatchOperationOutcome::Failed {
|
||||
blacklist: self.failed_processing_attempts.len()
|
||||
> self.failed_download_attempts.len(),
|
||||
blacklist: self.failed_processing_attempts.len() > self.failed_download_attempts,
|
||||
},
|
||||
_ => BatchOperationOutcome::Continue,
|
||||
}
|
||||
@@ -275,12 +280,12 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
pub fn download_completed(
|
||||
&mut self,
|
||||
blocks: Vec<RpcBlock<E>>,
|
||||
peer: PeerId,
|
||||
batch_peers: BatchPeers,
|
||||
) -> Result<usize /* Received blocks */, WrongState> {
|
||||
match self.state.poison() {
|
||||
BatchState::Downloading(_) => {
|
||||
BatchState::Downloading(_request_id) => {
|
||||
let received = blocks.len();
|
||||
self.state = BatchState::AwaitingProcessing(peer, blocks, Instant::now());
|
||||
self.state = BatchState::AwaitingProcessing(batch_peers, blocks, Instant::now());
|
||||
Ok(received)
|
||||
}
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
@@ -300,23 +305,18 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
/// The `peer` parameter, when set to None, does not increment the failed attempts of
|
||||
/// this batch and register the peer, rather attempts a re-download.
|
||||
#[must_use = "Batch may have failed"]
|
||||
pub fn download_failed(
|
||||
&mut self,
|
||||
peer: Option<PeerId>,
|
||||
) -> Result<BatchOperationOutcome, WrongState> {
|
||||
pub fn download_failed(&mut self) -> Result<BatchOperationOutcome, WrongState> {
|
||||
match self.state.poison() {
|
||||
BatchState::Downloading(_) => {
|
||||
// register the attempt and check if the batch can be tried again
|
||||
self.failed_download_attempts.push(peer);
|
||||
BatchState::Downloading(_request_id) => {
|
||||
self.failed_download_attempts += 1;
|
||||
|
||||
self.state = if self.failed_download_attempts.len()
|
||||
>= B::max_batch_download_attempts() as usize
|
||||
{
|
||||
BatchState::Failed
|
||||
} else {
|
||||
// drop the blocks
|
||||
BatchState::AwaitingDownload
|
||||
};
|
||||
self.state =
|
||||
if self.failed_download_attempts >= B::max_batch_download_attempts() as usize {
|
||||
BatchState::Failed
|
||||
} else {
|
||||
// drop the blocks
|
||||
BatchState::AwaitingDownload
|
||||
};
|
||||
Ok(self.outcome())
|
||||
}
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
@@ -349,8 +349,8 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
|
||||
pub fn start_processing(&mut self) -> Result<(Vec<RpcBlock<E>>, Duration), WrongState> {
|
||||
match self.state.poison() {
|
||||
BatchState::AwaitingProcessing(peer, blocks, start_instant) => {
|
||||
self.state = BatchState::Processing(Attempt::new::<B, E>(peer, &blocks));
|
||||
BatchState::AwaitingProcessing(peers, blocks, start_instant) => {
|
||||
self.state = BatchState::Processing(Attempt::new::<B, E>(peers, &blocks));
|
||||
Ok((blocks, start_instant.elapsed()))
|
||||
}
|
||||
BatchState::Poisoned => unreachable!("Poisoned batch"),
|
||||
@@ -373,9 +373,12 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
BatchState::Processing(attempt) => {
|
||||
self.state = match procesing_result {
|
||||
BatchProcessingResult::Success => BatchState::AwaitingValidation(attempt),
|
||||
BatchProcessingResult::FaultyFailure => {
|
||||
BatchProcessingResult::FaultyFailure(failed_peers) => {
|
||||
// register the failed attempt
|
||||
self.failed_processing_attempts.push(attempt);
|
||||
for peer in failed_peers {
|
||||
self.failed_peers.insert(peer);
|
||||
}
|
||||
|
||||
// check if the batch can be downloaded again
|
||||
if self.failed_processing_attempts.len()
|
||||
@@ -438,39 +441,41 @@ impl<E: EthSpec, B: BatchConfig> BatchInfo<E, B> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a peer's attempt and providing the result for this batch.
|
||||
/// Represents a batch attempt awaiting validation
|
||||
///
|
||||
/// Invalid attempts will downscore a peer.
|
||||
#[derive(PartialEq, Debug)]
|
||||
/// Invalid attempts will downscore its peers
|
||||
#[derive(Debug)]
|
||||
pub struct Attempt {
|
||||
/// The peer that made the attempt.
|
||||
pub peer_id: PeerId,
|
||||
/// The peers that served this batch's contents
|
||||
peers: BatchPeers,
|
||||
/// The hash of the blocks of the attempt.
|
||||
pub hash: u64,
|
||||
}
|
||||
|
||||
impl Attempt {
|
||||
fn new<B: BatchConfig, E: EthSpec>(peer_id: PeerId, blocks: &[RpcBlock<E>]) -> Self {
|
||||
fn new<B: BatchConfig, E: EthSpec>(peers: BatchPeers, blocks: &[RpcBlock<E>]) -> Self {
|
||||
let hash = B::batch_attempt_hash(blocks);
|
||||
Attempt { peer_id, hash }
|
||||
Attempt { peers, hash }
|
||||
}
|
||||
|
||||
pub fn block_peer(&self) -> PeerId {
|
||||
self.peers.block()
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: EthSpec> std::fmt::Debug for BatchState<E> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
BatchState::Processing(Attempt {
|
||||
ref peer_id,
|
||||
hash: _,
|
||||
}) => write!(f, "Processing({})", peer_id),
|
||||
BatchState::AwaitingValidation(Attempt {
|
||||
ref peer_id,
|
||||
hash: _,
|
||||
}) => write!(f, "AwaitingValidation({})", peer_id),
|
||||
BatchState::Processing(Attempt { ref peers, hash: _ }) => {
|
||||
write!(f, "Processing({})", peers.block())
|
||||
}
|
||||
BatchState::AwaitingValidation(Attempt { ref peers, hash: _ }) => {
|
||||
write!(f, "AwaitingValidation({})", peers.block())
|
||||
}
|
||||
BatchState::AwaitingDownload => f.write_str("AwaitingDownload"),
|
||||
BatchState::Failed => f.write_str("Failed"),
|
||||
BatchState::AwaitingProcessing(ref peer, ref blocks, _) => {
|
||||
write!(f, "AwaitingProcessing({}, {} blocks)", peer, blocks.len())
|
||||
BatchState::AwaitingProcessing(_, ref blocks, _) => {
|
||||
write!(f, "AwaitingProcessing({} blocks)", blocks.len())
|
||||
}
|
||||
BatchState::Downloading(request_id) => {
|
||||
write!(f, "Downloading({})", request_id)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use super::batch::{BatchInfo, BatchProcessingResult, BatchState};
|
||||
use super::batch::{BatchInfo, BatchPeers, BatchProcessingResult, BatchState};
|
||||
use super::RangeSyncType;
|
||||
use crate::metrics;
|
||||
use crate::network_beacon_processor::ChainSegmentProcessId;
|
||||
@@ -6,10 +6,13 @@ use crate::sync::network_context::{RangeRequestId, RpcRequestSendError, RpcRespo
|
||||
use crate::sync::{network_context::SyncNetworkContext, BatchOperationOutcome, BatchProcessResult};
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::BeaconChainTypes;
|
||||
use itertools::Itertools;
|
||||
use lighthouse_network::service::api_types::Id;
|
||||
use lighthouse_network::{PeerAction, PeerId};
|
||||
use logging::crit;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use strum::IntoStaticStr;
|
||||
use tracing::{debug, instrument, warn};
|
||||
use types::{Epoch, EthSpec, Hash256, Slot};
|
||||
@@ -23,7 +26,7 @@ use types::{Epoch, EthSpec, Hash256, Slot};
|
||||
pub const EPOCHS_PER_BATCH: u64 = 1;
|
||||
|
||||
/// The maximum number of batches to queue before requesting more.
|
||||
const BATCH_BUFFER_SIZE: u8 = 5;
|
||||
pub const BATCH_BUFFER_SIZE: usize = 5;
|
||||
|
||||
/// A return type for functions that act on a `Chain` which informs the caller whether the chain
|
||||
/// has been completed and should be removed or to be kept if further processing is
|
||||
@@ -86,9 +89,8 @@ pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
batches: BTreeMap<BatchId, BatchInfo<T::EthSpec>>,
|
||||
|
||||
/// The peers that agree on the `target_head_slot` and `target_head_root` as a canonical chain
|
||||
/// and thus available to download this chain from, as well as the batches we are currently
|
||||
/// requesting.
|
||||
peers: HashSet<PeerId>,
|
||||
/// and thus available to download this chain from.
|
||||
peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||
|
||||
/// Starting epoch of the next batch that needs to be downloaded.
|
||||
to_be_downloaded: BatchId,
|
||||
@@ -110,6 +112,9 @@ pub struct SyncingChain<T: BeaconChainTypes> {
|
||||
|
||||
/// The current processing batch, if any.
|
||||
current_processing_batch: Option<BatchId>,
|
||||
|
||||
/// The maximum number of batches to queue before requesting more.
|
||||
batch_buffer_size: usize,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
@@ -121,6 +126,15 @@ pub enum ChainSyncingState {
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// Leaks the state of all active batches for assertions in tests.
|
||||
#[cfg(test)]
|
||||
pub fn batches_state(&self) -> Vec<(BatchId, &BatchState<T::EthSpec>)> {
|
||||
self.batches
|
||||
.iter()
|
||||
.map(|(id, batch)| (*id, batch.state()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
id: Id,
|
||||
@@ -129,6 +143,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
target_head_root: Hash256,
|
||||
peer_id: PeerId,
|
||||
chain_type: SyncingChainType,
|
||||
batch_buffer_size: usize,
|
||||
) -> Self {
|
||||
SyncingChain {
|
||||
id,
|
||||
@@ -137,13 +152,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
target_head_slot,
|
||||
target_head_root,
|
||||
batches: BTreeMap::new(),
|
||||
peers: HashSet::from_iter([peer_id]),
|
||||
peers: Arc::new(RwLock::new(HashSet::from_iter([peer_id]))),
|
||||
to_be_downloaded: start_epoch,
|
||||
processing_target: start_epoch,
|
||||
optimistic_start: None,
|
||||
attempted_optimistic_starts: HashSet::default(),
|
||||
state: ChainSyncingState::Stopped,
|
||||
current_processing_batch: None,
|
||||
batch_buffer_size,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -155,7 +171,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// Check if the chain has peers from which to process batches.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn available_peers(&self) -> usize {
|
||||
self.peers.len()
|
||||
self.peers.read().len()
|
||||
}
|
||||
|
||||
/// Get the chain's id.
|
||||
@@ -167,7 +183,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// Peers currently syncing this chain.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||
self.peers.iter().cloned()
|
||||
self.peers
|
||||
.read()
|
||||
.iter()
|
||||
.copied()
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
/// Progress in epochs made by the chain
|
||||
@@ -191,9 +212,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// If the peer has active batches, those are considered failed and re-requested.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
pub fn remove_peer(&mut self, peer_id: &PeerId) -> ProcessingResult {
|
||||
self.peers.remove(peer_id);
|
||||
self.peers.write().remove(peer_id);
|
||||
|
||||
if self.peers.is_empty() {
|
||||
if self.peers.read().is_empty() {
|
||||
Err(RemoveChain::EmptyPeerPool)
|
||||
} else {
|
||||
Ok(KeepChain)
|
||||
@@ -216,7 +237,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
batch_id: BatchId,
|
||||
peer_id: &PeerId,
|
||||
batch_peers: BatchPeers,
|
||||
request_id: Id,
|
||||
blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||
) -> ProcessingResult {
|
||||
@@ -244,8 +265,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// A stream termination has been sent. This batch has ended. Process a completed batch.
|
||||
// Remove the request from the peer's active batches
|
||||
|
||||
// TODO(das): should use peer group here https://github.com/sigp/lighthouse/issues/6258
|
||||
let received = batch.download_completed(blocks, *peer_id)?;
|
||||
let received = batch.download_completed(blocks, batch_peers)?;
|
||||
let awaiting_batches = batch_id
|
||||
.saturating_sub(self.optimistic_start.unwrap_or(self.processing_target))
|
||||
/ EPOCHS_PER_BATCH;
|
||||
@@ -400,11 +420,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.request_batches(network)?;
|
||||
}
|
||||
}
|
||||
} else if !self.good_peers_on_sampling_subnets(self.processing_target, network) {
|
||||
// This is to handle the case where no batch was sent for the current processing
|
||||
// target when there are no sampling peers available. This is a valid state and should not
|
||||
// return an error.
|
||||
return Ok(KeepChain);
|
||||
} else {
|
||||
return Err(RemoveChain::WrongChainState(format!(
|
||||
"Batch not found for current processing target {}",
|
||||
@@ -447,7 +462,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
};
|
||||
|
||||
let peer = batch.processing_peer().cloned().ok_or_else(|| {
|
||||
let batch_peers = batch.processing_peers().ok_or_else(|| {
|
||||
RemoveChain::WrongBatchState(format!(
|
||||
"Processing target is in wrong state: {:?}",
|
||||
batch.state(),
|
||||
@@ -458,7 +473,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
debug!(
|
||||
result = ?result,
|
||||
batch_epoch = %batch_id,
|
||||
client = %network.client_type(&peer),
|
||||
batch_state = ?batch_state,
|
||||
?batch,
|
||||
"Batch processing result"
|
||||
@@ -521,13 +535,39 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
BatchProcessResult::FaultyFailure {
|
||||
imported_blocks,
|
||||
penalty,
|
||||
peer_action,
|
||||
// TODO(sync): propagate error in logs
|
||||
error: _,
|
||||
} => {
|
||||
// Penalize the peer appropriately.
|
||||
network.report_peer(peer, *penalty, "faulty_batch");
|
||||
let mut failed_peers = vec![];
|
||||
|
||||
// TODO(sync): De-dup between back and forwards sync
|
||||
if let Some(penalty) = peer_action.block_peer {
|
||||
// Penalize the peer appropriately.
|
||||
network.report_peer(batch_peers.block(), penalty, "faulty_batch");
|
||||
failed_peers.push(batch_peers.block());
|
||||
}
|
||||
|
||||
// Penalize each peer only once. Currently a peer_action does not mix different
|
||||
// PeerAction levels.
|
||||
for (peer, penalty) in peer_action
|
||||
.column_peer
|
||||
.iter()
|
||||
.filter_map(|(column_index, penalty)| {
|
||||
batch_peers
|
||||
.column(column_index)
|
||||
.map(|peer| (*peer, *penalty))
|
||||
})
|
||||
.unique()
|
||||
{
|
||||
network.report_peer(peer, penalty, "faulty_batch_column");
|
||||
failed_peers.push(peer);
|
||||
}
|
||||
|
||||
// Check if this batch is allowed to continue
|
||||
match batch.processing_completed(BatchProcessingResult::FaultyFailure)? {
|
||||
match batch
|
||||
.processing_completed(BatchProcessingResult::FaultyFailure(failed_peers))?
|
||||
{
|
||||
BatchOperationOutcome::Continue => {
|
||||
// Chain can continue. Check if it can be moved forward.
|
||||
if *imported_blocks > 0 {
|
||||
@@ -540,6 +580,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.handle_invalid_batch(network, batch_id)
|
||||
}
|
||||
BatchOperationOutcome::Failed { blacklist } => {
|
||||
// TODO(das): what peer action should we apply to the rest of
|
||||
// peers? Say a batch repeatedly fails because a custody peer is not
|
||||
// sending us its custody columns
|
||||
let penalty = PeerAction::LowToleranceError;
|
||||
|
||||
// Check that we have not exceeded the re-process retry counter,
|
||||
// If a batch has exceeded the invalid batch lookup attempts limit, it means
|
||||
// that it is likely all peers in this chain are sending invalid batches
|
||||
@@ -553,8 +598,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
"Batch failed to download. Dropping chain scoring peers"
|
||||
);
|
||||
|
||||
for peer in self.peers.drain() {
|
||||
network.report_peer(peer, *penalty, "faulty_chain");
|
||||
for peer in self.peers.write().drain() {
|
||||
network.report_peer(peer, penalty, "faulty_chain");
|
||||
}
|
||||
Err(RemoveChain::ChainFailed {
|
||||
blacklist,
|
||||
@@ -633,17 +678,20 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// The validated batch has been re-processed
|
||||
if attempt.hash != processed_attempt.hash {
|
||||
// The re-downloaded version was different
|
||||
if processed_attempt.peer_id != attempt.peer_id {
|
||||
// TODO(das): should penalize other peers?
|
||||
let valid_attempt_peer = processed_attempt.block_peer();
|
||||
let bad_attempt_peer = attempt.block_peer();
|
||||
if valid_attempt_peer != bad_attempt_peer {
|
||||
// A different peer sent the correct batch, the previous peer did not
|
||||
// We negatively score the original peer.
|
||||
let action = PeerAction::LowToleranceError;
|
||||
debug!(
|
||||
batch_epoch = %id, score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id, new_peer = %processed_attempt.peer_id,
|
||||
original_peer = %bad_attempt_peer, new_peer = %valid_attempt_peer,
|
||||
"Re-processed batch validated. Scoring original peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
bad_attempt_peer,
|
||||
action,
|
||||
"batch_reprocessed_original_peer",
|
||||
);
|
||||
@@ -654,12 +702,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
debug!(
|
||||
batch_epoch = %id,
|
||||
score_adjustment = %action,
|
||||
original_peer = %attempt.peer_id,
|
||||
new_peer = %processed_attempt.peer_id,
|
||||
original_peer = %bad_attempt_peer,
|
||||
new_peer = %valid_attempt_peer,
|
||||
"Re-processed batch validated by the same peer"
|
||||
);
|
||||
network.report_peer(
|
||||
attempt.peer_id,
|
||||
bad_attempt_peer,
|
||||
action,
|
||||
"batch_reprocessed_same_peer",
|
||||
);
|
||||
@@ -815,7 +863,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
peer_id: PeerId,
|
||||
) -> ProcessingResult {
|
||||
self.peers.insert(peer_id);
|
||||
self.peers.write().insert(peer_id);
|
||||
self.request_batches(network)
|
||||
}
|
||||
|
||||
@@ -827,7 +875,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
batch_id: BatchId,
|
||||
peer_id: &PeerId,
|
||||
request_id: Id,
|
||||
err: RpcResponseError,
|
||||
) -> ProcessingResult {
|
||||
@@ -842,7 +889,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
debug!(
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
%peer_id,
|
||||
%request_id,
|
||||
?batch_state,
|
||||
"Batch not expecting block"
|
||||
@@ -853,13 +899,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
batch_epoch = %batch_id,
|
||||
batch_state = ?batch.state(),
|
||||
error = ?err,
|
||||
%peer_id,
|
||||
%request_id,
|
||||
"Batch download error"
|
||||
);
|
||||
if let BatchOperationOutcome::Failed { blacklist } =
|
||||
batch.download_failed(Some(*peer_id))?
|
||||
{
|
||||
if let BatchOperationOutcome::Failed { blacklist } = batch.download_failed()? {
|
||||
return Err(RemoveChain::ChainFailed {
|
||||
blacklist,
|
||||
failing_batch: batch_id,
|
||||
@@ -869,7 +912,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
} else {
|
||||
debug!(
|
||||
batch_epoch = %batch_id,
|
||||
%peer_id,
|
||||
%request_id,
|
||||
batch_state,
|
||||
"Batch not found"
|
||||
@@ -888,29 +930,17 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
) -> ProcessingResult {
|
||||
let batch_state = self.visualize_batch_state();
|
||||
if let Some(batch) = self.batches.get_mut(&batch_id) {
|
||||
let (request, batch_type) = batch.to_blocks_by_range_request();
|
||||
let request = batch.to_blocks_by_range_request();
|
||||
let failed_peers = batch.failed_peers();
|
||||
|
||||
// TODO(das): we should request only from peers that are part of this SyncingChain.
|
||||
// However, then we hit the NoPeer error frequently which causes the batch to fail and
|
||||
// the SyncingChain to be dropped. We need to handle this case more gracefully.
|
||||
let synced_peers = network
|
||||
.network_globals()
|
||||
.peers
|
||||
.read()
|
||||
.synced_peers()
|
||||
.cloned()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
match network.block_components_by_range_request(
|
||||
batch_type,
|
||||
request,
|
||||
RangeRequestId::RangeSync {
|
||||
chain_id: self.id,
|
||||
batch_id,
|
||||
},
|
||||
&synced_peers,
|
||||
&failed_peers,
|
||||
self.peers.clone(),
|
||||
failed_peers,
|
||||
) {
|
||||
Ok(request_id) => {
|
||||
// inform the batch about the new request
|
||||
@@ -927,19 +957,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
Err(e) => match e {
|
||||
// TODO(das): Handle the NoPeer case explicitly and don't drop the batch. For
|
||||
// sync to work properly it must be okay to have "stalled" batches in
|
||||
// AwaitingDownload state. Currently it will error with invalid state if
|
||||
// that happens. Sync manager must periodically prune stalled batches like
|
||||
// we do for lookup sync. Then we can deprecate the redundant
|
||||
// `good_peers_on_sampling_subnets` checks.
|
||||
e
|
||||
@ (RpcRequestSendError::NoPeer(_) | RpcRequestSendError::InternalError(_)) => {
|
||||
e @ (RpcRequestSendError::NoPeers | RpcRequestSendError::InternalError(_)) => {
|
||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||
warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request");
|
||||
// register the failed download and check if the batch can be retried
|
||||
batch.start_downloading(1)?; // fake request_id = 1 is not relevant
|
||||
match batch.download_failed(None)? {
|
||||
match batch.download_failed()? {
|
||||
BatchOperationOutcome::Failed { blacklist } => {
|
||||
return Err(RemoveChain::ChainFailed {
|
||||
blacklist,
|
||||
@@ -993,14 +1016,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// check if we have the batch for our optimistic start. If not, request it first.
|
||||
// We wait for this batch before requesting any other batches.
|
||||
if let Some(epoch) = self.optimistic_start {
|
||||
if !self.good_peers_on_sampling_subnets(epoch, network) {
|
||||
debug!("Waiting for peers to be available on sampling column subnets");
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
|
||||
if let Entry::Vacant(entry) = self.batches.entry(epoch) {
|
||||
let batch_type = network.batch_type(epoch);
|
||||
let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH, batch_type);
|
||||
let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH);
|
||||
entry.insert(optimistic_batch);
|
||||
self.send_batch(network, epoch)?;
|
||||
}
|
||||
@@ -1021,35 +1038,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
Ok(KeepChain)
|
||||
}
|
||||
|
||||
/// Checks all sampling column subnets for peers. Returns `true` if there is at least one peer in
|
||||
/// every sampling column subnet.
|
||||
fn good_peers_on_sampling_subnets(
|
||||
&self,
|
||||
epoch: Epoch,
|
||||
network: &SyncNetworkContext<T>,
|
||||
) -> bool {
|
||||
if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
|
||||
// Require peers on all sampling column subnets before sending batches
|
||||
let peers_on_all_custody_subnets = network
|
||||
.network_globals()
|
||||
.sampling_subnets()
|
||||
.iter()
|
||||
.all(|subnet_id| {
|
||||
let peer_count = network
|
||||
.network_globals()
|
||||
.peers
|
||||
.read()
|
||||
.good_custody_subnet_peer(*subnet_id)
|
||||
.count();
|
||||
|
||||
peer_count > 0
|
||||
});
|
||||
peers_on_all_custody_subnets
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the next required batch from the chain. If there are no more batches required,
|
||||
/// `false` is returned.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
@@ -1077,20 +1065,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
.iter()
|
||||
.filter(|&(_epoch, batch)| in_buffer(batch))
|
||||
.count()
|
||||
> BATCH_BUFFER_SIZE as usize
|
||||
>= self.batch_buffer_size
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// don't send batch requests until we have peers on sampling subnets
|
||||
// TODO(das): this is a workaround to avoid sending out excessive block requests because
|
||||
// block and data column requests are currently coupled. This can be removed once we find a
|
||||
// way to decouple the requests and do retries individually, see issue #6258.
|
||||
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
|
||||
debug!("Waiting for peers to be available on custody column subnets");
|
||||
return None;
|
||||
}
|
||||
|
||||
// If no batch needs a retry, attempt to send the batch of the next epoch to download
|
||||
let next_batch_id = self.to_be_downloaded;
|
||||
// this batch could have been included already being an optimistic batch
|
||||
@@ -1101,8 +1080,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
self.include_next_batch(network)
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let batch_type = network.batch_type(next_batch_id);
|
||||
entry.insert(BatchInfo::new(&next_batch_id, EPOCHS_PER_BATCH, batch_type));
|
||||
entry.insert(BatchInfo::new(&next_batch_id, EPOCHS_PER_BATCH));
|
||||
self.to_be_downloaded += EPOCHS_PER_BATCH;
|
||||
Some(next_batch_id)
|
||||
}
|
||||
@@ -1117,28 +1095,28 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
/// batch states. See [BatchState::visualize] for symbol definitions.
|
||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||
fn visualize_batch_state(&self) -> String {
|
||||
let mut visualization_string = String::with_capacity((BATCH_BUFFER_SIZE * 3) as usize);
|
||||
let mut visualization_string = String::with_capacity(self.batch_buffer_size * 3);
|
||||
|
||||
// Start of the block
|
||||
visualization_string.push('[');
|
||||
|
||||
for mut batch_index in 0..BATCH_BUFFER_SIZE {
|
||||
for mut batch_index in 0..self.batch_buffer_size {
|
||||
if let Some(batch) = self
|
||||
.batches
|
||||
.get(&(self.processing_target + batch_index as u64 * EPOCHS_PER_BATCH))
|
||||
{
|
||||
visualization_string.push(batch.visualize());
|
||||
if batch_index != BATCH_BUFFER_SIZE {
|
||||
if batch_index != self.batch_buffer_size {
|
||||
// Add a comma in between elements
|
||||
visualization_string.push(',');
|
||||
}
|
||||
} else {
|
||||
// No batch exists, it is on our list to be downloaded
|
||||
// Fill in the rest of the gaps
|
||||
while batch_index < BATCH_BUFFER_SIZE {
|
||||
while batch_index < self.batch_buffer_size {
|
||||
visualization_string.push('E');
|
||||
// Add a comma between the empty batches
|
||||
if batch_index < BATCH_BUFFER_SIZE.saturating_sub(1) {
|
||||
if batch_index < self.batch_buffer_size.saturating_sub(1) {
|
||||
visualization_string.push(',')
|
||||
}
|
||||
batch_index += 1;
|
||||
|
||||
@@ -51,15 +51,25 @@ pub struct ChainCollection<T: BeaconChainTypes> {
|
||||
head_chains: FnvHashMap<ChainId, SyncingChain<T>>,
|
||||
/// The current sync state of the process.
|
||||
state: RangeSyncState,
|
||||
/// The maximum number of batches to queue before requesting more.
|
||||
batch_buffer_size: usize,
|
||||
}
|
||||
|
||||
impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||
/// Test-only iterator over every chain in the collection: finalized chains first, then head
/// chains.
#[cfg(test)]
pub(crate) fn iter(&self) -> impl Iterator<Item = &SyncingChain<T>> {
    let finalized = self.finalized_chains.values();
    let head = self.head_chains.values();
    finalized.chain(head)
}
|
||||
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>, batch_buffer_size: usize) -> Self {
|
||||
ChainCollection {
|
||||
beacon_chain,
|
||||
finalized_chains: FnvHashMap::default(),
|
||||
head_chains: FnvHashMap::default(),
|
||||
state: RangeSyncState::Idle,
|
||||
batch_buffer_size,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -497,6 +507,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||
target_head_root,
|
||||
peer,
|
||||
sync_type.into(),
|
||||
self.batch_buffer_size,
|
||||
);
|
||||
|
||||
debug!(
|
||||
|
||||
@@ -8,11 +8,8 @@ mod range;
|
||||
mod sync_type;
|
||||
|
||||
pub use batch::{
|
||||
BatchConfig, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState,
|
||||
ByRangeRequestType,
|
||||
BatchConfig, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, BatchState,
|
||||
};
|
||||
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
|
||||
#[cfg(test)]
|
||||
pub use chain_collection::SyncChainStatus;
|
||||
pub use chain::{BatchId, ChainId, BATCH_BUFFER_SIZE, EPOCHS_PER_BATCH};
|
||||
pub use range::RangeSync;
|
||||
pub use sync_type::RangeSyncType;
|
||||
|
||||
@@ -42,9 +42,12 @@
|
||||
use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
|
||||
use super::chain_collection::{ChainCollection, SyncChainStatus};
|
||||
use super::sync_type::RangeSyncType;
|
||||
use super::BatchPeers;
|
||||
use crate::metrics;
|
||||
use crate::status::ToStatusMessage;
|
||||
use crate::sync::network_context::{RpcResponseError, SyncNetworkContext};
|
||||
#[cfg(test)]
|
||||
use crate::sync::range_sync::BatchState;
|
||||
use crate::sync::BatchProcessResult;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
@@ -87,10 +90,10 @@ where
|
||||
name = "range_sync",
|
||||
skip_all
|
||||
)]
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>, batch_buffer_size: usize) -> Self {
|
||||
RangeSync {
|
||||
beacon_chain: beacon_chain.clone(),
|
||||
chains: ChainCollection::new(beacon_chain),
|
||||
chains: ChainCollection::new(beacon_chain, batch_buffer_size),
|
||||
failed_chains: LRUTimeCache::new(std::time::Duration::from_secs(
|
||||
FAILED_CHAINS_EXPIRY_SECONDS,
|
||||
)),
|
||||
@@ -99,10 +102,23 @@ where
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn __failed_chains(&mut self) -> Vec<Hash256> {
|
||||
pub(crate) fn failed_chains(&mut self) -> Vec<Hash256> {
|
||||
self.failed_chains.keys().copied().collect()
|
||||
}
|
||||
|
||||
/// Test-only snapshot of every batch across all chains, as `(chain id, batch id, state)`
/// tuples in chain iteration order.
#[cfg(test)]
pub(crate) fn batches_state(&self) -> Vec<(ChainId, BatchId, &BatchState<T::EthSpec>)> {
    let mut states = Vec::new();
    for chain in self.chains.iter() {
        for (batch_id, state) in chain.batches_state() {
            states.push((chain.id(), batch_id, state));
        }
    }
    states
}
|
||||
|
||||
#[instrument(parent = None,
|
||||
level = "info",
|
||||
fields(component = "range_sync"),
|
||||
@@ -227,7 +243,7 @@ where
|
||||
pub fn blocks_by_range_response(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
peer_id: PeerId,
|
||||
batch_peers: BatchPeers,
|
||||
chain_id: ChainId,
|
||||
batch_id: BatchId,
|
||||
request_id: Id,
|
||||
@@ -235,7 +251,7 @@ where
|
||||
) {
|
||||
// check if this chunk removes the chain
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.on_block_response(network, batch_id, &peer_id, request_id, blocks)
|
||||
chain.on_block_response(network, batch_id, batch_peers, request_id, blocks)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
@@ -343,7 +359,6 @@ where
|
||||
pub fn inject_error(
|
||||
&mut self,
|
||||
network: &mut SyncNetworkContext<T>,
|
||||
peer_id: PeerId,
|
||||
batch_id: BatchId,
|
||||
chain_id: ChainId,
|
||||
request_id: Id,
|
||||
@@ -351,7 +366,7 @@ where
|
||||
) {
|
||||
// check that this request is pending
|
||||
match self.chains.call_by_id(chain_id, |chain| {
|
||||
chain.inject_error(network, batch_id, &peer_id, request_id, err)
|
||||
chain.inject_error(network, batch_id, request_id, err)
|
||||
}) {
|
||||
Ok((removed_chain, sync_type)) => {
|
||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||
|
||||
@@ -2,6 +2,7 @@ use crate::network_beacon_processor::NetworkBeaconProcessor;
|
||||
use crate::sync::block_lookups::{
|
||||
BlockLookupSummary, PARENT_DEPTH_TOLERANCE, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS,
|
||||
};
|
||||
use crate::sync::range_sync::BATCH_BUFFER_SIZE;
|
||||
use crate::sync::{
|
||||
manager::{BlockProcessType, BlockProcessingResult, SyncManager},
|
||||
peer_sampling::SamplingConfig,
|
||||
@@ -36,7 +37,7 @@ use lighthouse_network::{
|
||||
SamplingRequester, SingleLookupReqId, SyncRequestId,
|
||||
},
|
||||
types::SyncState,
|
||||
NetworkConfig, NetworkGlobals, PeerId,
|
||||
NetworkConfig, NetworkGlobals, PeerId, SyncInfo,
|
||||
};
|
||||
use slot_clock::{SlotClock, TestingSlotClock};
|
||||
use tokio::sync::mpsc;
|
||||
@@ -44,8 +45,8 @@ use tracing::info;
|
||||
use types::{
|
||||
data_column_sidecar::ColumnIndex,
|
||||
test_utils::{SeedableRng, TestRandom, XorShiftRng},
|
||||
BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, ForkName,
|
||||
Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot,
|
||||
BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, DataColumnSidecarList, EthSpec,
|
||||
ForkContext, ForkName, Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot,
|
||||
};
|
||||
|
||||
const D: Duration = Duration::new(0, 0);
|
||||
@@ -54,8 +55,34 @@ const SAMPLING_REQUIRED_SUCCESSES: usize = 2;
|
||||
type DCByRootIds = Vec<DCByRootId>;
|
||||
type DCByRootId = (SyncRequestId, Vec<ColumnIndex>);
|
||||
|
||||
/// Presets consumed by `add_sync_peers` to decide which sync peers are added to the test rig.
pub enum PeersConfig {
|
||||
/// 100 peers added with `supernode = false`, followed by one peer added with `supernode = true`.
SupernodeAndRandom,
|
||||
/// A single peer added with `supernode = true`.
SupernodeOnly,
|
||||
}
|
||||
|
||||
/// Options accepted by `TestRig::test_setup_with_options`.
pub struct TestOptions {
|
||||
/// If the node created by this test harness is a supernode
|
||||
pub is_supernode: bool,
|
||||
/// The maximum number of batches to queue before requesting more.
|
||||
pub batch_buffer_size: usize,
|
||||
}
|
||||
|
||||
impl TestRig {
|
||||
pub fn test_setup() -> Self {
|
||||
Self::test_setup_with_options(TestOptions {
|
||||
is_supernode: false,
|
||||
batch_buffer_size: BATCH_BUFFER_SIZE,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn test_setup_as_supernode() -> Self {
|
||||
Self::test_setup_with_options(TestOptions {
|
||||
is_supernode: true,
|
||||
batch_buffer_size: BATCH_BUFFER_SIZE,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn test_setup_with_options(options: TestOptions) -> Self {
|
||||
// Use `fork_from_env` logic to set correct fork epochs
|
||||
let spec = test_spec::<E>();
|
||||
|
||||
@@ -84,10 +111,11 @@ impl TestRig {
|
||||
// TODO(das): make the generation of the ENR use the deterministic rng to have consistent
|
||||
// column assignments
|
||||
let network_config = Arc::new(NetworkConfig::default());
|
||||
let globals = Arc::new(NetworkGlobals::new_test_globals(
|
||||
let globals = Arc::new(NetworkGlobals::new_test_globals_as_supernode(
|
||||
Vec::new(),
|
||||
network_config,
|
||||
chain.spec.clone(),
|
||||
options.is_supernode,
|
||||
));
|
||||
let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing(
|
||||
globals,
|
||||
@@ -116,6 +144,7 @@ impl TestRig {
|
||||
network_rx,
|
||||
network_rx_queue: vec![],
|
||||
sync_rx,
|
||||
sent_blocks_by_range: <_>::default(),
|
||||
rng,
|
||||
network_globals: beacon_processor.network_globals.clone(),
|
||||
sync_manager: SyncManager::new(
|
||||
@@ -128,6 +157,7 @@ impl TestRig {
|
||||
required_successes: vec![SAMPLING_REQUIRED_SUCCESSES],
|
||||
},
|
||||
fork_context,
|
||||
options.batch_buffer_size,
|
||||
),
|
||||
harness,
|
||||
fork_name,
|
||||
@@ -204,9 +234,7 @@ impl TestRig {
|
||||
generate_rand_block_and_blobs::<E>(fork_name, num_blobs, rng, &self.spec)
|
||||
}
|
||||
|
||||
fn rand_block_and_data_columns(
|
||||
&mut self,
|
||||
) -> (SignedBeaconBlock<E>, Vec<Arc<DataColumnSidecar<E>>>) {
|
||||
fn rand_block_and_data_columns(&mut self) -> (SignedBeaconBlock<E>, DataColumnSidecarList<E>) {
|
||||
let num_blobs = NumBlobs::Number(1);
|
||||
generate_rand_block_and_data_columns::<E>(
|
||||
self.fork_name,
|
||||
@@ -247,8 +275,8 @@ impl TestRig {
|
||||
self.sync_manager.active_parent_lookups().len()
|
||||
}
|
||||
|
||||
fn active_range_sync_chain(&self) -> (RangeSyncType, Slot, Slot) {
|
||||
self.sync_manager.get_range_sync_chains().unwrap().unwrap()
|
||||
fn active_range_sync_chain(&mut self) -> (RangeSyncType, Slot, Slot) {
|
||||
self.sync_manager.range_sync().state().unwrap().unwrap()
|
||||
}
|
||||
|
||||
fn assert_single_lookups_count(&self, count: usize) {
|
||||
@@ -358,29 +386,67 @@ impl TestRig {
|
||||
self.expect_empty_network();
|
||||
}
|
||||
|
||||
pub fn new_connected_peer(&mut self) -> PeerId {
|
||||
// Note: prefer to use `add_connected_peer_testing_only`. This is currently extensively used in
|
||||
// lookup tests. We should consolidate these "add peer" methods in a future refactor
|
||||
fn new_connected_peer(&mut self) -> PeerId {
|
||||
self.add_connected_peer_testing_only(false)
|
||||
}
|
||||
|
||||
// Note: prefer to use `add_connected_peer_testing_only`. This is currently extensively used in
|
||||
// lookup tests. We should consolidate these "add peer" methods in a future refactor
|
||||
fn new_connected_supernode_peer(&mut self) -> PeerId {
|
||||
self.add_connected_peer_testing_only(true)
|
||||
}
|
||||
|
||||
/// Add a random connected peer that is not known by the sync module
|
||||
pub fn add_connected_peer_testing_only(&mut self, supernode: bool) -> PeerId {
|
||||
let key = self.determinstic_key();
|
||||
let peer_id = self
|
||||
.network_globals
|
||||
.peers
|
||||
.write()
|
||||
.__add_connected_peer_testing_only(false, &self.harness.spec, key);
|
||||
self.log(&format!("Added new peer for testing {peer_id:?}"));
|
||||
.__add_connected_peer_testing_only(supernode, &self.harness.spec, key);
|
||||
let mut peer_custody_subnets = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(&peer_id)
|
||||
.expect("peer was just added")
|
||||
.custody_subnets_iter()
|
||||
.map(|subnet| **subnet)
|
||||
.collect::<Vec<_>>();
|
||||
peer_custody_subnets.sort_unstable();
|
||||
self.log(&format!(
|
||||
"Added new peer for testing {peer_id:?} custody subnets {peer_custody_subnets:?}"
|
||||
));
|
||||
peer_id
|
||||
}
|
||||
|
||||
pub fn new_connected_supernode_peer(&mut self) -> PeerId {
|
||||
let key = self.determinstic_key();
|
||||
self.network_globals
|
||||
.peers
|
||||
.write()
|
||||
.__add_connected_peer_testing_only(true, &self.harness.spec, key)
|
||||
/// Add a random connected peer + add it to sync with a specific remote Status
|
||||
pub fn add_sync_peer(&mut self, supernode: bool, remote_info: SyncInfo) -> PeerId {
|
||||
let peer_id = self.add_connected_peer_testing_only(supernode);
|
||||
self.send_sync_message(SyncMessage::AddPeer(peer_id, remote_info));
|
||||
peer_id
|
||||
}
|
||||
|
||||
/// Draws a `k256` ECDSA signing key from the rig's own `rng` and converts it into a
/// `CombinedKey`.
fn determinstic_key(&mut self) -> CombinedKey {
|
||||
k256::ecdsa::SigningKey::random(&mut self.rng).into()
|
||||
}
|
||||
|
||||
pub fn add_sync_peers(&mut self, config: PeersConfig, remote_info: SyncInfo) {
|
||||
match config {
|
||||
PeersConfig::SupernodeAndRandom => {
|
||||
for _ in 0..100 {
|
||||
self.add_sync_peer(false, remote_info.clone());
|
||||
}
|
||||
self.add_sync_peer(true, remote_info);
|
||||
}
|
||||
PeersConfig::SupernodeOnly => {
|
||||
self.add_sync_peer(true, remote_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_connected_peers_for_peerdas(&mut self) {
|
||||
// Enough sampling peers with few columns
|
||||
for _ in 0..100 {
|
||||
@@ -675,7 +741,7 @@ impl TestRig {
|
||||
fn complete_valid_sampling_column_requests(
|
||||
&mut self,
|
||||
ids: DCByRootIds,
|
||||
data_columns: Vec<Arc<DataColumnSidecar<E>>>,
|
||||
data_columns: DataColumnSidecarList<E>,
|
||||
) {
|
||||
for id in ids {
|
||||
self.log(&format!("return valid data column for {id:?}"));
|
||||
@@ -720,7 +786,7 @@ impl TestRig {
|
||||
fn complete_valid_custody_request(
|
||||
&mut self,
|
||||
ids: DCByRootIds,
|
||||
data_columns: Vec<Arc<DataColumnSidecar<E>>>,
|
||||
data_columns: DataColumnSidecarList<E>,
|
||||
missing_components: bool,
|
||||
) {
|
||||
let lookup_id = if let SyncRequestId::DataColumnsByRoot(DataColumnsByRootRequestId {
|
||||
@@ -843,6 +909,19 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
/// Similar to `pop_received_network_events` but finds matching events without removing them.
|
||||
pub fn filter_received_network_events<T, F: Fn(&NetworkMessage<E>) -> Option<T>>(
|
||||
&mut self,
|
||||
predicate_transform: F,
|
||||
) -> Vec<T> {
|
||||
self.drain_network_rx();
|
||||
|
||||
self.network_rx_queue
|
||||
.iter()
|
||||
.filter_map(predicate_transform)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
|
||||
&mut self,
|
||||
predicate_transform: F,
|
||||
@@ -1091,6 +1170,16 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect_no_penalty_for_anyone(&mut self) {
|
||||
let downscore_events = self.filter_received_network_events(|ev| match ev {
|
||||
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((*peer_id, *msg)),
|
||||
_ => None,
|
||||
});
|
||||
if !downscore_events.is_empty() {
|
||||
panic!("Expected no downscoring events but found: {downscore_events:?}");
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn expect_parent_chain_process(&mut self) {
|
||||
match self.beacon_processor_rx.try_recv() {
|
||||
@@ -1126,6 +1215,25 @@ impl TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn expect_penalties(&mut self, expected_penalty_msg: &'static str) {
|
||||
let all_penalties = self.filter_received_network_events(|ev| match ev {
|
||||
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((*peer_id, *msg)),
|
||||
_ => None,
|
||||
});
|
||||
if all_penalties
|
||||
.iter()
|
||||
.any(|(_, msg)| *msg != expected_penalty_msg)
|
||||
{
|
||||
panic!(
|
||||
"Expected penalties only of {expected_penalty_msg}, but found {all_penalties:?}"
|
||||
);
|
||||
}
|
||||
self.log(&format!(
|
||||
"Found expected penalties {expected_penalty_msg}: {all_penalties:?}"
|
||||
));
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) {
|
||||
let penalty_msg = self
|
||||
|
||||
@@ -6,9 +6,12 @@ use beacon_chain::builder::Witness;
|
||||
use beacon_chain::eth1_chain::CachingEth1Backend;
|
||||
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
|
||||
use beacon_processor::WorkEvent;
|
||||
use lighthouse_network::service::api_types::ComponentsByRangeRequestId;
|
||||
use lighthouse_network::NetworkGlobals;
|
||||
pub use lookups::PeersConfig;
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
use slot_clock::ManualSlotClock;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::Write;
|
||||
use std::sync::{Arc, Once};
|
||||
@@ -17,7 +20,7 @@ use tokio::sync::mpsc;
|
||||
use tracing_subscriber::fmt::MakeWriter;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::util::SubscriberInitExt;
|
||||
use types::{ChainSpec, ForkName, MinimalEthSpec as E};
|
||||
use types::{ChainSpec, ForkName, MinimalEthSpec as E, SignedBeaconBlock};
|
||||
|
||||
mod lookups;
|
||||
mod range;
|
||||
@@ -69,6 +72,9 @@ struct TestRig {
|
||||
rng: ChaCha20Rng,
|
||||
fork_name: ForkName,
|
||||
spec: Arc<ChainSpec>,
|
||||
|
||||
// Cache of sent blocks for PeerDAS responses
|
||||
sent_blocks_by_range: HashMap<ComponentsByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
}
|
||||
|
||||
// Environment variable to read if `fork_from_env` feature is enabled.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -34,9 +34,6 @@ pub enum Error {
|
||||
///
|
||||
/// The block is invalid.
|
||||
IncorrectBlockProposer { block: u64, local_shuffling: u64 },
|
||||
/// The public keys supplied do not match the number of objects requiring keys. Block validity
|
||||
/// was not determined.
|
||||
MismatchedPublicKeyLen { pubkey_len: usize, other_len: usize },
|
||||
/// Pubkey decompression failed. The block is invalid.
|
||||
PublicKeyDecompressionFailed,
|
||||
/// The public key bytes stored in the `BeaconState` were not valid. This is a serious internal
|
||||
|
||||
@@ -321,6 +321,10 @@ impl<E: EthSpec, Payload: AbstractExecPayload<E>> SignedBeaconBlock<E, Payload>
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Returns `true` when `num_expected_blobs()` is greater than zero.
pub fn has_data(&self) -> bool {
|
||||
self.num_expected_blobs() > 0
|
||||
}
|
||||
|
||||
/// Used for displaying commitments in logs.
|
||||
pub fn commitments_formatted(&self) -> String {
|
||||
let Ok(commitments) = self.message().body().blob_kzg_commitments() else {
|
||||
|
||||
Reference in New Issue
Block a user