mirror of
https://github.com/sigp/lighthouse.git
synced 2026-07-05 05:44:30 +00:00
Implement reliable range sync for PeerDAS
This commit is contained in:
@@ -199,7 +199,7 @@ impl<E: EthSpec> RpcBlock<E> {
|
|||||||
custody_columns: Vec<CustodyDataColumn<E>>,
|
custody_columns: Vec<CustodyDataColumn<E>>,
|
||||||
expected_custody_indices: Vec<ColumnIndex>,
|
expected_custody_indices: Vec<ColumnIndex>,
|
||||||
spec: &ChainSpec,
|
spec: &ChainSpec,
|
||||||
) -> Result<Self, AvailabilityCheckError> {
|
) -> Result<Self, String> {
|
||||||
let block_root = block_root.unwrap_or_else(|| get_block_root(&block));
|
let block_root = block_root.unwrap_or_else(|| get_block_root(&block));
|
||||||
|
|
||||||
let custody_columns_count = expected_custody_indices.len();
|
let custody_columns_count = expected_custody_indices.len();
|
||||||
@@ -209,11 +209,7 @@ impl<E: EthSpec> RpcBlock<E> {
|
|||||||
custody_columns,
|
custody_columns,
|
||||||
spec.number_of_columns as usize,
|
spec.number_of_columns as usize,
|
||||||
)
|
)
|
||||||
.map_err(|e| {
|
.map_err(|e| format!("custody_columns len exceeds number_of_columns: {e:?}"))?,
|
||||||
AvailabilityCheckError::Unexpected(format!(
|
|
||||||
"custody_columns len exceeds number_of_columns: {e:?}"
|
|
||||||
))
|
|
||||||
})?,
|
|
||||||
expected_custody_indices,
|
expected_custody_indices,
|
||||||
};
|
};
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
|||||||
@@ -2418,7 +2418,8 @@ where
|
|||||||
columns,
|
columns,
|
||||||
expected_custody_indices,
|
expected_custody_indices,
|
||||||
&self.spec,
|
&self.spec,
|
||||||
)?
|
)
|
||||||
|
.map_err(BlockError::InternalError)?
|
||||||
} else {
|
} else {
|
||||||
RpcBlock::new_without_blobs(Some(block_root), block, sampling_column_count)
|
RpcBlock::new_without_blobs(Some(block_root), block, sampling_column_count)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,6 +59,14 @@ pub struct BlobsByRangeRequestId {
|
|||||||
pub struct DataColumnsByRangeRequestId {
|
pub struct DataColumnsByRangeRequestId {
|
||||||
/// Id to identify this attempt at a data_columns_by_range request for `parent_request_id`
|
/// Id to identify this attempt at a data_columns_by_range request for `parent_request_id`
|
||||||
pub id: Id,
|
pub id: Id,
|
||||||
|
/// The Id of the parent custody by range request that issued this data_columns_by_range request
|
||||||
|
pub parent_request_id: CustodyByRangeRequestId,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
|
||||||
|
pub struct CustodyByRangeRequestId {
|
||||||
|
/// Id to identify this attempt at a meta custody by range request for `parent_request_id`
|
||||||
|
pub id: Id,
|
||||||
/// The Id of the overall By Range request for block components.
|
/// The Id of the overall By Range request for block components.
|
||||||
pub parent_request_id: ComponentsByRangeRequestId,
|
pub parent_request_id: ComponentsByRangeRequestId,
|
||||||
}
|
}
|
||||||
@@ -221,6 +229,7 @@ macro_rules! impl_display {
|
|||||||
impl_display!(BlocksByRangeRequestId, "{}/{}", id, parent_request_id);
|
impl_display!(BlocksByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||||
impl_display!(BlobsByRangeRequestId, "{}/{}", id, parent_request_id);
|
impl_display!(BlobsByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||||
impl_display!(DataColumnsByRangeRequestId, "{}/{}", id, parent_request_id);
|
impl_display!(DataColumnsByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||||
|
impl_display!(CustodyByRangeRequestId, "{}/{}", id, parent_request_id);
|
||||||
impl_display!(ComponentsByRangeRequestId, "{}/{}", id, requester);
|
impl_display!(ComponentsByRangeRequestId, "{}/{}", id, requester);
|
||||||
impl_display!(DataColumnsByRootRequestId, "{}/{}", id, requester);
|
impl_display!(DataColumnsByRootRequestId, "{}/{}", id, requester);
|
||||||
impl_display!(SingleLookupReqId, "{}/Lookup/{}", req_id, lookup_id);
|
impl_display!(SingleLookupReqId, "{}/Lookup/{}", req_id, lookup_id);
|
||||||
@@ -299,14 +308,17 @@ mod tests {
|
|||||||
fn display_id_data_columns_by_range() {
|
fn display_id_data_columns_by_range() {
|
||||||
let id = DataColumnsByRangeRequestId {
|
let id = DataColumnsByRangeRequestId {
|
||||||
id: 123,
|
id: 123,
|
||||||
parent_request_id: ComponentsByRangeRequestId {
|
parent_request_id: CustodyByRangeRequestId {
|
||||||
id: 122,
|
id: 122,
|
||||||
requester: RangeRequestId::RangeSync {
|
parent_request_id: ComponentsByRangeRequestId {
|
||||||
chain_id: 54,
|
id: 121,
|
||||||
batch_id: Epoch::new(0),
|
requester: RangeRequestId::RangeSync {
|
||||||
|
chain_id: 54,
|
||||||
|
batch_id: Epoch::new(0),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
assert_eq!(format!("{id}"), "123/122/RangeSync/0/54");
|
assert_eq!(format!("{id}"), "123/122/121/RangeSync/0/54");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -245,6 +245,25 @@ impl<E: EthSpec> NetworkGlobals<E> {
|
|||||||
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
|
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_test_globals_as_supernode(
|
||||||
|
trusted_peers: Vec<PeerId>,
|
||||||
|
config: Arc<NetworkConfig>,
|
||||||
|
spec: Arc<ChainSpec>,
|
||||||
|
is_supernode: bool,
|
||||||
|
) -> NetworkGlobals<E> {
|
||||||
|
let metadata = MetaData::V3(MetaDataV3 {
|
||||||
|
seq_number: 0,
|
||||||
|
attnets: Default::default(),
|
||||||
|
syncnets: Default::default(),
|
||||||
|
custody_group_count: if is_supernode {
|
||||||
|
spec.number_of_custody_groups
|
||||||
|
} else {
|
||||||
|
spec.custody_requirement
|
||||||
|
},
|
||||||
|
});
|
||||||
|
Self::new_test_globals_with_metadata(trusted_peers, metadata, config, spec)
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn new_test_globals_with_metadata(
|
pub(crate) fn new_test_globals_with_metadata(
|
||||||
trusted_peers: Vec<PeerId>,
|
trusted_peers: Vec<PeerId>,
|
||||||
metadata: MetaData<E>,
|
metadata: MetaData<E>,
|
||||||
|
|||||||
@@ -21,11 +21,11 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
|
|||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use lighthouse_network::service::api_types::Id;
|
use lighthouse_network::service::api_types::Id;
|
||||||
use lighthouse_network::types::{BackFillState, NetworkGlobals};
|
use lighthouse_network::types::{BackFillState, NetworkGlobals};
|
||||||
use lighthouse_network::{PeerAction, PeerId};
|
use lighthouse_network::PeerAction;
|
||||||
use logging::crit;
|
use logging::crit;
|
||||||
use std::collections::{
|
use std::collections::{
|
||||||
btree_map::{BTreeMap, Entry},
|
btree_map::{BTreeMap, Entry},
|
||||||
HashSet,
|
HashMap, HashSet,
|
||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{debug, error, info, instrument, warn};
|
use tracing::{debug, error, info, instrument, warn};
|
||||||
@@ -312,7 +312,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
network: &mut SyncNetworkContext<T>,
|
network: &mut SyncNetworkContext<T>,
|
||||||
batch_id: BatchId,
|
batch_id: BatchId,
|
||||||
peer_id: &PeerId,
|
|
||||||
request_id: Id,
|
request_id: Id,
|
||||||
err: RpcResponseError,
|
err: RpcResponseError,
|
||||||
) -> Result<(), BackFillError> {
|
) -> Result<(), BackFillError> {
|
||||||
@@ -326,11 +325,18 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed");
|
debug!(batch_epoch = %batch_id, error = ?err, "Batch download failed");
|
||||||
match batch.download_failed(Some(*peer_id)) {
|
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
|
||||||
|
// mechanism compatible with PeerDAS and before PeerDAS?
|
||||||
|
match batch.download_failed(None) {
|
||||||
Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)),
|
Err(e) => self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0)),
|
||||||
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
|
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => self.fail_sync(match err {
|
||||||
self.fail_sync(BackFillError::BatchDownloadFailed(batch_id))
|
RpcResponseError::RpcError(_)
|
||||||
}
|
| RpcResponseError::VerifyError(_)
|
||||||
|
| RpcResponseError::InternalError(_) => {
|
||||||
|
BackFillError::BatchDownloadFailed(batch_id)
|
||||||
|
}
|
||||||
|
RpcResponseError::RequestExpired(_) => BackFillError::Paused,
|
||||||
|
}),
|
||||||
Ok(BatchOperationOutcome::Continue) => self.send_batch(network, batch_id),
|
Ok(BatchOperationOutcome::Continue) => self.send_batch(network, batch_id),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -929,6 +935,8 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
RangeRequestId::BackfillSync { batch_id },
|
RangeRequestId::BackfillSync { batch_id },
|
||||||
&synced_peers,
|
&synced_peers,
|
||||||
&failed_peers,
|
&failed_peers,
|
||||||
|
// Does not track total requests per peer for now
|
||||||
|
&HashMap::new(),
|
||||||
) {
|
) {
|
||||||
Ok(request_id) => {
|
Ok(request_id) => {
|
||||||
// inform the batch about the new request
|
// inform the batch about the new request
|
||||||
@@ -940,15 +948,9 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
Err(e) => match e {
|
Err(e) => match e {
|
||||||
RpcRequestSendError::NoPeer(no_peer) => {
|
// TODO(das): block_components_by_range requests can now hang out indefinitely.
|
||||||
// If we are here the chain has no more synced peers
|
// Is that fine? Maybe we should fail the requests from the network_context
|
||||||
info!(
|
// level without involving the BackfillSync itself.
|
||||||
"reason" = format!("insufficient_synced_peers({no_peer:?})"),
|
|
||||||
"Backfill sync paused"
|
|
||||||
);
|
|
||||||
self.set_state(BackFillState::Paused);
|
|
||||||
return Err(BackFillError::Paused);
|
|
||||||
}
|
|
||||||
RpcRequestSendError::InternalError(e) => {
|
RpcRequestSendError::InternalError(e) => {
|
||||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||||
warn!(%batch_id, error = ?e, %batch,"Could not send batch request");
|
warn!(%batch_id, error = ?e, %batch,"Could not send batch request");
|
||||||
|
|||||||
@@ -494,7 +494,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
|||||||
let Some(lookup) = self.single_block_lookups.get_mut(&id.lookup_id) else {
|
let Some(lookup) = self.single_block_lookups.get_mut(&id.lookup_id) else {
|
||||||
// We don't have the ability to cancel in-flight RPC requests. So this can happen
|
// We don't have the ability to cancel in-flight RPC requests. So this can happen
|
||||||
// if we started this RPC request, and later saw the block/blobs via gossip.
|
// if we started this RPC request, and later saw the block/blobs via gossip.
|
||||||
debug!(?id, "Block returned for single block lookup not present");
|
debug!(%id, "Block returned for single block lookup not present");
|
||||||
return Err(LookupRequestError::UnknownLookup);
|
return Err(LookupRequestError::UnknownLookup);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -507,7 +507,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
|||||||
Ok((response, peer_group, seen_timestamp)) => {
|
Ok((response, peer_group, seen_timestamp)) => {
|
||||||
debug!(
|
debug!(
|
||||||
?block_root,
|
?block_root,
|
||||||
?id,
|
%id,
|
||||||
?peer_group,
|
?peer_group,
|
||||||
?response_type,
|
?response_type,
|
||||||
"Received lookup download success"
|
"Received lookup download success"
|
||||||
@@ -540,7 +540,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
|||||||
// the peer and the request ID which is linked to this `id` value here.
|
// the peer and the request ID which is linked to this `id` value here.
|
||||||
debug!(
|
debug!(
|
||||||
?block_root,
|
?block_root,
|
||||||
?id,
|
%id,
|
||||||
?response_type,
|
?response_type,
|
||||||
error = ?e,
|
error = ?e,
|
||||||
"Received lookup download failure"
|
"Received lookup download failure"
|
||||||
|
|||||||
@@ -36,7 +36,8 @@
|
|||||||
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
|
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
|
||||||
use super::block_lookups::BlockLookups;
|
use super::block_lookups::BlockLookups;
|
||||||
use super::network_context::{
|
use super::network_context::{
|
||||||
CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent, SyncNetworkContext,
|
CustodyByRangeResult, CustodyByRootResult, RangeBlockComponent, RangeRequestId, RpcEvent,
|
||||||
|
SyncNetworkContext,
|
||||||
};
|
};
|
||||||
use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
|
use super::peer_sampling::{Sampling, SamplingConfig, SamplingResult};
|
||||||
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
|
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
|
||||||
@@ -58,9 +59,10 @@ use beacon_chain::{
|
|||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use lighthouse_network::rpc::RPCError;
|
use lighthouse_network::rpc::RPCError;
|
||||||
use lighthouse_network::service::api_types::{
|
use lighthouse_network::service::api_types::{
|
||||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId, CustodyRequester,
|
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||||
DataColumnsByRangeRequestId, DataColumnsByRootRequestId, DataColumnsByRootRequester, Id,
|
CustodyByRangeRequestId, CustodyRequester, DataColumnsByRangeRequestId,
|
||||||
SamplingId, SamplingRequester, SingleLookupReqId, SyncRequestId,
|
DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SamplingId, SamplingRequester,
|
||||||
|
SingleLookupReqId, SyncRequestId,
|
||||||
};
|
};
|
||||||
use lighthouse_network::types::{NetworkGlobals, SyncState};
|
use lighthouse_network::types::{NetworkGlobals, SyncState};
|
||||||
use lighthouse_network::PeerId;
|
use lighthouse_network::PeerId;
|
||||||
@@ -336,23 +338,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub(crate) fn get_range_sync_chains(
|
|
||||||
&self,
|
|
||||||
) -> Result<Option<(RangeSyncType, Slot, Slot)>, &'static str> {
|
|
||||||
self.range_sync.state()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub(crate) fn range_sync_state(&self) -> super::range_sync::SyncChainStatus {
|
|
||||||
self.range_sync.state()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub(crate) fn __range_failed_chains(&mut self) -> Vec<Hash256> {
|
|
||||||
self.range_sync.__failed_chains()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
|
pub(crate) fn get_failed_chains(&mut self) -> Vec<Hash256> {
|
||||||
self.block_lookups.get_failed_chains()
|
self.block_lookups.get_failed_chains()
|
||||||
@@ -377,6 +362,18 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
self.sampling.get_request_status(block_root, index)
|
self.sampling.get_request_status(block_root, index)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Leak the full network context to prevent having to add many cfg(test) methods here
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn network(&mut self) -> &mut SyncNetworkContext<T> {
|
||||||
|
&mut self.network
|
||||||
|
}
|
||||||
|
|
||||||
|
// Leak the full range_sync to prevent having to add many cfg(test) methods here
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn range_sync(&mut self) -> &mut RangeSync<T> {
|
||||||
|
&mut self.range_sync
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) fn update_execution_engine_state(&mut self, state: EngineState) {
|
pub(crate) fn update_execution_engine_state(&mut self, state: EngineState) {
|
||||||
self.handle_new_execution_engine_state(state);
|
self.handle_new_execution_engine_state(state);
|
||||||
@@ -442,6 +439,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
for (id, result) in self.network.continue_custody_by_root_requests() {
|
for (id, result) in self.network.continue_custody_by_root_requests() {
|
||||||
self.on_custody_by_root_result(id, result);
|
self.on_custody_by_root_result(id, result);
|
||||||
}
|
}
|
||||||
|
for (id, result) in self.network.continue_custody_by_range_requests() {
|
||||||
|
self.on_custody_by_range_result(id, result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trigger range sync for a set of peers that claim to have imported a head unknown to us.
|
/// Trigger range sync for a set of peers that claim to have imported a head unknown to us.
|
||||||
@@ -545,6 +545,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
for (id, result) in self.network.continue_custody_by_root_requests() {
|
for (id, result) in self.network.continue_custody_by_root_requests() {
|
||||||
self.on_custody_by_root_result(id, result);
|
self.on_custody_by_root_result(id, result);
|
||||||
}
|
}
|
||||||
|
for (id, result) in self.network.continue_custody_by_range_requests() {
|
||||||
|
self.on_custody_by_range_result(id, result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates the syncing state of a peer.
|
/// Updates the syncing state of a peer.
|
||||||
@@ -1186,10 +1189,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
|
block: RpcEvent<Arc<SignedBeaconBlock<T::EthSpec>>>,
|
||||||
) {
|
) {
|
||||||
if let Some(resp) = self.network.on_blocks_by_range_response(id, peer_id, block) {
|
if let Some(resp) = self.network.on_blocks_by_range_response(id, peer_id, block) {
|
||||||
self.on_range_components_response(
|
self.on_block_components_by_range_response(
|
||||||
id.parent_request_id,
|
id.parent_request_id,
|
||||||
peer_id,
|
RangeBlockComponent::Block(id, resp, peer_id),
|
||||||
RangeBlockComponent::Block(id, resp),
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1201,10 +1203,9 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
|
blob: RpcEvent<Arc<BlobSidecar<T::EthSpec>>>,
|
||||||
) {
|
) {
|
||||||
if let Some(resp) = self.network.on_blobs_by_range_response(id, peer_id, blob) {
|
if let Some(resp) = self.network.on_blobs_by_range_response(id, peer_id, blob) {
|
||||||
self.on_range_components_response(
|
self.on_block_components_by_range_response(
|
||||||
id.parent_request_id,
|
id.parent_request_id,
|
||||||
peer_id,
|
RangeBlockComponent::Blob(id, resp, peer_id),
|
||||||
RangeBlockComponent::Blob(id, resp),
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1215,18 +1216,46 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
peer_id: PeerId,
|
peer_id: PeerId,
|
||||||
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
|
data_column: RpcEvent<Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||||
) {
|
) {
|
||||||
|
// data_columns_by_range returns either an Ok list of data columns, or an RpcResponseError
|
||||||
if let Some(resp) = self
|
if let Some(resp) = self
|
||||||
.network
|
.network
|
||||||
.on_data_columns_by_range_response(id, peer_id, data_column)
|
.on_data_columns_by_range_response(id, peer_id, data_column)
|
||||||
{
|
{
|
||||||
self.on_range_components_response(
|
// custody_by_range accumulates the results of multiple data_columns_by_range requests
|
||||||
id.parent_request_id,
|
// returning a bigger list of data columns across all the column indices this node has
|
||||||
peer_id,
|
// to custody
|
||||||
RangeBlockComponent::CustodyColumns(id, resp),
|
if let Some(result) =
|
||||||
);
|
self.network
|
||||||
|
.on_custody_by_range_response(id.parent_request_id, id, peer_id, resp)
|
||||||
|
{
|
||||||
|
self.on_custody_by_range_result(id.parent_request_id, result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn on_custody_by_range_result(
|
||||||
|
&mut self,
|
||||||
|
id: CustodyByRangeRequestId,
|
||||||
|
result: CustodyByRangeResult<T::EthSpec>,
|
||||||
|
) {
|
||||||
|
// TODO(das): Improve the type of RangeBlockComponent::CustodyColumns to
|
||||||
|
// not have to pass a PeerGroup in case of error
|
||||||
|
let peers = match &result {
|
||||||
|
Ok((_, peers, _)) => peers.clone(),
|
||||||
|
// TODO(das): this PeerGroup with no peers is incorrect
|
||||||
|
Err(_) => PeerGroup::from_set(<_>::default()),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.on_block_components_by_range_response(
|
||||||
|
id.parent_request_id,
|
||||||
|
RangeBlockComponent::CustodyColumns(
|
||||||
|
id,
|
||||||
|
result.map(|(data, _peers, timestamp)| (data, timestamp)),
|
||||||
|
peers,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn on_custody_by_root_result(
|
fn on_custody_by_root_result(
|
||||||
&mut self,
|
&mut self,
|
||||||
requester: CustodyRequester,
|
requester: CustodyRequester,
|
||||||
@@ -1267,17 +1296,15 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
|
|
||||||
/// Handles receiving a response for a range sync request that should have both blocks and
|
/// Handles receiving a response for a range sync request that should have both blocks and
|
||||||
/// blobs.
|
/// blobs.
|
||||||
fn on_range_components_response(
|
fn on_block_components_by_range_response(
|
||||||
&mut self,
|
&mut self,
|
||||||
range_request_id: ComponentsByRangeRequestId,
|
range_request_id: ComponentsByRangeRequestId,
|
||||||
peer_id: PeerId,
|
|
||||||
range_block_component: RangeBlockComponent<T::EthSpec>,
|
range_block_component: RangeBlockComponent<T::EthSpec>,
|
||||||
) {
|
) {
|
||||||
if let Some(resp) = self.network.range_block_component_response(
|
if let Some(resp) = self
|
||||||
range_request_id,
|
.network
|
||||||
peer_id,
|
.on_block_components_by_range_response(range_request_id, range_block_component)
|
||||||
range_block_component,
|
{
|
||||||
) {
|
|
||||||
match resp {
|
match resp {
|
||||||
Ok((blocks, batch_peers)) => {
|
Ok((blocks, batch_peers)) => {
|
||||||
match range_request_id.requester {
|
match range_request_id.requester {
|
||||||
@@ -1315,7 +1342,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
RangeRequestId::RangeSync { chain_id, batch_id } => {
|
RangeRequestId::RangeSync { chain_id, batch_id } => {
|
||||||
self.range_sync.inject_error(
|
self.range_sync.inject_error(
|
||||||
&mut self.network,
|
&mut self.network,
|
||||||
peer_id,
|
|
||||||
batch_id,
|
batch_id,
|
||||||
chain_id,
|
chain_id,
|
||||||
range_request_id.id,
|
range_request_id.id,
|
||||||
@@ -1327,7 +1353,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
|||||||
match self.backfill_sync.inject_error(
|
match self.backfill_sync.inject_error(
|
||||||
&mut self.network,
|
&mut self.network,
|
||||||
batch_id,
|
batch_id,
|
||||||
&peer_id,
|
|
||||||
range_request_id.id,
|
range_request_id.id,
|
||||||
e,
|
e,
|
||||||
) {
|
) {
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
//! Stores the various syncing methods for the beacon chain.
|
//! Stores the various syncing methods for the beacon chain.
|
||||||
mod backfill_sync;
|
mod backfill_sync;
|
||||||
mod block_lookups;
|
mod block_lookups;
|
||||||
mod block_sidecar_coupling;
|
|
||||||
pub mod manager;
|
pub mod manager;
|
||||||
mod network_context;
|
mod network_context;
|
||||||
mod peer_sampling;
|
mod peer_sampling;
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
//! Provides network functionality for the Syncing thread. This fundamentally wraps a network
|
//! Provides network functionality for the Syncing thread. This fundamentally wraps a network
|
||||||
//! channel and stores a global RPC ID to perform requests.
|
//! channel and stores a global RPC ID to perform requests.
|
||||||
|
|
||||||
use self::custody::{ActiveCustodyRequest, Error as CustodyRequestError};
|
use self::custody_by_range::{ActiveCustodyByRangeRequest, CustodyByRangeRequestResult};
|
||||||
|
use self::custody_by_root::{ActiveCustodyByRootRequest, CustodyByRootRequestResult};
|
||||||
pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest};
|
pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest};
|
||||||
use super::block_sidecar_coupling::RangeBlockComponentsRequest;
|
|
||||||
use super::manager::BlockProcessType;
|
use super::manager::BlockProcessType;
|
||||||
use super::range_sync::{BatchPeers, ByRangeRequestType};
|
use super::range_sync::BatchPeers;
|
||||||
use super::SyncMessage;
|
use super::SyncMessage;
|
||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
use crate::network_beacon_processor::NetworkBeaconProcessor;
|
use crate::network_beacon_processor::NetworkBeaconProcessor;
|
||||||
@@ -17,15 +17,17 @@ use crate::sync::block_lookups::SingleLookupId;
|
|||||||
use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest;
|
use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest;
|
||||||
use beacon_chain::block_verification_types::RpcBlock;
|
use beacon_chain::block_verification_types::RpcBlock;
|
||||||
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState};
|
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState};
|
||||||
use custody::CustodyRequestResult;
|
pub use block_components_by_range::BlockComponentsByRangeRequest;
|
||||||
|
#[cfg(test)]
|
||||||
|
pub use block_components_by_range::BlockComponentsByRangeRequestStep;
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest};
|
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest};
|
||||||
use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError, RequestType};
|
use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError, RequestType};
|
||||||
pub use lighthouse_network::service::api_types::RangeRequestId;
|
pub use lighthouse_network::service::api_types::RangeRequestId;
|
||||||
use lighthouse_network::service::api_types::{
|
use lighthouse_network::service::api_types::{
|
||||||
AppRequestId, BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
AppRequestId, BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||||
CustodyId, CustodyRequester, DataColumnsByRangeRequestId, DataColumnsByRootRequestId,
|
CustodyByRangeRequestId, CustodyId, CustodyRequester, DataColumnsByRangeRequestId,
|
||||||
DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
|
DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId,
|
||||||
};
|
};
|
||||||
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource};
|
use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource};
|
||||||
use parking_lot::RwLock;
|
use parking_lot::RwLock;
|
||||||
@@ -36,7 +38,6 @@ use requests::{
|
|||||||
};
|
};
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use slot_clock::SlotClock;
|
use slot_clock::SlotClock;
|
||||||
use std::collections::hash_map::Entry;
|
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -47,11 +48,13 @@ use tokio::sync::mpsc;
|
|||||||
use tracing::{debug, error, span, warn, Level};
|
use tracing::{debug, error, span, warn, Level};
|
||||||
use types::blob_sidecar::FixedBlobSidecarList;
|
use types::blob_sidecar::FixedBlobSidecarList;
|
||||||
use types::{
|
use types::{
|
||||||
BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext,
|
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec,
|
||||||
Hash256, SignedBeaconBlock, Slot,
|
ForkContext, Hash256, SignedBeaconBlock, SignedBeaconBlockHeader, Slot,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub mod custody;
|
pub mod block_components_by_range;
|
||||||
|
pub mod custody_by_range;
|
||||||
|
pub mod custody_by_root;
|
||||||
mod requests;
|
mod requests;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -72,32 +75,29 @@ impl<T> RpcEvent<T> {
|
|||||||
|
|
||||||
pub type RpcResponseResult<T> = Result<(T, Duration), RpcResponseError>;
|
pub type RpcResponseResult<T> = Result<(T, Duration), RpcResponseError>;
|
||||||
|
|
||||||
/// Duration = latest seen timestamp of all received data columns
|
pub type RpcResponseBatchResult<T> = Result<(T, PeerGroup, Duration), RpcResponseError>;
|
||||||
pub type CustodyByRootResult<T> =
|
|
||||||
Result<(DataColumnSidecarList<T>, PeerGroup, Duration), RpcResponseError>;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
/// Duration = latest seen timestamp of all received data columns
|
||||||
|
pub type CustodyByRootResult<T> = RpcResponseBatchResult<DataColumnSidecarList<T>>;
|
||||||
|
|
||||||
|
pub type CustodyByRangeResult<T> = RpcResponseBatchResult<DataColumnSidecarList<T>>;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub enum RpcResponseError {
|
pub enum RpcResponseError {
|
||||||
RpcError(#[allow(dead_code)] RPCError),
|
RpcError(#[allow(dead_code)] RPCError),
|
||||||
VerifyError(LookupVerifyError),
|
VerifyError(LookupVerifyError),
|
||||||
CustodyRequestError(#[allow(dead_code)] CustodyRequestError),
|
RequestExpired(String),
|
||||||
BlockComponentCouplingError(#[allow(dead_code)] String),
|
InternalError(#[allow(dead_code)] String),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum RpcRequestSendError {
|
pub enum RpcRequestSendError {
|
||||||
/// No peer available matching the required criteria
|
|
||||||
NoPeer(NoPeerError),
|
|
||||||
/// These errors should never happen, including unreachable custody errors or network send
|
/// These errors should never happen, including unreachable custody errors or network send
|
||||||
/// errors.
|
/// errors.
|
||||||
InternalError(String),
|
InternalError(String),
|
||||||
}
|
// If RpcRequestSendError has a single variant `InternalError` it's to signal to downstream
|
||||||
|
// consumers that sends are expected to be infallible. If this assumption changes in the future,
|
||||||
/// Type of peer missing that caused a `RpcRequestSendError::NoPeers`
|
// add a new variant.
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
pub enum NoPeerError {
|
|
||||||
BlockPeer,
|
|
||||||
CustodyPeer(ColumnIndex),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
@@ -150,6 +150,17 @@ impl PeerGroup {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn as_reversed_map(&self) -> HashMap<u64, PeerId> {
|
||||||
|
// TODO(das): should we change PeerGroup to hold this map?
|
||||||
|
let mut index_to_peer = HashMap::<u64, PeerId>::new();
|
||||||
|
for (peer, indices) in self.peers.iter() {
|
||||||
|
for &index in indices {
|
||||||
|
index_to_peer.insert(index as u64, *peer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
index_to_peer
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sequential ID that uniquely identifies ReqResp outgoing requests
|
/// Sequential ID that uniquely identifies ReqResp outgoing requests
|
||||||
@@ -195,12 +206,15 @@ pub struct SyncNetworkContext<T: BeaconChainTypes> {
|
|||||||
data_columns_by_range_requests:
|
data_columns_by_range_requests:
|
||||||
ActiveRequests<DataColumnsByRangeRequestId, DataColumnsByRangeRequestItems<T::EthSpec>>,
|
ActiveRequests<DataColumnsByRangeRequestId, DataColumnsByRangeRequestItems<T::EthSpec>>,
|
||||||
|
|
||||||
/// Mapping of active custody column requests for a block root
|
/// Mapping of active custody column by root requests for a block root
|
||||||
custody_by_root_requests: FnvHashMap<CustodyRequester, ActiveCustodyRequest<T>>,
|
custody_by_root_requests: FnvHashMap<CustodyRequester, ActiveCustodyByRootRequest<T>>,
|
||||||
|
|
||||||
|
/// Mapping of active custody column by range requests
|
||||||
|
custody_by_range_requests: FnvHashMap<CustodyByRangeRequestId, ActiveCustodyByRangeRequest<T>>,
|
||||||
|
|
||||||
/// BlocksByRange requests paired with other ByRange requests for data components
|
/// BlocksByRange requests paired with other ByRange requests for data components
|
||||||
components_by_range_requests:
|
block_components_by_range_requests:
|
||||||
FnvHashMap<ComponentsByRangeRequestId, RangeBlockComponentsRequest<T::EthSpec>>,
|
FnvHashMap<ComponentsByRangeRequestId, BlockComponentsByRangeRequest<T>>,
|
||||||
|
|
||||||
/// Whether the ee is online. If it's not, we don't allow access to the
|
/// Whether the ee is online. If it's not, we don't allow access to the
|
||||||
/// `beacon_processor_send`.
|
/// `beacon_processor_send`.
|
||||||
@@ -219,14 +233,17 @@ pub enum RangeBlockComponent<E: EthSpec> {
|
|||||||
Block(
|
Block(
|
||||||
BlocksByRangeRequestId,
|
BlocksByRangeRequestId,
|
||||||
RpcResponseResult<Vec<Arc<SignedBeaconBlock<E>>>>,
|
RpcResponseResult<Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||||
|
PeerId,
|
||||||
),
|
),
|
||||||
Blob(
|
Blob(
|
||||||
BlobsByRangeRequestId,
|
BlobsByRangeRequestId,
|
||||||
RpcResponseResult<Vec<Arc<BlobSidecar<E>>>>,
|
RpcResponseResult<Vec<Arc<BlobSidecar<E>>>>,
|
||||||
|
PeerId,
|
||||||
),
|
),
|
||||||
CustodyColumns(
|
CustodyColumns(
|
||||||
DataColumnsByRangeRequestId,
|
CustodyByRangeRequestId,
|
||||||
RpcResponseResult<Vec<Arc<DataColumnSidecar<E>>>>,
|
RpcResponseResult<Vec<Arc<DataColumnSidecar<E>>>>,
|
||||||
|
PeerGroup,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -283,7 +300,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
blobs_by_range_requests: ActiveRequests::new("blobs_by_range"),
|
blobs_by_range_requests: ActiveRequests::new("blobs_by_range"),
|
||||||
data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"),
|
data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"),
|
||||||
custody_by_root_requests: <_>::default(),
|
custody_by_root_requests: <_>::default(),
|
||||||
components_by_range_requests: FnvHashMap::default(),
|
custody_by_range_requests: <_>::default(),
|
||||||
|
block_components_by_range_requests: <_>::default(),
|
||||||
network_beacon_processor,
|
network_beacon_processor,
|
||||||
chain,
|
chain,
|
||||||
fork_context,
|
fork_context,
|
||||||
@@ -297,6 +315,14 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
|
|
||||||
/// Returns the ids of all the requests made to the given peer_id.
|
/// Returns the ids of all the requests made to the given peer_id.
|
||||||
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec<SyncRequestId> {
|
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec<SyncRequestId> {
|
||||||
|
self.active_requests()
|
||||||
|
.filter(|(_, request_peer)| *request_peer == peer_id)
|
||||||
|
.map(|(id, _)| id)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the ids of all active requests
|
||||||
|
pub fn active_requests(&mut self) -> impl Iterator<Item = (SyncRequestId, &PeerId)> {
|
||||||
// Note: using destructuring pattern without a default case to make sure we don't forget to
|
// Note: using destructuring pattern without a default case to make sure we don't forget to
|
||||||
// add new request types to this function. Otherwise, lookup sync can break and lookups
|
// add new request types to this function. Otherwise, lookup sync can break and lookups
|
||||||
// will get stuck if a peer disconnects during an active request.
|
// will get stuck if a peer disconnects during an active request.
|
||||||
@@ -311,8 +337,9 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
data_columns_by_range_requests,
|
data_columns_by_range_requests,
|
||||||
// custody_by_root_requests is a meta request of data_columns_by_root_requests
|
// custody_by_root_requests is a meta request of data_columns_by_root_requests
|
||||||
custody_by_root_requests: _,
|
custody_by_root_requests: _,
|
||||||
|
custody_by_range_requests: _,
|
||||||
// components_by_range_requests is a meta request of various _by_range requests
|
// block_components_by_range_requests is a meta request of various _by_range requests
|
||||||
components_by_range_requests: _,
|
block_components_by_range_requests: _,
|
||||||
execution_engine_state: _,
|
execution_engine_state: _,
|
||||||
network_beacon_processor: _,
|
network_beacon_processor: _,
|
||||||
chain: _,
|
chain: _,
|
||||||
@@ -320,29 +347,23 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
let blocks_by_root_ids = blocks_by_root_requests
|
let blocks_by_root_ids = blocks_by_root_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::SingleBlock { id: *id }, peer));
|
||||||
.map(|id| SyncRequestId::SingleBlock { id: *id });
|
|
||||||
let blobs_by_root_ids = blobs_by_root_requests
|
let blobs_by_root_ids = blobs_by_root_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::SingleBlob { id: *id }, peer));
|
||||||
.map(|id| SyncRequestId::SingleBlob { id: *id });
|
|
||||||
let data_column_by_root_ids = data_columns_by_root_requests
|
let data_column_by_root_ids = data_columns_by_root_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::DataColumnsByRoot(*id), peer));
|
||||||
.map(|req_id| SyncRequestId::DataColumnsByRoot(*req_id));
|
|
||||||
let blocks_by_range_ids = blocks_by_range_requests
|
let blocks_by_range_ids = blocks_by_range_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::BlocksByRange(*id), peer));
|
||||||
.map(|req_id| SyncRequestId::BlocksByRange(*req_id));
|
|
||||||
let blobs_by_range_ids = blobs_by_range_requests
|
let blobs_by_range_ids = blobs_by_range_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::BlobsByRange(*id), peer));
|
||||||
.map(|req_id| SyncRequestId::BlobsByRange(*req_id));
|
|
||||||
let data_column_by_range_ids = data_columns_by_range_requests
|
let data_column_by_range_ids = data_columns_by_range_requests
|
||||||
.active_requests_of_peer(peer_id)
|
.active_requests()
|
||||||
.into_iter()
|
.map(|(id, peer)| (SyncRequestId::DataColumnsByRange(*id), peer));
|
||||||
.map(|req_id| SyncRequestId::DataColumnsByRange(*req_id));
|
|
||||||
|
|
||||||
blocks_by_root_ids
|
blocks_by_root_ids
|
||||||
.chain(blobs_by_root_ids)
|
.chain(blobs_by_root_ids)
|
||||||
@@ -350,6 +371,18 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
.chain(blocks_by_range_ids)
|
.chain(blocks_by_range_ids)
|
||||||
.chain(blobs_by_range_ids)
|
.chain(blobs_by_range_ids)
|
||||||
.chain(data_column_by_range_ids)
|
.chain(data_column_by_range_ids)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn active_block_components_by_range_requests(
|
||||||
|
&self,
|
||||||
|
) -> Vec<(
|
||||||
|
ComponentsByRangeRequestId,
|
||||||
|
BlockComponentsByRangeRequestStep,
|
||||||
|
)> {
|
||||||
|
self.block_components_by_range_requests
|
||||||
|
.iter()
|
||||||
|
.map(|(id, req)| (*id, req.state_step()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,6 +395,10 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
&self.network_beacon_processor.network_globals
|
&self.network_beacon_processor.network_globals
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn spec(&self) -> &ChainSpec {
|
||||||
|
&self.chain.spec
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the Client type of the peer if known
|
/// Returns the Client type of the peer if known
|
||||||
pub fn client_type(&self, peer_id: &PeerId) -> Client {
|
pub fn client_type(&self, peer_id: &PeerId) -> Client {
|
||||||
self.network_globals()
|
self.network_globals()
|
||||||
@@ -414,8 +451,9 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
data_columns_by_range_requests,
|
data_columns_by_range_requests,
|
||||||
// custody_by_root_requests is a meta request of data_columns_by_root_requests
|
// custody_by_root_requests is a meta request of data_columns_by_root_requests
|
||||||
custody_by_root_requests: _,
|
custody_by_root_requests: _,
|
||||||
|
custody_by_range_requests: _,
|
||||||
// components_by_range_requests is a meta request of various _by_range requests
|
// block_components_by_range_requests is a meta request of various _by_range requests
|
||||||
components_by_range_requests: _,
|
block_components_by_range_requests: _,
|
||||||
execution_engine_state: _,
|
execution_engine_state: _,
|
||||||
network_beacon_processor: _,
|
network_beacon_processor: _,
|
||||||
chain: _,
|
chain: _,
|
||||||
@@ -447,205 +485,95 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
requester: RangeRequestId,
|
requester: RangeRequestId,
|
||||||
peers: &HashSet<PeerId>,
|
peers: &HashSet<PeerId>,
|
||||||
peers_to_deprioritize: &HashSet<PeerId>,
|
peers_to_deprioritize: &HashSet<PeerId>,
|
||||||
|
total_requests_per_peer: &HashMap<PeerId, usize>,
|
||||||
) -> Result<Id, RpcRequestSendError> {
|
) -> Result<Id, RpcRequestSendError> {
|
||||||
let batch_epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch());
|
|
||||||
let batch_type = self.batch_type(batch_epoch);
|
|
||||||
|
|
||||||
let active_request_count_by_peer = self.active_request_count_by_peer();
|
|
||||||
|
|
||||||
let Some(block_peer) = peers
|
|
||||||
.iter()
|
|
||||||
.map(|peer| {
|
|
||||||
(
|
|
||||||
// If contains -> 1 (order after), not contains -> 0 (order first)
|
|
||||||
peers_to_deprioritize.contains(peer),
|
|
||||||
// Prefer peers with less overall requests
|
|
||||||
active_request_count_by_peer.get(peer).copied().unwrap_or(0),
|
|
||||||
// Random factor to break ties, otherwise the PeerID breaks ties
|
|
||||||
rand::random::<u32>(),
|
|
||||||
peer,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.min()
|
|
||||||
.map(|(_, _, _, peer)| *peer)
|
|
||||||
else {
|
|
||||||
// Backfill and forward sync handle this condition gracefully.
|
|
||||||
// - Backfill sync: will pause waiting for more peers to join
|
|
||||||
// - Forward sync: can never happen as the chain is dropped when removing the last peer.
|
|
||||||
return Err(RpcRequestSendError::NoPeer(NoPeerError::BlockPeer));
|
|
||||||
};
|
|
||||||
|
|
||||||
// Attempt to find all required custody peers before sending any request or creating an ID
|
|
||||||
let columns_by_range_peers_to_request =
|
|
||||||
if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) {
|
|
||||||
let column_indexes = self.network_globals().sampling_columns.clone();
|
|
||||||
Some(self.select_columns_by_range_peers_to_request(
|
|
||||||
&column_indexes,
|
|
||||||
peers,
|
|
||||||
active_request_count_by_peer,
|
|
||||||
peers_to_deprioritize,
|
|
||||||
)?)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create the overall components_by_range request ID before its individual components
|
|
||||||
let id = ComponentsByRangeRequestId {
|
let id = ComponentsByRangeRequestId {
|
||||||
id: self.next_id(),
|
id: self.next_id(),
|
||||||
requester,
|
requester,
|
||||||
};
|
};
|
||||||
|
|
||||||
let blocks_req_id = self.send_blocks_by_range_request(block_peer, request.clone(), id)?;
|
let req = BlockComponentsByRangeRequest::new(
|
||||||
|
id,
|
||||||
|
request,
|
||||||
|
peers,
|
||||||
|
peers_to_deprioritize,
|
||||||
|
total_requests_per_peer,
|
||||||
|
self,
|
||||||
|
)?;
|
||||||
|
|
||||||
let blobs_req_id = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) {
|
self.block_components_by_range_requests.insert(id, req);
|
||||||
Some(self.send_blobs_by_range_request(
|
|
||||||
block_peer,
|
|
||||||
BlobsByRangeRequest {
|
|
||||||
start_slot: *request.start_slot(),
|
|
||||||
count: *request.count(),
|
|
||||||
},
|
|
||||||
id,
|
|
||||||
)?)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
let data_column_requests = columns_by_range_peers_to_request
|
|
||||||
.map(|columns_by_range_peers_to_request| {
|
|
||||||
let column_to_peer_map = columns_by_range_peers_to_request
|
|
||||||
.iter()
|
|
||||||
.flat_map(|(peer_id, columns)| columns.iter().map(|column| (*column, *peer_id)))
|
|
||||||
.collect::<HashMap<ColumnIndex, PeerId>>();
|
|
||||||
|
|
||||||
let requests = columns_by_range_peers_to_request
|
|
||||||
.into_iter()
|
|
||||||
.map(|(peer_id, columns)| {
|
|
||||||
self.send_data_columns_by_range_request(
|
|
||||||
peer_id,
|
|
||||||
DataColumnsByRangeRequest {
|
|
||||||
start_slot: *request.start_slot(),
|
|
||||||
count: *request.count(),
|
|
||||||
columns,
|
|
||||||
},
|
|
||||||
id,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
|
||||||
|
|
||||||
Ok((requests, column_to_peer_map))
|
|
||||||
})
|
|
||||||
.transpose()?;
|
|
||||||
|
|
||||||
let info =
|
|
||||||
RangeBlockComponentsRequest::new(blocks_req_id, blobs_req_id, data_column_requests);
|
|
||||||
self.components_by_range_requests.insert(id, info);
|
|
||||||
|
|
||||||
|
// TODO: use ID
|
||||||
Ok(id.id)
|
Ok(id.id)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_columns_by_range_peers_to_request(
|
/// Received a blocks by range or blobs by range response for a request that couples blocks
|
||||||
&self,
|
/// and blobs.
|
||||||
custody_indexes: &HashSet<ColumnIndex>,
|
|
||||||
peers: &HashSet<PeerId>,
|
|
||||||
active_request_count_by_peer: HashMap<PeerId, usize>,
|
|
||||||
peers_to_deprioritize: &HashSet<PeerId>,
|
|
||||||
) -> Result<HashMap<PeerId, Vec<ColumnIndex>>, RpcRequestSendError> {
|
|
||||||
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
|
||||||
|
|
||||||
for column_index in custody_indexes {
|
|
||||||
// Strictly consider peers that are custodials of this column AND are part of this
|
|
||||||
// syncing chain. If the forward range sync chain has few peers, it's likely that this
|
|
||||||
// function will not be able to find peers on our custody columns.
|
|
||||||
let Some(custody_peer) = peers
|
|
||||||
.iter()
|
|
||||||
.filter(|peer| {
|
|
||||||
self.network_globals()
|
|
||||||
.is_custody_peer_of(*column_index, peer)
|
|
||||||
})
|
|
||||||
.map(|peer| {
|
|
||||||
(
|
|
||||||
// If contains -> 1 (order after), not contains -> 0 (order first)
|
|
||||||
peers_to_deprioritize.contains(peer),
|
|
||||||
// Prefer peers with less overall requests
|
|
||||||
// Also account for requests that are not yet issued tracked in peer_id_to_request_map
|
|
||||||
// We batch requests to the same peer, so count existence in the
|
|
||||||
// `columns_to_request_by_peer` as a single 1 request.
|
|
||||||
active_request_count_by_peer.get(peer).copied().unwrap_or(0)
|
|
||||||
+ columns_to_request_by_peer.get(peer).map(|_| 1).unwrap_or(0),
|
|
||||||
// Random factor to break ties, otherwise the PeerID breaks ties
|
|
||||||
rand::random::<u32>(),
|
|
||||||
peer,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.min()
|
|
||||||
.map(|(_, _, _, peer)| *peer)
|
|
||||||
else {
|
|
||||||
// TODO(das): this will be pretty bad UX. To improve we should:
|
|
||||||
// - Handle the no peers case gracefully, maybe add some timeout and give a few
|
|
||||||
// minutes / seconds to the peer manager to locate peers on this subnet before
|
|
||||||
// abandoning progress on the chain completely.
|
|
||||||
return Err(RpcRequestSendError::NoPeer(NoPeerError::CustodyPeer(
|
|
||||||
*column_index,
|
|
||||||
)));
|
|
||||||
};
|
|
||||||
|
|
||||||
columns_to_request_by_peer
|
|
||||||
.entry(custody_peer)
|
|
||||||
.or_default()
|
|
||||||
.push(*column_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(columns_to_request_by_peer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Received a _by_range response for a request that couples blocks and its data
|
|
||||||
///
|
|
||||||
/// `peer_id` is the peer that served this individual RPC _by_range response.
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
pub fn range_block_component_response(
|
pub fn on_block_components_by_range_response(
|
||||||
&mut self,
|
&mut self,
|
||||||
id: ComponentsByRangeRequestId,
|
id: ComponentsByRangeRequestId,
|
||||||
peer_id: PeerId,
|
|
||||||
range_block_component: RangeBlockComponent<T::EthSpec>,
|
range_block_component: RangeBlockComponent<T::EthSpec>,
|
||||||
) -> Option<Result<(Vec<RpcBlock<T::EthSpec>>, BatchPeers), RpcResponseError>> {
|
) -> Option<Result<(Vec<RpcBlock<T::EthSpec>>, BatchPeers), RpcResponseError>> {
|
||||||
let Entry::Occupied(mut entry) = self.components_by_range_requests.entry(id) else {
|
// Note: need to remove the request to borrow self again below. Otherwise we can't
|
||||||
metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]);
|
// do nested requests
|
||||||
|
let Some(mut request) = self.block_components_by_range_requests.remove(&id) else {
|
||||||
|
metrics::inc_counter_vec(
|
||||||
|
&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS,
|
||||||
|
&["block_components_by_range"],
|
||||||
|
);
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Err(e) = {
|
let result = match range_block_component {
|
||||||
let request = entry.get_mut();
|
RangeBlockComponent::Block(req_id, resp, peer_id) => resp.and_then(|(blocks, _)| {
|
||||||
match range_block_component {
|
request
|
||||||
RangeBlockComponent::Block(req_id, resp) => resp.and_then(|(blocks, _)| {
|
.on_blocks_by_range_result(req_id, blocks, peer_id, self)
|
||||||
|
.map_err(Into::<RpcResponseError>::into)
|
||||||
|
}),
|
||||||
|
RangeBlockComponent::Blob(req_id, resp, peer_id) => resp.and_then(|(blobs, _)| {
|
||||||
|
request
|
||||||
|
.on_blobs_by_range_result(req_id, blobs, peer_id, self)
|
||||||
|
.map_err(Into::<RpcResponseError>::into)
|
||||||
|
}),
|
||||||
|
RangeBlockComponent::CustodyColumns(req_id, resp, peers) => {
|
||||||
|
resp.and_then(|(custody_columns, _)| {
|
||||||
request
|
request
|
||||||
.add_blocks(req_id, blocks, peer_id)
|
.on_custody_by_range_result(req_id, custody_columns, peers, self)
|
||||||
.map_err(RpcResponseError::BlockComponentCouplingError)
|
.map_err(Into::<RpcResponseError>::into)
|
||||||
}),
|
})
|
||||||
RangeBlockComponent::Blob(req_id, resp) => resp.and_then(|(blobs, _)| {
|
|
||||||
request
|
|
||||||
.add_blobs(req_id, blobs, peer_id)
|
|
||||||
.map_err(RpcResponseError::BlockComponentCouplingError)
|
|
||||||
}),
|
|
||||||
RangeBlockComponent::CustodyColumns(req_id, resp) => {
|
|
||||||
resp.and_then(|(custody_columns, _)| {
|
|
||||||
request
|
|
||||||
.add_custody_columns(req_id, custody_columns, peer_id)
|
|
||||||
.map_err(RpcResponseError::BlockComponentCouplingError)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} {
|
};
|
||||||
entry.remove();
|
|
||||||
return Some(Err(e));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(blocks_result) = entry.get().responses(&self.chain.spec) {
|
let result = result.transpose();
|
||||||
entry.remove();
|
|
||||||
// If the request is finished, dequeue everything
|
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
|
||||||
Some(blocks_result.map_err(RpcResponseError::BlockComponentCouplingError))
|
// an Option first to use in an `if let Some() { act on result }` block.
|
||||||
} else {
|
match result.as_ref() {
|
||||||
None
|
Some(Ok((blocks, peer_group))) => {
|
||||||
|
let blocks_with_data = blocks
|
||||||
|
.iter()
|
||||||
|
.filter(|block| block.as_block().has_data())
|
||||||
|
.count();
|
||||||
|
// Don't log the peer_group here, it's very long (could be up to 128 peers). If you
|
||||||
|
// want to trace which peer sent the column at index X, search for the log:
|
||||||
|
// `Sync RPC request sent method="DataColumnsByRange" ...`
|
||||||
|
debug!(
|
||||||
|
%id,
|
||||||
|
blocks = blocks.len(),
|
||||||
|
blocks_with_data,
|
||||||
|
block_peer = ?peer_group.block(),
|
||||||
|
"Block components by range request success, removing"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Some(Err(e)) => {
|
||||||
|
debug!(%id, error = ?e, "Block components by range request failure, removing" )
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.block_components_by_range_requests.insert(id, request);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Request block of `block_root` if necessary by checking:
|
/// Request block of `block_root` if necessary by checking:
|
||||||
@@ -853,7 +781,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Request to send a single `data_columns_by_root` request to the network.
|
/// Request to send a single `data_columns_by_root` request to the network.
|
||||||
pub fn data_column_lookup_request(
|
pub fn data_columns_by_root_request(
|
||||||
&mut self,
|
&mut self,
|
||||||
requester: DataColumnsByRootRequester,
|
requester: DataColumnsByRootRequester,
|
||||||
peer_id: PeerId,
|
peer_id: PeerId,
|
||||||
@@ -951,7 +879,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
);
|
);
|
||||||
|
|
||||||
let requester = CustodyRequester(id);
|
let requester = CustodyRequester(id);
|
||||||
let mut request = ActiveCustodyRequest::new(
|
let mut request = ActiveCustodyByRootRequest::new(
|
||||||
block_root,
|
block_root,
|
||||||
CustodyId { requester },
|
CustodyId { requester },
|
||||||
&custody_indexes_to_fetch,
|
&custody_indexes_to_fetch,
|
||||||
@@ -967,25 +895,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
self.custody_by_root_requests.insert(requester, request);
|
self.custody_by_root_requests.insert(requester, request);
|
||||||
Ok(LookupRequestResult::RequestSent(id.req_id))
|
Ok(LookupRequestResult::RequestSent(id.req_id))
|
||||||
}
|
}
|
||||||
Err(e) => Err(match e {
|
Err(e) => Err(e.into()),
|
||||||
CustodyRequestError::NoPeer(column_index) => {
|
|
||||||
RpcRequestSendError::NoPeer(NoPeerError::CustodyPeer(column_index))
|
|
||||||
}
|
|
||||||
// - TooManyFailures: Should never happen, `request` has just been created, it's
|
|
||||||
// count of download_failures is 0 here
|
|
||||||
// - BadState: Should never happen, a bad state can only happen when handling a
|
|
||||||
// network response
|
|
||||||
// - UnexpectedRequestId: Never happens: this Err is only constructed handling a
|
|
||||||
// download or processing response
|
|
||||||
// - SendFailed: Should never happen unless in a bad drop sequence when shutting
|
|
||||||
// down the node
|
|
||||||
e @ (CustodyRequestError::TooManyFailures
|
|
||||||
| CustodyRequestError::BadState { .. }
|
|
||||||
| CustodyRequestError::UnexpectedRequestId { .. }
|
|
||||||
| CustodyRequestError::SendFailed { .. }) => {
|
|
||||||
RpcRequestSendError::InternalError(format!("{e:?}"))
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1073,8 +983,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
peer_id: PeerId,
|
peer_id: PeerId,
|
||||||
request: DataColumnsByRangeRequest,
|
request: DataColumnsByRangeRequest,
|
||||||
parent_request_id: ComponentsByRangeRequestId,
|
parent_request_id: CustodyByRangeRequestId,
|
||||||
) -> Result<DataColumnsByRangeRequestId, RpcRequestSendError> {
|
) -> Result<DataColumnsByRangeRequestId, &'static str> {
|
||||||
let id = DataColumnsByRangeRequestId {
|
let id = DataColumnsByRangeRequestId {
|
||||||
id: self.next_id(),
|
id: self.next_id(),
|
||||||
parent_request_id,
|
parent_request_id,
|
||||||
@@ -1085,7 +995,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
request: RequestType::DataColumnsByRange(request.clone()),
|
request: RequestType::DataColumnsByRange(request.clone()),
|
||||||
app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRange(id)),
|
app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRange(id)),
|
||||||
})
|
})
|
||||||
.map_err(|_| RpcRequestSendError::InternalError("network send error".to_owned()))?;
|
.map_err(|_| "network send error")?;
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
method = "DataColumnsByRange",
|
method = "DataColumnsByRange",
|
||||||
@@ -1108,6 +1018,50 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
Ok(id)
|
Ok(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Request to fetch all needed custody columns of a range of slots. This function may not send
|
||||||
|
/// any request to the network if no columns have to be fetched based on the import state of the
|
||||||
|
/// node. A custody request is a "super request" that may trigger 0 or more `data_columns_by_range`
|
||||||
|
/// requests.
|
||||||
|
pub fn send_custody_by_range_request(
|
||||||
|
&mut self,
|
||||||
|
parent_id: ComponentsByRangeRequestId,
|
||||||
|
blocks_with_data: Vec<SignedBeaconBlockHeader>,
|
||||||
|
epoch: Epoch,
|
||||||
|
column_indices: Vec<ColumnIndex>,
|
||||||
|
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||||
|
) -> Result<CustodyByRangeRequestId, RpcRequestSendError> {
|
||||||
|
let id = CustodyByRangeRequestId {
|
||||||
|
id: self.next_id(),
|
||||||
|
parent_request_id: parent_id,
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
indices = ?column_indices,
|
||||||
|
%id,
|
||||||
|
"Starting custody columns by range request"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut request = ActiveCustodyByRangeRequest::new(
|
||||||
|
id,
|
||||||
|
epoch,
|
||||||
|
blocks_with_data,
|
||||||
|
&column_indices,
|
||||||
|
lookup_peers,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Note that you can only send, but not handle a response here
|
||||||
|
match request.continue_requests(self) {
|
||||||
|
Ok(_) => {
|
||||||
|
// Ignoring the result of `continue_requests` is okay. A request that has just been
|
||||||
|
// created cannot return data immediately, it must send some request to the network
|
||||||
|
// first. And there must exist some request, `custody_indexes_to_fetch` is not empty.
|
||||||
|
self.custody_by_range_requests.insert(id, request);
|
||||||
|
Ok(id)
|
||||||
|
}
|
||||||
|
Err(e) => Err(e.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_execution_engine_online(&self) -> bool {
|
pub fn is_execution_engine_online(&self) -> bool {
|
||||||
self.execution_engine_state == EngineState::Online
|
self.execution_engine_state == EngineState::Online
|
||||||
}
|
}
|
||||||
@@ -1212,34 +1166,6 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
id
|
id
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether a batch for this epoch (and only this epoch) should request just blocks or
|
|
||||||
/// blocks and blobs.
|
|
||||||
fn batch_type(&self, epoch: types::Epoch) -> ByRangeRequestType {
|
|
||||||
// Induces a compile time panic if this doesn't hold true.
|
|
||||||
#[allow(clippy::assertions_on_constants)]
|
|
||||||
const _: () = assert!(
|
|
||||||
super::backfill_sync::BACKFILL_EPOCHS_PER_BATCH == 1
|
|
||||||
&& super::range_sync::EPOCHS_PER_BATCH == 1,
|
|
||||||
"To deal with alignment with deneb boundaries, batches need to be of just one epoch"
|
|
||||||
);
|
|
||||||
|
|
||||||
if self
|
|
||||||
.chain
|
|
||||||
.data_availability_checker
|
|
||||||
.data_columns_required_for_epoch(epoch)
|
|
||||||
{
|
|
||||||
ByRangeRequestType::BlocksAndColumns
|
|
||||||
} else if self
|
|
||||||
.chain
|
|
||||||
.data_availability_checker
|
|
||||||
.blobs_required_for_epoch(epoch)
|
|
||||||
{
|
|
||||||
ByRangeRequestType::BlocksAndBlobs
|
|
||||||
} else {
|
|
||||||
ByRangeRequestType::Blocks
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Attempt to make progress on all custody_by_root requests. Some request may be stale waiting
|
/// Attempt to make progress on all custody_by_root requests. Some request may be stale waiting
|
||||||
/// for custody peers. Returns a Vec of results as zero or more requests may fail in this
|
/// for custody peers. Returns a Vec of results as zero or more requests may fail in this
|
||||||
/// attempt.
|
/// attempt.
|
||||||
@@ -1266,6 +1192,32 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempt to make progress on all custody_by_range requests. Some request may be stale waiting
|
||||||
|
/// for custody peers. Returns a Vec of results as zero or more requests may fail in this
|
||||||
|
/// attempt.
|
||||||
|
pub fn continue_custody_by_range_requests(
|
||||||
|
&mut self,
|
||||||
|
) -> Vec<(CustodyByRangeRequestId, CustodyByRangeResult<T::EthSpec>)> {
|
||||||
|
let ids = self
|
||||||
|
.custody_by_range_requests
|
||||||
|
.keys()
|
||||||
|
.copied()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Need to collect ids and results in separate steps to re-borrow self.
|
||||||
|
ids.into_iter()
|
||||||
|
.filter_map(|id| {
|
||||||
|
let mut request = self
|
||||||
|
.custody_by_range_requests
|
||||||
|
.remove(&id)
|
||||||
|
.expect("key of hashmap");
|
||||||
|
let result = request.continue_requests(self);
|
||||||
|
self.handle_custody_by_range_result(id, request, result)
|
||||||
|
.map(|result| (id, result))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
// Request handlers
|
// Request handlers
|
||||||
|
|
||||||
pub(crate) fn on_single_block_response(
|
pub(crate) fn on_single_block_response(
|
||||||
@@ -1425,8 +1377,10 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
// Note: need to remove the request to borrow self again below. Otherwise we can't
|
// Note: need to remove the request to borrow self again below. Otherwise we can't
|
||||||
// do nested requests
|
// do nested requests
|
||||||
let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else {
|
let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else {
|
||||||
// TODO(das): This log can happen if the request is error'ed early and dropped
|
metrics::inc_counter_vec(
|
||||||
debug!(?id, "Custody column downloaded event for unknown request");
|
&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS,
|
||||||
|
&["custody_by_root"],
|
||||||
|
);
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1438,8 +1392,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
fn handle_custody_by_root_result(
|
fn handle_custody_by_root_result(
|
||||||
&mut self,
|
&mut self,
|
||||||
id: CustodyRequester,
|
id: CustodyRequester,
|
||||||
request: ActiveCustodyRequest<T>,
|
request: ActiveCustodyByRootRequest<T>,
|
||||||
result: CustodyRequestResult<T::EthSpec>,
|
result: CustodyByRootRequestResult<T::EthSpec>,
|
||||||
) -> Option<CustodyByRootResult<T::EthSpec>> {
|
) -> Option<CustodyByRootResult<T::EthSpec>> {
|
||||||
let span = span!(
|
let span = span!(
|
||||||
Level::INFO,
|
Level::INFO,
|
||||||
@@ -1448,18 +1402,16 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
);
|
);
|
||||||
let _enter = span.enter();
|
let _enter = span.enter();
|
||||||
|
|
||||||
let result = result
|
let result = result.map_err(Into::<RpcResponseError>::into).transpose();
|
||||||
.map_err(RpcResponseError::CustodyRequestError)
|
|
||||||
.transpose();
|
|
||||||
|
|
||||||
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
|
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
|
||||||
// an Option first to use in an `if let Some() { act on result }` block.
|
// an Option first to use in an `if let Some() { act on result }` block.
|
||||||
match result.as_ref() {
|
match result.as_ref() {
|
||||||
Some(Ok((columns, peer_group, _))) => {
|
Some(Ok((columns, peer_group, _))) => {
|
||||||
debug!(?id, count = columns.len(), peers = ?peer_group, "Custody request success, removing")
|
debug!(%id, count = columns.len(), peers = ?peer_group, "Custody by root request success, removing")
|
||||||
}
|
}
|
||||||
Some(Err(e)) => {
|
Some(Err(e)) => {
|
||||||
debug!(?id, error = ?e, "Custody request failure, removing" )
|
debug!(%id, error = ?e, "Custody by root request failure, removing" )
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
self.custody_by_root_requests.insert(id, request);
|
self.custody_by_root_requests.insert(id, request);
|
||||||
@@ -1468,6 +1420,61 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Insert a downloaded column into an active custody request. Then make progress on the
|
||||||
|
/// entire request.
|
||||||
|
///
|
||||||
|
/// ### Returns
|
||||||
|
///
|
||||||
|
/// - `Some`: Request completed, won't make more progress. Expect requester to act on the result.
|
||||||
|
/// - `None`: Request still active, requester should do no action
|
||||||
|
#[allow(clippy::type_complexity)]
|
||||||
|
pub fn on_custody_by_range_response(
|
||||||
|
&mut self,
|
||||||
|
id: CustodyByRangeRequestId,
|
||||||
|
req_id: DataColumnsByRangeRequestId,
|
||||||
|
peer_id: PeerId,
|
||||||
|
resp: RpcResponseResult<Vec<Arc<DataColumnSidecar<T::EthSpec>>>>,
|
||||||
|
) -> Option<CustodyByRootResult<T::EthSpec>> {
|
||||||
|
// Note: need to remove the request to borrow self again below. Otherwise we can't
|
||||||
|
// do nested requests
|
||||||
|
let Some(mut request) = self.custody_by_range_requests.remove(&id) else {
|
||||||
|
// TOOD(das): This log can happen if the request is error'ed early and dropped
|
||||||
|
debug!(%id, "Custody by range downloaded event for unknown request");
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = request.on_data_column_downloaded(peer_id, req_id, resp, self);
|
||||||
|
|
||||||
|
self.handle_custody_by_range_result(id, request, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_custody_by_range_result(
|
||||||
|
&mut self,
|
||||||
|
id: CustodyByRangeRequestId,
|
||||||
|
request: ActiveCustodyByRangeRequest<T>,
|
||||||
|
result: CustodyByRangeRequestResult<T::EthSpec>,
|
||||||
|
) -> Option<CustodyByRangeResult<T::EthSpec>> {
|
||||||
|
let result = result.map_err(Into::<RpcResponseError>::into).transpose();
|
||||||
|
|
||||||
|
// Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to
|
||||||
|
// an Option first to use in an `if let Some() { act on result }` block.
|
||||||
|
match result.as_ref() {
|
||||||
|
Some(Ok((columns, _peer_group, _))) => {
|
||||||
|
// Don't log the peer_group here, it's very long (could be up to 128 peers). If you
|
||||||
|
// want to trace which peer sent the column at index X, search for the log:
|
||||||
|
// `Sync RPC request sent method="DataColumnsByRange" ...`
|
||||||
|
debug!(%id, count = columns.len(), "Custody by range request success, removing")
|
||||||
|
}
|
||||||
|
Some(Err(e)) => {
|
||||||
|
debug!(%id, error = ?e, "Custody by range request failure, removing" )
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.custody_by_range_requests.insert(id, request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
pub fn send_block_for_processing(
|
pub fn send_block_for_processing(
|
||||||
&self,
|
&self,
|
||||||
id: Id,
|
id: Id,
|
||||||
@@ -1529,7 +1536,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
.beacon_processor_if_enabled()
|
.beacon_processor_if_enabled()
|
||||||
.ok_or(SendErrorProcessor::ProcessorNotAvailable)?;
|
.ok_or(SendErrorProcessor::ProcessorNotAvailable)?;
|
||||||
|
|
||||||
debug!(?block_root, ?id, "Sending blobs for processing");
|
debug!(?block_root, %id, "Sending blobs for processing");
|
||||||
// Lookup sync event safety: If `beacon_processor.send_rpc_blobs` returns Ok() sync
|
// Lookup sync event safety: If `beacon_processor.send_rpc_blobs` returns Ok() sync
|
||||||
// must receive a single `SyncMessage::BlockComponentProcessed` event with this process type
|
// must receive a single `SyncMessage::BlockComponentProcessed` event with this process type
|
||||||
beacon_processor
|
beacon_processor
|
||||||
@@ -1600,8 +1607,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
|||||||
),
|
),
|
||||||
("custody_by_root", self.custody_by_root_requests.len()),
|
("custody_by_root", self.custody_by_root_requests.len()),
|
||||||
(
|
(
|
||||||
"components_by_range",
|
"block_components_by_range",
|
||||||
self.components_by_range_requests.len(),
|
self.block_components_by_range_requests.len(),
|
||||||
),
|
),
|
||||||
] {
|
] {
|
||||||
metrics::set_gauge_vec(&metrics::SYNC_ACTIVE_NETWORK_REQUESTS, &[id], count as i64);
|
metrics::set_gauge_vec(&metrics::SYNC_ACTIVE_NETWORK_REQUESTS, &[id], count as i64);
|
||||||
|
|||||||
@@ -0,0 +1,550 @@
|
|||||||
|
use crate::sync::network_context::{
|
||||||
|
PeerGroup, RpcRequestSendError, RpcResponseError, SyncNetworkContext,
|
||||||
|
};
|
||||||
|
use crate::sync::range_sync::BatchPeers;
|
||||||
|
use beacon_chain::block_verification_types::RpcBlock;
|
||||||
|
use beacon_chain::data_column_verification::CustodyDataColumn;
|
||||||
|
use beacon_chain::{get_block_root, BeaconChainTypes};
|
||||||
|
use lighthouse_network::rpc::methods::{BlobsByRangeRequest, BlocksByRangeRequest};
|
||||||
|
use lighthouse_network::service::api_types::{
|
||||||
|
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||||
|
CustodyByRangeRequestId,
|
||||||
|
};
|
||||||
|
use lighthouse_network::PeerId;
|
||||||
|
use parking_lot::RwLock;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use types::{
|
||||||
|
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, EthSpec, Hash256, RuntimeVariableList,
|
||||||
|
SignedBeaconBlock, Slot,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A by-range request for a batch of blocks together with their data (blobs or custody
/// columns, depending on the fork of the batch).
pub struct BlockComponentsByRangeRequest<T: BeaconChainTypes> {
    /// ID of this request, attached to every child request it sends
    id: ComponentsByRangeRequestId,
    /// Peer set of the batch. Created in `new` from a copy of the peers passed in, and handed
    /// to the custody by range request when one is sent.
    peers: Arc<RwLock<HashSet<PeerId>>>,
    /// The original blocks by range request; its start slot and count define the batch
    request: BlocksByRangeRequest,
    /// Which child requests are in flight / completed
    state: State<T::EthSpec>,
}
|
||||||
|
|
||||||
|
/// Child-request state of a `BlockComponentsByRangeRequest`. The variant is selected in
/// `BlockComponentsByRangeRequest::new` from the fork of the batch's epoch.
enum State<E: EthSpec> {
    // Batch in a fork without Deneb nor Fulu enabled: a single blocks request
    Base {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
    },
    // Two single concurrent requests for block + blobs
    DenebEnabled {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
        blobs_by_range_request: ByRangeRequest<BlobsByRangeRequestId, Vec<Arc<BlobSidecar<E>>>>,
    },
    // Request blocks first, then columns
    FuluEnabled(FuluEnabledState<E>),
}
|
||||||
|
|
||||||
|
/// Two-step state for batches with Fulu enabled: blocks are requested first, and custody
/// columns are requested afterwards for the blocks that have data.
enum FuluEnabledState<E: EthSpec> {
    // Step 1: waiting for the blocks by range request
    BlockRequest {
        blocks_by_range_request:
            ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
    },
    // Step 2: blocks are downloaded, waiting for the custody by range request
    CustodyRequest {
        // Blocks downloaded in step 1
        blocks: Vec<Arc<SignedBeaconBlock<E>>>,
        // Peer that served `blocks`
        block_peer: PeerId,
        custody_by_range_request:
            ByRangeRequest<CustodyByRangeRequestId, Vec<Arc<DataColumnSidecar<E>>>, PeerGroup>,
    },
}
|
||||||
|
|
||||||
|
/// Tracks a single child request.
///
/// - `I`: type of the request ID
/// - `T`: type of the downloaded data
/// - `P`: type describing which peer(s) served the data, `PeerId` by default
enum ByRangeRequest<I: PartialEq + std::fmt::Display, T, P = PeerId> {
    /// Active(RequestIndex): the request with this ID is in flight
    Active(I),
    /// Complete(DownloadedData, Peers)
    Complete(T, P),
}
|
||||||
|
|
||||||
|
/// Result of driving a `BlockComponentsByRangeRequest`:
///
/// - `Err`: the request failed
/// - `Ok(Some)`: the request completed with the coupled blocks and the peers that served them
/// - `Ok(None)`: the request is still waiting for child requests
pub type BlockComponentsByRangeRequestResult<E> =
    Result<Option<(Vec<RpcBlock<E>>, BatchPeers)>, Error>;
|
||||||
|
|
||||||
|
/// Failure of a `BlockComponentsByRangeRequest`.
///
/// Derives `Debug` so the error can be logged, formatted with `{:?}` and used with
/// `Result::unwrap` / `Result::expect` (for example in tests).
#[derive(Debug)]
pub enum Error {
    /// Unexpected internal condition, with a description of what went wrong
    InternalError(String),
}
|
||||||
|
|
||||||
|
impl From<Error> for RpcResponseError {
|
||||||
|
fn from(e: Error) -> Self {
|
||||||
|
match e {
|
||||||
|
Error::InternalError(e) => RpcResponseError::InternalError(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Error> for RpcRequestSendError {
|
||||||
|
fn from(e: Error) -> Self {
|
||||||
|
match e {
|
||||||
|
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// FOR TESTING ONLY
///
/// Coarse view of which child request a `BlockComponentsByRangeRequest` is waiting on, as
/// reported by `BlockComponentsByRangeRequest::state_step`.
#[cfg(test)]
#[derive(Debug)]
pub enum BlockComponentsByRangeRequestStep {
    /// Reported for every state other than the Fulu custody step
    BlocksRequest,
    /// Reported when a Fulu batch is in its custody by range step
    CustodyRequest,
}
|
||||||
|
|
||||||
|
impl<T: BeaconChainTypes> BlockComponentsByRangeRequest<T> {
|
||||||
|
pub fn new(
|
||||||
|
id: ComponentsByRangeRequestId,
|
||||||
|
request: BlocksByRangeRequest,
|
||||||
|
peers: &HashSet<PeerId>,
|
||||||
|
peers_to_deprioritize: &HashSet<PeerId>,
|
||||||
|
total_requests_per_peer: &HashMap<PeerId, usize>,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> Result<Self, RpcRequestSendError> {
|
||||||
|
// Induces a compile time panic if this doesn't hold true.
|
||||||
|
#[allow(clippy::assertions_on_constants)]
|
||||||
|
const _: () = assert!(
|
||||||
|
super::super::backfill_sync::BACKFILL_EPOCHS_PER_BATCH == 1
|
||||||
|
&& super::super::range_sync::EPOCHS_PER_BATCH == 1,
|
||||||
|
"To deal with alignment with deneb boundaries, batches need to be of just one epoch"
|
||||||
|
);
|
||||||
|
// The assertion above ensures each batch is in one single epoch
|
||||||
|
let batch_epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch());
|
||||||
|
let batch_fork = cx.spec().fork_name_at_epoch(batch_epoch);
|
||||||
|
|
||||||
|
// TODO(das): a change of behaviour here is that if the SyncingChain has a single peer we
|
||||||
|
// will request all blocks for the first 5 epochs to that same single peer. Before we would
|
||||||
|
// query only idle peers in the syncing chain.
|
||||||
|
let Some(block_peer) = peers
|
||||||
|
.iter()
|
||||||
|
.map(|peer| {
|
||||||
|
(
|
||||||
|
// If contains -> 1 (order after), not contains -> 0 (order first)
|
||||||
|
peers_to_deprioritize.contains(peer),
|
||||||
|
// TODO(das): Should we use active_request_count_by_peer?
|
||||||
|
// Prefer peers with less overall requests
|
||||||
|
// active_request_count_by_peer.get(peer).copied().unwrap_or(0),
|
||||||
|
// Prefer peers with less total cummulative requests, so we fetch data from a
|
||||||
|
// diverse set of peers
|
||||||
|
total_requests_per_peer.get(peer).copied().unwrap_or(0),
|
||||||
|
// Random factor to break ties, otherwise the PeerID breaks ties
|
||||||
|
rand::random::<u32>(),
|
||||||
|
peer,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.min()
|
||||||
|
.map(|(_, _, _, peer)| *peer)
|
||||||
|
else {
|
||||||
|
// When a peer disconnects and is removed from the SyncingChain peer set, if the set
|
||||||
|
// reaches zero the SyncingChain is removed.
|
||||||
|
// TODO(das): add test for this.
|
||||||
|
return Err(RpcRequestSendError::InternalError(
|
||||||
|
"A batch peer set should never be empty".to_string(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
let blocks_req_id = cx.send_blocks_by_range_request(block_peer, request.clone(), id)?;
|
||||||
|
|
||||||
|
let state = if batch_fork.fulu_enabled() {
|
||||||
|
State::FuluEnabled(FuluEnabledState::BlockRequest {
|
||||||
|
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||||
|
})
|
||||||
|
} else if batch_fork.deneb_enabled() {
|
||||||
|
// TODO(deneb): is it okay to send blobs_by_range requests outside the DA window? I
|
||||||
|
// would like the beacon processor / da_checker to be the one that decides if an
|
||||||
|
// RpcBlock is valid or not with respect to containing blobs. Having sync not even
|
||||||
|
// attempt a requests seems like an added limitation.
|
||||||
|
let blobs_req_id = cx.send_blobs_by_range_request(
|
||||||
|
block_peer,
|
||||||
|
BlobsByRangeRequest {
|
||||||
|
start_slot: *request.start_slot(),
|
||||||
|
count: *request.count(),
|
||||||
|
},
|
||||||
|
id,
|
||||||
|
)?;
|
||||||
|
State::DenebEnabled {
|
||||||
|
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||||
|
blobs_by_range_request: ByRangeRequest::Active(blobs_req_id),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
State::Base {
|
||||||
|
blocks_by_range_request: ByRangeRequest::Active(blocks_req_id),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
id,
|
||||||
|
// TODO(das): share the rwlock with the range sync batch. Are peers added to the batch
|
||||||
|
// after being created?
|
||||||
|
peers: Arc::new(RwLock::new(peers.clone())),
|
||||||
|
request,
|
||||||
|
state,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn continue_requests(
|
||||||
|
&mut self,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||||
|
match &mut self.state {
|
||||||
|
State::Base {
|
||||||
|
blocks_by_range_request,
|
||||||
|
} => {
|
||||||
|
if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
|
||||||
|
// TODO(das): use the peer group
|
||||||
|
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||||
|
let rpc_blocks = couple_blocks_base(
|
||||||
|
blocks.to_vec(),
|
||||||
|
cx.network_globals().sampling_columns.len(),
|
||||||
|
);
|
||||||
|
Ok(Some((rpc_blocks, peer_group)))
|
||||||
|
} else {
|
||||||
|
// Wait for blocks_by_range requests to complete
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
State::DenebEnabled {
|
||||||
|
blocks_by_range_request,
|
||||||
|
blobs_by_range_request,
|
||||||
|
} => {
|
||||||
|
if let (Some((blocks, block_peer)), Some((blobs, _))) = (
|
||||||
|
blocks_by_range_request.to_finished(),
|
||||||
|
blobs_by_range_request.to_finished(),
|
||||||
|
) {
|
||||||
|
// We use the same block_peer for the blobs request
|
||||||
|
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||||
|
let rpc_blocks =
|
||||||
|
couple_blocks_deneb(blocks.to_vec(), blobs.to_vec(), cx.spec())?;
|
||||||
|
Ok(Some((rpc_blocks, peer_group)))
|
||||||
|
} else {
|
||||||
|
// Wait for blocks_by_range and blobs_by_range requests to complete
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
State::FuluEnabled(state) => match state {
|
||||||
|
FuluEnabledState::BlockRequest {
|
||||||
|
blocks_by_range_request,
|
||||||
|
} => {
|
||||||
|
if let Some((blocks, block_peer)) = blocks_by_range_request.to_finished() {
|
||||||
|
// TODO(das): use the peer group
|
||||||
|
let blocks_with_data = blocks
|
||||||
|
.iter()
|
||||||
|
.filter(|block| block.has_data())
|
||||||
|
.map(|block| block.signed_block_header())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
if blocks_with_data.is_empty() {
|
||||||
|
let custody_column_indices = cx
|
||||||
|
.network_globals()
|
||||||
|
.sampling_columns
|
||||||
|
.clone()
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Done, we got blocks and no columns needed
|
||||||
|
let peer_group = BatchPeers::new_from_block_peer(*block_peer);
|
||||||
|
let rpc_blocks = couple_blocks_fulu(
|
||||||
|
blocks.to_vec(),
|
||||||
|
vec![],
|
||||||
|
custody_column_indices,
|
||||||
|
cx.spec(),
|
||||||
|
)?;
|
||||||
|
Ok(Some((rpc_blocks, peer_group)))
|
||||||
|
} else {
|
||||||
|
let mut column_indices = cx
|
||||||
|
.network_globals()
|
||||||
|
.sampling_columns
|
||||||
|
.clone()
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
column_indices.sort_unstable();
|
||||||
|
|
||||||
|
let req_id = cx
|
||||||
|
.send_custody_by_range_request(
|
||||||
|
self.id,
|
||||||
|
blocks_with_data,
|
||||||
|
Slot::new(*self.request.start_slot())
|
||||||
|
.epoch(T::EthSpec::slots_per_epoch()),
|
||||||
|
column_indices,
|
||||||
|
self.peers.clone(),
|
||||||
|
)
|
||||||
|
.map_err(|e| match e {
|
||||||
|
RpcRequestSendError::InternalError(e) => {
|
||||||
|
Error::InternalError(e)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
*state = FuluEnabledState::CustodyRequest {
|
||||||
|
blocks: blocks.to_vec(),
|
||||||
|
block_peer: *block_peer,
|
||||||
|
custody_by_range_request: ByRangeRequest::Active(req_id),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Wait for the new custody_by_range request to complete
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Wait for the block request to complete
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FuluEnabledState::CustodyRequest {
|
||||||
|
blocks,
|
||||||
|
block_peer,
|
||||||
|
custody_by_range_request,
|
||||||
|
} => {
|
||||||
|
if let Some((columns, column_peers)) = custody_by_range_request.to_finished() {
|
||||||
|
let custody_column_indices = cx
|
||||||
|
.network_globals()
|
||||||
|
.sampling_columns
|
||||||
|
.clone()
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let peer_group =
|
||||||
|
BatchPeers::new(*block_peer, column_peers.as_reversed_map());
|
||||||
|
let rpc_blocks = couple_blocks_fulu(
|
||||||
|
blocks.to_vec(),
|
||||||
|
columns.to_vec(),
|
||||||
|
custody_column_indices,
|
||||||
|
cx.spec(),
|
||||||
|
)?;
|
||||||
|
Ok(Some((rpc_blocks, peer_group)))
|
||||||
|
} else {
|
||||||
|
// Wait for the custody_by_range request to complete
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn on_blocks_by_range_result(
|
||||||
|
&mut self,
|
||||||
|
id: BlocksByRangeRequestId,
|
||||||
|
data: Vec<Arc<SignedBeaconBlock<T::EthSpec>>>,
|
||||||
|
peer_id: PeerId,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||||
|
match &mut self.state {
|
||||||
|
State::Base {
|
||||||
|
blocks_by_range_request,
|
||||||
|
}
|
||||||
|
| State::DenebEnabled {
|
||||||
|
blocks_by_range_request,
|
||||||
|
..
|
||||||
|
}
|
||||||
|
| State::FuluEnabled(FuluEnabledState::BlockRequest {
|
||||||
|
blocks_by_range_request,
|
||||||
|
}) => {
|
||||||
|
blocks_by_range_request.finish(id, data, peer_id)?;
|
||||||
|
}
|
||||||
|
State::FuluEnabled(FuluEnabledState::CustodyRequest { .. }) => {
|
||||||
|
return Err(Error::InternalError(
|
||||||
|
"Received blocks_by_range response expecting custody_by_range".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.continue_requests(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn on_blobs_by_range_result(
|
||||||
|
&mut self,
|
||||||
|
id: BlobsByRangeRequestId,
|
||||||
|
data: Vec<Arc<BlobSidecar<T::EthSpec>>>,
|
||||||
|
peer_id: PeerId,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||||
|
match &mut self.state {
|
||||||
|
State::Base { .. } => {
|
||||||
|
return Err(Error::InternalError(
|
||||||
|
"Received blobs_by_range response before Deneb".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
State::DenebEnabled {
|
||||||
|
blobs_by_range_request,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
blobs_by_range_request.finish(id, data, peer_id)?;
|
||||||
|
}
|
||||||
|
State::FuluEnabled(_) => {
|
||||||
|
return Err(Error::InternalError(
|
||||||
|
"Received blobs_by_range response after PeerDAS".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.continue_requests(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn on_custody_by_range_result(
|
||||||
|
&mut self,
|
||||||
|
id: CustodyByRangeRequestId,
|
||||||
|
data: Vec<Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||||
|
peers: PeerGroup,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> BlockComponentsByRangeRequestResult<T::EthSpec> {
|
||||||
|
match &mut self.state {
|
||||||
|
State::Base { .. } | State::DenebEnabled { .. } => {
|
||||||
|
return Err(Error::InternalError(
|
||||||
|
"Received custody_by_range response before PeerDAS".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
State::FuluEnabled(state) => match state {
|
||||||
|
FuluEnabledState::BlockRequest { .. } => {
|
||||||
|
return Err(Error::InternalError(
|
||||||
|
"Received custody_by_range expecting blocks_by_range".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
FuluEnabledState::CustodyRequest {
|
||||||
|
custody_by_range_request,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
custody_by_range_request.finish(id, data, peers)?;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
self.continue_requests(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test helper: report which step this request is currently in. Everything except the Fulu
/// custody step is reported as `BlocksRequest`.
#[cfg(test)]
pub fn state_step(&self) -> BlockComponentsByRangeRequestStep {
    match &self.state {
        State::FuluEnabled(FuluEnabledState::CustodyRequest { .. }) => {
            BlockComponentsByRangeRequestStep::CustodyRequest
        }
        State::Base { .. }
        | State::DenebEnabled { .. }
        | State::FuluEnabled(FuluEnabledState::BlockRequest { .. }) => {
            BlockComponentsByRangeRequestStep::BlocksRequest
        }
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn couple_blocks_base<E: EthSpec>(
|
||||||
|
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||||
|
custody_columns_count: usize,
|
||||||
|
) -> Vec<RpcBlock<E>> {
|
||||||
|
blocks
|
||||||
|
.into_iter()
|
||||||
|
.map(|block| RpcBlock::new_without_blobs(None, block, custody_columns_count))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn couple_blocks_deneb<E: EthSpec>(
|
||||||
|
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||||
|
blobs: Vec<Arc<BlobSidecar<E>>>,
|
||||||
|
spec: &ChainSpec,
|
||||||
|
) -> Result<Vec<RpcBlock<E>>, Error> {
|
||||||
|
let mut blobs_by_block = HashMap::<Hash256, Vec<Arc<BlobSidecar<E>>>>::new();
|
||||||
|
for blob in blobs {
|
||||||
|
let block_root = blob.block_root();
|
||||||
|
blobs_by_block.entry(block_root).or_default().push(blob);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now collect all blobs that match to the block by block root. BlobsByRange request checks
|
||||||
|
// the inclusion proof so we know that the commitment is the expected.
|
||||||
|
//
|
||||||
|
// BlobsByRange request handler ensures that we don't receive more blobs than possible.
|
||||||
|
// If the peer serving the request sends us blobs that don't pair well we'll send to the
|
||||||
|
// processor blocks without expected blobs, resulting in a downscoring event. A serving peer
|
||||||
|
// could serve fake blobs for blocks that don't have data, but it would gain nothing by it
|
||||||
|
// wasting theirs and our bandwidth 1:1. Therefore blobs that don't pair well are just ignored.
|
||||||
|
//
|
||||||
|
// RpcBlock::new ensures that the count of blobs is consistent with the block
|
||||||
|
blocks
|
||||||
|
.into_iter()
|
||||||
|
.map(|block| {
|
||||||
|
let block_root = get_block_root(&block);
|
||||||
|
let max_blobs_per_block = spec.max_blobs_per_block(block.epoch()) as usize;
|
||||||
|
let blobs = blobs_by_block.remove(&block_root).unwrap_or_default();
|
||||||
|
// BlobsByRange request handler enforces that blobs are sorted by index
|
||||||
|
let blobs = RuntimeVariableList::new(blobs, max_blobs_per_block).map_err(|_| {
|
||||||
|
Error::InternalError("Blobs returned exceeds max length".to_string())
|
||||||
|
})?;
|
||||||
|
Ok(RpcBlock::new(Some(block_root), block, Some(blobs))
|
||||||
|
.expect("TODO: don't do matching here"))
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<RpcBlock<E>>, Error>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn couple_blocks_fulu<E: EthSpec>(
|
||||||
|
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||||
|
data_columns: Vec<Arc<DataColumnSidecar<E>>>,
|
||||||
|
custody_column_indices: Vec<ColumnIndex>,
|
||||||
|
spec: &ChainSpec,
|
||||||
|
) -> Result<Vec<RpcBlock<E>>, Error> {
|
||||||
|
// Group data columns by block_root and index
|
||||||
|
let mut custody_columns_by_block = HashMap::<Hash256, Vec<CustodyDataColumn<E>>>::new();
|
||||||
|
|
||||||
|
for column in data_columns {
|
||||||
|
let block_root = column.block_root();
|
||||||
|
|
||||||
|
if custody_column_indices.contains(&column.index) {
|
||||||
|
custody_columns_by_block
|
||||||
|
.entry(block_root)
|
||||||
|
.or_default()
|
||||||
|
// Safe to convert to `CustodyDataColumn`: we have asserted that the index of
|
||||||
|
// this column is in the set of `expects_custody_columns` and with the expected
|
||||||
|
// block root, so for the expected epoch of this batch.
|
||||||
|
.push(CustodyDataColumn::from_asserted_custody(column));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now iterate all blocks ensuring that the block roots of each block and data column match,
|
||||||
|
blocks
|
||||||
|
.into_iter()
|
||||||
|
.map(|block| {
|
||||||
|
let block_root = get_block_root(&block);
|
||||||
|
let data_columns_with_block_root = custody_columns_by_block
|
||||||
|
// Remove to only use columns once
|
||||||
|
.remove(&block_root)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// TODO(das): Change RpcBlock to holding a Vec of DataColumnSidecars so we don't need
|
||||||
|
// the spec here.
|
||||||
|
RpcBlock::new_with_custody_columns(
|
||||||
|
Some(block_root),
|
||||||
|
block,
|
||||||
|
data_columns_with_block_root,
|
||||||
|
custody_column_indices.clone(),
|
||||||
|
spec,
|
||||||
|
)
|
||||||
|
.map_err(Error::InternalError)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>, _>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: PartialEq + std::fmt::Display, T, P> ByRangeRequest<I, T, P> {
|
||||||
|
fn finish(&mut self, id: I, data: T, peer_id: P) -> Result<(), Error> {
|
||||||
|
match self {
|
||||||
|
Self::Active(expected_id) => {
|
||||||
|
if expected_id != &id {
|
||||||
|
return Err(Error::InternalError(format!(
|
||||||
|
"unexpected req_id expected {expected_id} got {id}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
*self = Self::Complete(data, peer_id);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Self::Complete(_, _) => Err(Error::InternalError(format!(
|
||||||
|
"request already complete {id}"
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_finished(&self) -> Option<(&T, &P)> {
|
||||||
|
match self {
|
||||||
|
Self::Active(_) => None,
|
||||||
|
Self::Complete(data, peer_id) => Some((data, peer_id)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
481
beacon_node/network/src/sync/network_context/custody_by_range.rs
Normal file
481
beacon_node/network/src/sync/network_context/custody_by_range.rs
Normal file
@@ -0,0 +1,481 @@
|
|||||||
|
use super::custody_by_root::{ColumnRequest, Error};
|
||||||
|
use crate::sync::network_context::RpcResponseError;
|
||||||
|
use beacon_chain::validator_monitor::timestamp_now;
|
||||||
|
use beacon_chain::BeaconChainTypes;
|
||||||
|
use fnv::FnvHashMap;
|
||||||
|
use lighthouse_network::rpc::methods::DataColumnsByRangeRequest;
|
||||||
|
use lighthouse_network::service::api_types::{
|
||||||
|
CustodyByRangeRequestId, DataColumnsByRangeRequestId,
|
||||||
|
};
|
||||||
|
use lighthouse_network::{PeerAction, PeerId};
|
||||||
|
use lru_cache::LRUTimeCache;
|
||||||
|
use parking_lot::RwLock;
|
||||||
|
use rand::Rng;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
use types::{
|
||||||
|
data_column_sidecar::ColumnIndex, DataColumnSidecar, Epoch, EthSpec, Hash256,
|
||||||
|
SignedBeaconBlockHeader, Slot,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{PeerGroup, RpcResponseResult, SyncNetworkContext};
|
||||||
|
|
||||||
|
/// Expiry, in seconds, of the entries of both `peers_with_custody_failures` and
/// `peers_with_temporary_faults`.
const TEMPORARY_FAULT_EXPIRY_SECONDS: u64 = 15;
// NOTE(review): presumably the maximum lifetime of a request in seconds; its usage is not
// visible in this part of the file, confirm.
const REQUEST_EXPIRY_SECONDS: u64 = 300;

/// List of shared data column sidecars
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
|
||||||
|
|
||||||
|
/// An in-progress request for a set of custody columns, for the blocks with data of one
/// batch.
pub struct ActiveCustodyByRangeRequest<T: BeaconChainTypes> {
    /// Instant at which this request was created
    start_time: Instant,
    id: CustodyByRangeRequestId,
    // TODO(das): Pass a better type for the by_range request
    epoch: Epoch,
    /// Blocks that we expect peers to serve data columns for
    blocks_with_data: Vec<SignedBeaconBlockHeader>,
    /// List of column indices this request needs to download to complete successfully
    column_requests: FnvHashMap<
        ColumnIndex,
        ColumnRequest<DataColumnsByRangeRequestId, DataColumnSidecarList<T::EthSpec>>,
    >,
    /// Active requests for 1 or more columns each
    active_batch_columns_requests:
        FnvHashMap<DataColumnsByRangeRequestId, ActiveBatchColumnsRequest>,
    /// Peers that have recently failed to successfully respond to a columns by range request.
    /// Having a LRUTimeCache allows this request to not have to track disconnecting peers.
    peers_with_custody_failures: LRUTimeCache<PeerId>,
    /// Entries expire after `TEMPORARY_FAULT_EXPIRY_SECONDS`
    peers_with_temporary_faults: LRUTimeCache<PeerId>,
    // TODO(das): does this HashSet have an OOM risk? We should either: make sure that these
    // request structs are dropped after some time, that disconnected peers are pruned (but we
    // may want to retain faulty information if they just disconnect and reconnect) or make this
    // an LRUTimeCache with a long time (like 5 minutes).
    peers_with_permanent_faults: HashSet<PeerId>,
    /// Set of peers that claim to have imported this block and their custody columns
    lookup_peers: Arc<RwLock<HashSet<PeerId>>>,

    _phantom: PhantomData<T>,
}
|
||||||
|
|
||||||
|
/// A single in-flight `DataColumnsByRange` request, which may cover several columns.
struct ActiveBatchColumnsRequest {
    /// Column indices requested in this batch. When the response arrives each index is looked
    /// up in `column_requests`.
    indices: Vec<ColumnIndex>,
}
|
||||||
|
|
||||||
|
/// Result of driving an `ActiveCustodyByRangeRequest`:
///
/// - `Err`: Custody request has failed and will be dropped
/// - `Ok(Some)`: Custody request has successfully completed and will be dropped
/// - `Ok(None)`: Custody request still active
pub type CustodyByRangeRequestResult<E> =
    Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
|
||||||
|
|
||||||
|
// NOTE(review): the code that produces these variants is outside this view; the descriptions
// below are read off the names and fields only, confirm against the usage.
enum ColumnResponseError {
    // Presumably: the column received for `slot` has a block root different from the expected
    // one
    NonMatchingColumn {
        slot: Slot,
        actual_block_root: Hash256,
        expected_block_root: Hash256,
    },
    // Presumably: no column was received for this slot
    MissingColumn(Slot),
}
|
||||||
|
|
||||||
|
impl<T: BeaconChainTypes> ActiveCustodyByRangeRequest<T> {
|
||||||
|
pub(crate) fn new(
|
||||||
|
id: CustodyByRangeRequestId,
|
||||||
|
epoch: Epoch,
|
||||||
|
blocks_with_data: Vec<SignedBeaconBlockHeader>,
|
||||||
|
column_indices: &[ColumnIndex],
|
||||||
|
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
start_time: Instant::now(),
|
||||||
|
id,
|
||||||
|
epoch,
|
||||||
|
blocks_with_data,
|
||||||
|
column_requests: HashMap::from_iter(
|
||||||
|
column_indices
|
||||||
|
.iter()
|
||||||
|
.map(|index| (*index, ColumnRequest::new())),
|
||||||
|
),
|
||||||
|
active_batch_columns_requests: <_>::default(),
|
||||||
|
peers_with_custody_failures: LRUTimeCache::new(Duration::from_secs(
|
||||||
|
TEMPORARY_FAULT_EXPIRY_SECONDS,
|
||||||
|
)),
|
||||||
|
peers_with_temporary_faults: LRUTimeCache::new(Duration::from_secs(
|
||||||
|
TEMPORARY_FAULT_EXPIRY_SECONDS,
|
||||||
|
)),
|
||||||
|
peers_with_permanent_faults: HashSet::new(),
|
||||||
|
lookup_peers,
|
||||||
|
_phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a downloaded column into an active custody request. Then make progress on the
|
||||||
|
/// entire request.
|
||||||
|
///
|
||||||
|
/// ### Returns
|
||||||
|
///
|
||||||
|
/// - `Err`: Custody request has failed and will be dropped
|
||||||
|
/// - `Ok(Some)`: Custody request has successfully completed and will be dropped
|
||||||
|
/// - `Ok(None)`: Custody request still active
|
||||||
|
pub(crate) fn on_data_column_downloaded(
|
||||||
|
&mut self,
|
||||||
|
peer_id: PeerId,
|
||||||
|
req_id: DataColumnsByRangeRequestId,
|
||||||
|
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> CustodyByRangeRequestResult<T::EthSpec> {
|
||||||
|
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
|
||||||
|
warn!(
|
||||||
|
id = %self.id,
|
||||||
|
%req_id,
|
||||||
|
"Received custody by range response for unrequested index"
|
||||||
|
);
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
match resp {
|
||||||
|
Ok((data_columns, seen_timestamp)) => {
|
||||||
|
// Map columns by index as an optimization to not loop the returned list on each
|
||||||
|
// requested index. The worse case is 128 loops over a 128 item vec + mutation to
|
||||||
|
// drop the consumed columns.
|
||||||
|
let mut data_columns_by_index =
|
||||||
|
HashMap::<(ColumnIndex, Slot), Arc<DataColumnSidecar<T::EthSpec>>>::new();
|
||||||
|
for data_column in data_columns {
|
||||||
|
data_columns_by_index
|
||||||
|
.insert((data_column.index, data_column.slot()), data_column);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate columns that the peer does not have to issue a single log per request
|
||||||
|
let mut missing_column_indexes = vec![];
|
||||||
|
let mut incorrect_column_indices = vec![];
|
||||||
|
let mut imported_column_indices = vec![];
|
||||||
|
|
||||||
|
for index in &batch_request.indices {
|
||||||
|
let column_request =
|
||||||
|
self.column_requests
|
||||||
|
.get_mut(index)
|
||||||
|
.ok_or(Error::InternalError(format!(
|
||||||
|
"unknown column_index {index}"
|
||||||
|
)))?;
|
||||||
|
|
||||||
|
let columns_at_index = self
|
||||||
|
.blocks_with_data
|
||||||
|
.iter()
|
||||||
|
.map(|block| {
|
||||||
|
let slot = block.message.slot;
|
||||||
|
if let Some(data_column) = data_columns_by_index.remove(&(*index, slot))
|
||||||
|
{
|
||||||
|
let actual_block_root =
|
||||||
|
data_column.signed_block_header.message.canonical_root();
|
||||||
|
let expected_block_root = block.message.canonical_root();
|
||||||
|
if actual_block_root != expected_block_root {
|
||||||
|
Err(ColumnResponseError::NonMatchingColumn {
|
||||||
|
slot,
|
||||||
|
actual_block_root: data_column
|
||||||
|
.signed_block_header
|
||||||
|
.message
|
||||||
|
.canonical_root(),
|
||||||
|
expected_block_root: block.message.canonical_root(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Ok(data_column)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// The following three statements are true:
|
||||||
|
// - block at `slot` is not missed, and has data
|
||||||
|
// - peer custodies this column `index`
|
||||||
|
// - peer claims to be synced to at least `slot`
|
||||||
|
//
|
||||||
|
// Therefore not returning this column is an protocol violation that we
|
||||||
|
// penalize and mark the peer as failed to retry with another peer.
|
||||||
|
//
|
||||||
|
// TODO(das) do not consider this case a success. We know for sure the block has
|
||||||
|
// data. However we allow the peer to return empty as we can't attribute fault.
|
||||||
|
// TODO(das): Should track which columns are missing and eventually give up
|
||||||
|
// TODO(das): If the peer is in the lookup peer set it claims to have imported
|
||||||
|
// the block AND its custody columns. So in this case we can downscore
|
||||||
|
Err(ColumnResponseError::MissingColumn(slot))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>, _>>();
|
||||||
|
|
||||||
|
match columns_at_index {
|
||||||
|
Ok(columns_at_index) => {
|
||||||
|
column_request.on_download_success(
|
||||||
|
req_id,
|
||||||
|
peer_id,
|
||||||
|
columns_at_index,
|
||||||
|
seen_timestamp,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
imported_column_indices.push(index);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
column_request.on_download_error(req_id)?;
|
||||||
|
|
||||||
|
match e {
|
||||||
|
ColumnResponseError::NonMatchingColumn {
|
||||||
|
slot,
|
||||||
|
actual_block_root,
|
||||||
|
expected_block_root,
|
||||||
|
} => {
|
||||||
|
incorrect_column_indices.push((
|
||||||
|
index,
|
||||||
|
slot,
|
||||||
|
actual_block_root,
|
||||||
|
expected_block_root,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
ColumnResponseError::MissingColumn(slot) => {
|
||||||
|
missing_column_indexes.push((index, slot));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log missing_column_indexes and incorrect_column_indices here in batch per request
|
||||||
|
// to make this logs more compact and less noisy.
|
||||||
|
if !imported_column_indices.is_empty() {
|
||||||
|
// TODO(das): this log may be redundant. We already log on DataColumnsByRange
|
||||||
|
// completed, and on DataColumnsByRange sent we log the column indices
|
||||||
|
// ```
|
||||||
|
// Sync RPC request sent method="DataColumnsByRange" slots=8 epoch=4 columns=[52] peer=16Uiu2HAmEooeoHzHDYS35TSHrJDSfmREecPyFskrLPYm9Gm1EURj id=493/399/10/RangeSync/4/1
|
||||||
|
// Sync RPC request completed id=493/399/10/RangeSync/4/1 method="DataColumnsByRange" count=1
|
||||||
|
// ```
|
||||||
|
// Which can be traced to this custody by range request, and the initial log
|
||||||
|
debug!(
|
||||||
|
id = %self.id,
|
||||||
|
data_columns_by_range_req_id = %req_id,
|
||||||
|
%peer_id,
|
||||||
|
count = imported_column_indices.len(),
|
||||||
|
"Custody by range request download imported columns"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !incorrect_column_indices.is_empty() {
|
||||||
|
// Note: Batch logging that columns are missing to not spam logger
|
||||||
|
debug!(
|
||||||
|
id = %self.id,
|
||||||
|
data_columns_by_range_req_id = %req_id,
|
||||||
|
%peer_id,
|
||||||
|
// TODO(das): this property can become very noisy, being the full range 0..128
|
||||||
|
incorrect_columns = ?incorrect_column_indices,
|
||||||
|
"Custody by range peer returned non-matching columns"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Returning a non-canonical column is not a permanent fault. We should not
|
||||||
|
// retry the peer for some time but the peer may return a canonical column in
|
||||||
|
// the future.
|
||||||
|
// TODO(das): if this finalized sync the fault is permanent
|
||||||
|
self.peers_with_temporary_faults.insert(peer_id);
|
||||||
|
cx.report_peer(
|
||||||
|
peer_id,
|
||||||
|
PeerAction::MidToleranceError,
|
||||||
|
"non-matching data column",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !missing_column_indexes.is_empty() {
|
||||||
|
// Note: Batch logging that columns are missing to not spam logger
|
||||||
|
debug!(
|
||||||
|
id = %self.id,
|
||||||
|
data_columns_by_range_req_id = %req_id,
|
||||||
|
%peer_id,
|
||||||
|
// TODO(das): this property can become very noisy, being the full range 0..128
|
||||||
|
?missing_column_indexes,
|
||||||
|
"Custody by range peer claims to not have some data"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Not having columns is not a permanent fault. The peer may be backfilling.
|
||||||
|
self.peers_with_custody_failures.insert(peer_id);
|
||||||
|
cx.report_peer(peer_id, PeerAction::MidToleranceError, "custody_failure");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
debug!(
|
||||||
|
id = %self.id,
|
||||||
|
%req_id,
|
||||||
|
%peer_id,
|
||||||
|
error = ?err,
|
||||||
|
"Custody by range download error"
|
||||||
|
);
|
||||||
|
|
||||||
|
// TODO(das): Should mark peer as failed and try from another peer
|
||||||
|
for column_index in &batch_request.indices {
|
||||||
|
self.column_requests
|
||||||
|
.get_mut(column_index)
|
||||||
|
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
|
||||||
|
.on_download_error_and_mark_failure(req_id, err.clone())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
match err {
|
||||||
|
// Verify errors are correctness errors against our request or about the
|
||||||
|
// returned data itself. This peer is faulty or malicious, should not be
|
||||||
|
// retried.
|
||||||
|
RpcResponseError::VerifyError(_) => {
|
||||||
|
self.peers_with_permanent_faults.insert(peer_id);
|
||||||
|
}
|
||||||
|
// Network errors are not permanent faults and worth retrying
|
||||||
|
RpcResponseError::RpcError(_) => {
|
||||||
|
self.peers_with_temporary_faults.insert(peer_id);
|
||||||
|
}
|
||||||
|
// Do nothing for internal errors
|
||||||
|
RpcResponseError::InternalError(_) => {}
|
||||||
|
// unreachable
|
||||||
|
RpcResponseError::RequestExpired(_) => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.continue_requests(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn continue_requests(
|
||||||
|
&mut self,
|
||||||
|
cx: &mut SyncNetworkContext<T>,
|
||||||
|
) -> CustodyByRangeRequestResult<T::EthSpec> {
|
||||||
|
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
||||||
|
// All requests have completed successfully.
|
||||||
|
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
||||||
|
let mut seen_timestamps = vec![];
|
||||||
|
let columns = std::mem::take(&mut self.column_requests)
|
||||||
|
.into_values()
|
||||||
|
.map(|request| {
|
||||||
|
let (peer, data_columns, seen_timestamp) = request.complete()?;
|
||||||
|
|
||||||
|
for data_column in &data_columns {
|
||||||
|
let columns_by_peer = peers.entry(peer).or_default();
|
||||||
|
if !columns_by_peer.contains(&(data_column.index as usize)) {
|
||||||
|
columns_by_peer.push(data_column.index as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
seen_timestamps.push(seen_timestamp);
|
||||||
|
|
||||||
|
Ok(data_columns)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>, _>>()?
|
||||||
|
// Flatten Vec<Vec<Columns>> to Vec<Columns>
|
||||||
|
// TODO(das): maybe not optimal for the coupling logic later
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let peer_group = PeerGroup::from_set(peers);
|
||||||
|
let max_seen_timestamp = seen_timestamps.into_iter().max().unwrap_or(timestamp_now());
|
||||||
|
return Ok(Some((columns, peer_group, max_seen_timestamp)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let active_request_count_by_peer = cx.active_request_count_by_peer();
|
||||||
|
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
||||||
|
let lookup_peers = self.lookup_peers.read();
|
||||||
|
|
||||||
|
// Need to:
|
||||||
|
// - track how many active requests a peer has for load balancing
|
||||||
|
// - which peers have failures to attempt others
|
||||||
|
// - which peer returned what to have PeerGroup attributability
|
||||||
|
|
||||||
|
for (column_index, request) in self.column_requests.iter_mut() {
|
||||||
|
if request.is_awaiting_download() {
|
||||||
|
if let Some(last_error) = request.too_many_failures() {
|
||||||
|
return Err(Error::TooManyDownloadErrors(last_error));
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(das): When is a fork and only a subset of your peers know about a block, we should
|
||||||
|
// only query the peers on that fork. Should this case be handled? How to handle it?
|
||||||
|
let custodial_peers = cx.get_custodial_peers(*column_index);
|
||||||
|
|
||||||
|
// We draw from the total set of peers, but prioritize those peers who we have
|
||||||
|
// received an attestation / status / block message claiming to have imported the
|
||||||
|
// lookup. The frequency of those messages is low, so drawing only from lookup_peers
|
||||||
|
// could cause many lookups to take much longer or fail as they don't have enough
|
||||||
|
// custody peers on a given column
|
||||||
|
let mut priorized_peers = custodial_peers
|
||||||
|
.iter()
|
||||||
|
.filter(|peer| {
|
||||||
|
// Never request again peers with permanent faults
|
||||||
|
// Do not request peers with custody failures for some time
|
||||||
|
!self.peers_with_permanent_faults.contains(peer)
|
||||||
|
&& !self.peers_with_custody_failures.contains(peer)
|
||||||
|
})
|
||||||
|
.map(|peer| {
|
||||||
|
(
|
||||||
|
// Prioritize peers that claim to know have imported this block
|
||||||
|
if lookup_peers.contains(peer) { 0 } else { 1 },
|
||||||
|
// De-prioritize peers that have failed to successfully respond to
|
||||||
|
// requests recently, but allow to immediatelly request them again
|
||||||
|
self.peers_with_temporary_faults.contains(peer),
|
||||||
|
// Prefer peers with fewer requests to load balance across peers.
|
||||||
|
// We batch requests to the same peer, so count existence in the
|
||||||
|
// `columns_to_request_by_peer` as a single 1 request.
|
||||||
|
active_request_count_by_peer.get(peer).copied().unwrap_or(0)
|
||||||
|
+ columns_to_request_by_peer.get(peer).map(|_| 1).unwrap_or(0),
|
||||||
|
// Random factor to break ties, otherwise the PeerID breaks ties
|
||||||
|
rand::thread_rng().gen::<u32>(),
|
||||||
|
*peer,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
priorized_peers.sort_unstable();
|
||||||
|
|
||||||
|
if let Some((_, _, _, _, peer_id)) = priorized_peers.first() {
|
||||||
|
columns_to_request_by_peer
|
||||||
|
.entry(*peer_id)
|
||||||
|
.or_default()
|
||||||
|
.push(*column_index);
|
||||||
|
} else {
|
||||||
|
// Do not issue requests if there is no custody peer on this column. The request
|
||||||
|
// will sit idle without making progress. The only way to make to progress is:
|
||||||
|
// - Add a new peer that custodies the missing columns
|
||||||
|
// - Call `continue_requests`
|
||||||
|
//
|
||||||
|
// Otherwise this request should be dropped and failed after some time.
|
||||||
|
// TODO(das): implement the above
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
|
||||||
|
let req_id = cx
|
||||||
|
.send_data_columns_by_range_request(
|
||||||
|
peer_id,
|
||||||
|
DataColumnsByRangeRequest {
|
||||||
|
// TODO(das): generalize with constants from batch
|
||||||
|
start_slot: self
|
||||||
|
.epoch
|
||||||
|
.start_slot(T::EthSpec::slots_per_epoch())
|
||||||
|
.as_u64(),
|
||||||
|
count: T::EthSpec::slots_per_epoch(),
|
||||||
|
columns: indices.clone(),
|
||||||
|
},
|
||||||
|
self.id,
|
||||||
|
)
|
||||||
|
.map_err(|e| Error::InternalError(format!("send failed {e}")))?;
|
||||||
|
|
||||||
|
for column_index in &indices {
|
||||||
|
let column_request = self
|
||||||
|
.column_requests
|
||||||
|
.get_mut(column_index)
|
||||||
|
// Should never happen: column_index is iterated from column_requests
|
||||||
|
.ok_or(Error::InternalError(format!(
|
||||||
|
"Unknown column_request {column_index}"
|
||||||
|
)))?;
|
||||||
|
|
||||||
|
column_request.on_download_start(req_id)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.active_batch_columns_requests
|
||||||
|
.insert(req_id, ActiveBatchColumnsRequest { indices });
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
|
||||||
|
&& !self.column_requests.values().any(|r| r.is_downloading())
|
||||||
|
{
|
||||||
|
let awaiting_peers_indicies = self
|
||||||
|
.column_requests
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, r)| r.is_awaiting_download())
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
use crate::sync::network_context::{
|
use crate::sync::network_context::{
|
||||||
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest,
|
DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, RpcRequestSendError,
|
||||||
|
RpcResponseError,
|
||||||
};
|
};
|
||||||
use beacon_chain::validator_monitor::timestamp_now;
|
use beacon_chain::validator_monitor::timestamp_now;
|
||||||
use beacon_chain::BeaconChainTypes;
|
use beacon_chain::BeaconChainTypes;
|
||||||
@@ -12,22 +13,29 @@ use rand::Rng;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
|
use std::{collections::HashMap, marker::PhantomData, sync::Arc};
|
||||||
|
use strum::IntoStaticStr;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
use types::EthSpec;
|
|
||||||
use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256};
|
use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256};
|
||||||
|
|
||||||
use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext};
|
use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext};
|
||||||
|
|
||||||
const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5;
|
const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5;
|
||||||
const MAX_STALE_NO_PEERS_DURATION: Duration = Duration::from_secs(30);
|
const REQUEST_EXPIRY_SECONDS: u64 = 300;
|
||||||
|
/// TODO(das): this attempt count is nested into the existing lookup request count.
|
||||||
|
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
|
||||||
|
|
||||||
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
|
type DataColumnSidecarList<E> = Vec<Arc<DataColumnSidecar<E>>>;
|
||||||
|
|
||||||
pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
|
pub struct ActiveCustodyByRootRequest<T: BeaconChainTypes> {
|
||||||
|
start_time: Instant,
|
||||||
block_root: Hash256,
|
block_root: Hash256,
|
||||||
custody_id: CustodyId,
|
custody_id: CustodyId,
|
||||||
/// List of column indices this request needs to download to complete successfully
|
/// List of column indices this request needs to download to complete successfully
|
||||||
column_requests: FnvHashMap<ColumnIndex, ColumnRequest<T::EthSpec>>,
|
#[allow(clippy::type_complexity)]
|
||||||
|
column_requests: FnvHashMap<
|
||||||
|
ColumnIndex,
|
||||||
|
ColumnRequest<DataColumnsByRootRequestId, Arc<DataColumnSidecar<T::EthSpec>>>,
|
||||||
|
>,
|
||||||
/// Active requests for 1 or more columns each
|
/// Active requests for 1 or more columns each
|
||||||
active_batch_columns_requests:
|
active_batch_columns_requests:
|
||||||
FnvHashMap<DataColumnsByRootRequestId, ActiveBatchColumnsRequest>,
|
FnvHashMap<DataColumnsByRootRequestId, ActiveBatchColumnsRequest>,
|
||||||
@@ -40,29 +48,47 @@ pub struct ActiveCustodyRequest<T: BeaconChainTypes> {
|
|||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq)]
|
#[derive(Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
SendFailed(&'static str),
|
InternalError(String),
|
||||||
TooManyFailures,
|
TooManyDownloadErrors(RpcResponseError),
|
||||||
BadState(String),
|
ExpiredNoCustodyPeers(Vec<ColumnIndex>),
|
||||||
NoPeer(ColumnIndex),
|
}
|
||||||
/// Received a download result for a different request id than the in-flight request.
|
|
||||||
/// There should only exist a single request at a time. Having multiple requests is a bug and
|
impl From<Error> for RpcResponseError {
|
||||||
/// can result in undefined state, so it's treated as a hard error and the lookup is dropped.
|
fn from(e: Error) -> Self {
|
||||||
UnexpectedRequestId {
|
match e {
|
||||||
expected_req_id: DataColumnsByRootRequestId,
|
Error::InternalError(e) => RpcResponseError::InternalError(e),
|
||||||
req_id: DataColumnsByRootRequestId,
|
Error::TooManyDownloadErrors(e) => e,
|
||||||
},
|
Error::ExpiredNoCustodyPeers(indices) => RpcResponseError::RequestExpired(format!(
|
||||||
|
"Expired waiting for custody peers {indices:?}"
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Error> for RpcRequestSendError {
|
||||||
|
fn from(e: Error) -> Self {
|
||||||
|
match e {
|
||||||
|
Error::TooManyDownloadErrors(_) => {
|
||||||
|
RpcRequestSendError::InternalError("Download error in request send".to_string())
|
||||||
|
}
|
||||||
|
Error::InternalError(e) => RpcRequestSendError::InternalError(e),
|
||||||
|
Error::ExpiredNoCustodyPeers(_) => RpcRequestSendError::InternalError(
|
||||||
|
"Request can not expire when requesting it".to_string(),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ActiveBatchColumnsRequest {
|
struct ActiveBatchColumnsRequest {
|
||||||
indices: Vec<ColumnIndex>,
|
indices: Vec<ColumnIndex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type CustodyRequestResult<E> =
|
pub type CustodyByRootRequestResult<E> =
|
||||||
Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
|
Result<Option<(DataColumnSidecarList<E>, PeerGroup, Duration)>, Error>;
|
||||||
|
|
||||||
impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
impl<T: BeaconChainTypes> ActiveCustodyByRootRequest<T> {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
block_root: Hash256,
|
block_root: Hash256,
|
||||||
custody_id: CustodyId,
|
custody_id: CustodyId,
|
||||||
@@ -70,6 +96,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
lookup_peers: Arc<RwLock<HashSet<PeerId>>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
start_time: Instant::now(),
|
||||||
block_root,
|
block_root,
|
||||||
custody_id,
|
custody_id,
|
||||||
column_requests: HashMap::from_iter(
|
column_requests: HashMap::from_iter(
|
||||||
@@ -98,7 +125,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
req_id: DataColumnsByRootRequestId,
|
req_id: DataColumnsByRootRequestId,
|
||||||
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
|
resp: RpcResponseResult<DataColumnSidecarList<T::EthSpec>>,
|
||||||
cx: &mut SyncNetworkContext<T>,
|
cx: &mut SyncNetworkContext<T>,
|
||||||
) -> CustodyRequestResult<T::EthSpec> {
|
) -> CustodyByRootRequestResult<T::EthSpec> {
|
||||||
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
|
let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else {
|
||||||
warn!(
|
warn!(
|
||||||
block_root = ?self.block_root,
|
block_root = ?self.block_root,
|
||||||
@@ -131,7 +158,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
let column_request = self
|
let column_request = self
|
||||||
.column_requests
|
.column_requests
|
||||||
.get_mut(column_index)
|
.get_mut(column_index)
|
||||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
|
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
|
||||||
|
|
||||||
if let Some(data_column) = data_columns.remove(column_index) {
|
if let Some(data_column) = data_columns.remove(column_index) {
|
||||||
column_request.on_download_success(
|
column_request.on_download_success(
|
||||||
@@ -182,8 +209,8 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
for column_index in &batch_request.indices {
|
for column_index in &batch_request.indices {
|
||||||
self.column_requests
|
self.column_requests
|
||||||
.get_mut(column_index)
|
.get_mut(column_index)
|
||||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?
|
.ok_or(Error::InternalError("unknown column_index".to_owned()))?
|
||||||
.on_download_error_and_mark_failure(req_id)?;
|
.on_download_error_and_mark_failure(req_id, err.clone())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.failed_peers.insert(peer_id);
|
self.failed_peers.insert(peer_id);
|
||||||
@@ -196,7 +223,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
pub(crate) fn continue_requests(
|
pub(crate) fn continue_requests(
|
||||||
&mut self,
|
&mut self,
|
||||||
cx: &mut SyncNetworkContext<T>,
|
cx: &mut SyncNetworkContext<T>,
|
||||||
) -> CustodyRequestResult<T::EthSpec> {
|
) -> CustodyByRootRequestResult<T::EthSpec> {
|
||||||
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
if self.column_requests.values().all(|r| r.is_downloaded()) {
|
||||||
// All requests have completed successfully.
|
// All requests have completed successfully.
|
||||||
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
let mut peers = HashMap::<PeerId, Vec<usize>>::new();
|
||||||
@@ -222,6 +249,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
let active_request_count_by_peer = cx.active_request_count_by_peer();
|
let active_request_count_by_peer = cx.active_request_count_by_peer();
|
||||||
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
let mut columns_to_request_by_peer = HashMap::<PeerId, Vec<ColumnIndex>>::new();
|
||||||
let lookup_peers = self.lookup_peers.read();
|
let lookup_peers = self.lookup_peers.read();
|
||||||
|
let mut indices_without_peers = vec![];
|
||||||
|
|
||||||
// Need to:
|
// Need to:
|
||||||
// - track how many active requests a peer has for load balancing
|
// - track how many active requests a peer has for load balancing
|
||||||
@@ -229,9 +257,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
// - which peer returned what to have PeerGroup attributability
|
// - which peer returned what to have PeerGroup attributability
|
||||||
|
|
||||||
for (column_index, request) in self.column_requests.iter_mut() {
|
for (column_index, request) in self.column_requests.iter_mut() {
|
||||||
if let Some(wait_duration) = request.is_awaiting_download() {
|
if request.is_awaiting_download() {
|
||||||
if request.download_failures > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
|
if let Some(last_error) = request.too_many_failures() {
|
||||||
return Err(Error::TooManyFailures);
|
return Err(Error::TooManyDownloadErrors(last_error));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(das): When is a fork and only a subset of your peers know about a block, we should
|
// TODO(das): When is a fork and only a subset of your peers know about a block, we should
|
||||||
@@ -270,21 +298,22 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
.entry(*peer_id)
|
.entry(*peer_id)
|
||||||
.or_default()
|
.or_default()
|
||||||
.push(*column_index);
|
.push(*column_index);
|
||||||
} else if wait_duration > MAX_STALE_NO_PEERS_DURATION {
|
|
||||||
// Allow to request to sit stale in `NotStarted` state for at most
|
|
||||||
// `MAX_STALE_NO_PEERS_DURATION`, else error and drop the request. Note that
|
|
||||||
// lookup will naturally retry when other peers send us attestations for
|
|
||||||
// descendants of this un-available lookup.
|
|
||||||
return Err(Error::NoPeer(*column_index));
|
|
||||||
} else {
|
} else {
|
||||||
// Do not issue requests if there is no custody peer on this column
|
// Do not issue requests if there is no custody peer on this column. The request
|
||||||
|
// will sit idle without making progress. The only way to make to progress is:
|
||||||
|
// - Add a new peer that custodies the missing columns
|
||||||
|
// - Call `continue_requests`
|
||||||
|
//
|
||||||
|
// Otherwise this request should be dropped and failed after some time.
|
||||||
|
// TODO(das): implement the above
|
||||||
|
indices_without_peers.push(column_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
|
for (peer_id, indices) in columns_to_request_by_peer.into_iter() {
|
||||||
let request_result = cx
|
let request_result = cx
|
||||||
.data_column_lookup_request(
|
.data_columns_by_root_request(
|
||||||
DataColumnsByRootRequester::Custody(self.custody_id),
|
DataColumnsByRootRequester::Custody(self.custody_id),
|
||||||
peer_id,
|
peer_id,
|
||||||
DataColumnsByRootSingleBlockRequest {
|
DataColumnsByRootSingleBlockRequest {
|
||||||
@@ -297,7 +326,9 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
// columns. For the rest of peers, don't downscore if columns are missing.
|
// columns. For the rest of peers, don't downscore if columns are missing.
|
||||||
lookup_peers.contains(&peer_id),
|
lookup_peers.contains(&peer_id),
|
||||||
)
|
)
|
||||||
.map_err(Error::SendFailed)?;
|
.map_err(|e| {
|
||||||
|
Error::InternalError(format!("Send failed data_columns_by_root {e:?}"))
|
||||||
|
})?;
|
||||||
|
|
||||||
match request_result {
|
match request_result {
|
||||||
LookupRequestResult::RequestSent(req_id) => {
|
LookupRequestResult::RequestSent(req_id) => {
|
||||||
@@ -306,7 +337,7 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
.column_requests
|
.column_requests
|
||||||
.get_mut(column_index)
|
.get_mut(column_index)
|
||||||
// Should never happen: column_index is iterated from column_requests
|
// Should never happen: column_index is iterated from column_requests
|
||||||
.ok_or(Error::BadState("unknown column_index".to_owned()))?;
|
.ok_or(Error::InternalError("unknown column_index".to_owned()))?;
|
||||||
|
|
||||||
column_request.on_download_start(req_id)?;
|
column_request.on_download_start(req_id)?;
|
||||||
}
|
}
|
||||||
@@ -319,117 +350,149 @@ impl<T: BeaconChainTypes> ActiveCustodyRequest<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.start_time.elapsed() > Duration::from_secs(REQUEST_EXPIRY_SECONDS)
|
||||||
|
&& !self.column_requests.values().any(|r| r.is_downloading())
|
||||||
|
{
|
||||||
|
let awaiting_peers_indicies = self
|
||||||
|
.column_requests
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, r)| r.is_awaiting_download())
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
return Err(Error::ExpiredNoCustodyPeers(awaiting_peers_indicies));
|
||||||
|
}
|
||||||
|
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO(das): this attempt count is nested into the existing lookup request count.
|
pub struct ColumnRequest<I: std::fmt::Display + PartialEq, T> {
|
||||||
const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3;
|
status: Status<I, T>,
|
||||||
|
download_failures: Vec<RpcResponseError>,
|
||||||
struct ColumnRequest<E: EthSpec> {
|
|
||||||
status: Status<E>,
|
|
||||||
download_failures: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, IntoStaticStr)]
|
||||||
enum Status<E: EthSpec> {
|
pub enum Status<I, T> {
|
||||||
NotStarted(Instant),
|
NotStarted,
|
||||||
Downloading(DataColumnsByRootRequestId),
|
Downloading(I),
|
||||||
Downloaded(PeerId, Arc<DataColumnSidecar<E>>, Duration),
|
Downloaded(PeerId, T, Duration),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<E: EthSpec> ColumnRequest<E> {
|
impl<I: std::fmt::Display + PartialEq, T> ColumnRequest<I, T> {
|
||||||
fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
status: Status::NotStarted(Instant::now()),
|
status: Status::NotStarted,
|
||||||
download_failures: 0,
|
download_failures: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_awaiting_download(&self) -> Option<Duration> {
|
pub fn is_awaiting_download(&self) -> bool {
|
||||||
match self.status {
|
match self.status {
|
||||||
Status::NotStarted(start_time) => Some(start_time.elapsed()),
|
Status::NotStarted => true,
|
||||||
Status::Downloading { .. } | Status::Downloaded { .. } => None,
|
Status::Downloading { .. } | Status::Downloaded { .. } => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_downloaded(&self) -> bool {
|
pub fn is_downloading(&self) -> bool {
|
||||||
match self.status {
|
match self.status {
|
||||||
Status::NotStarted { .. } | Status::Downloading { .. } => false,
|
Status::NotStarted => false,
|
||||||
|
Status::Downloading { .. } => true,
|
||||||
|
Status::Downloaded { .. } => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_downloaded(&self) -> bool {
|
||||||
|
match self.status {
|
||||||
|
Status::NotStarted | Status::Downloading { .. } => false,
|
||||||
Status::Downloaded { .. } => true,
|
Status::Downloaded { .. } => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_download_start(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
|
pub fn too_many_failures(&self) -> Option<RpcResponseError> {
|
||||||
|
if self.download_failures.len() > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS {
|
||||||
|
Some(
|
||||||
|
self.download_failures
|
||||||
|
.last()
|
||||||
|
.cloned()
|
||||||
|
.expect("download_failures is not empty"),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn on_download_start(&mut self, req_id: I) -> Result<(), Error> {
|
||||||
match &self.status {
|
match &self.status {
|
||||||
Status::NotStarted { .. } => {
|
Status::NotStarted => {
|
||||||
self.status = Status::Downloading(req_id);
|
self.status = Status::Downloading(req_id);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
other => Err(Error::BadState(format!(
|
other => Err(Error::InternalError(format!(
|
||||||
"bad state on_download_start expected NotStarted got {other:?}"
|
"bad state on_download_start expected NotStarted got {}",
|
||||||
|
Into::<&'static str>::into(other),
|
||||||
))),
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_download_error(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> {
|
pub fn on_download_error(&mut self, req_id: I) -> Result<(), Error> {
|
||||||
match &self.status {
|
match &self.status {
|
||||||
Status::Downloading(expected_req_id) => {
|
Status::Downloading(expected_req_id) => {
|
||||||
if req_id != *expected_req_id {
|
if req_id != *expected_req_id {
|
||||||
return Err(Error::UnexpectedRequestId {
|
return Err(Error::InternalError(format!(
|
||||||
expected_req_id: *expected_req_id,
|
"Received download result for req_id {req_id} expecting {expected_req_id}"
|
||||||
req_id,
|
)));
|
||||||
});
|
|
||||||
}
|
}
|
||||||
self.status = Status::NotStarted(Instant::now());
|
self.status = Status::NotStarted;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
other => Err(Error::BadState(format!(
|
other => Err(Error::InternalError(format!(
|
||||||
"bad state on_download_error expected Downloading got {other:?}"
|
"bad state on_download_error expected Downloading got {}",
|
||||||
|
Into::<&'static str>::into(other),
|
||||||
))),
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_download_error_and_mark_failure(
|
pub fn on_download_error_and_mark_failure(
|
||||||
&mut self,
|
&mut self,
|
||||||
req_id: DataColumnsByRootRequestId,
|
req_id: I,
|
||||||
|
e: RpcResponseError,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
// TODO(das): Should track which peers don't have data
|
self.download_failures.push(e);
|
||||||
self.download_failures += 1;
|
|
||||||
self.on_download_error(req_id)
|
self.on_download_error(req_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_download_success(
|
pub fn on_download_success(
|
||||||
&mut self,
|
&mut self,
|
||||||
req_id: DataColumnsByRootRequestId,
|
req_id: I,
|
||||||
peer_id: PeerId,
|
peer_id: PeerId,
|
||||||
data_column: Arc<DataColumnSidecar<E>>,
|
data_column: T,
|
||||||
seen_timestamp: Duration,
|
seen_timestamp: Duration,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
match &self.status {
|
match &self.status {
|
||||||
Status::Downloading(expected_req_id) => {
|
Status::Downloading(expected_req_id) => {
|
||||||
if req_id != *expected_req_id {
|
if req_id != *expected_req_id {
|
||||||
return Err(Error::UnexpectedRequestId {
|
return Err(Error::InternalError(format!(
|
||||||
expected_req_id: *expected_req_id,
|
"Received download result for req_id {req_id} expecting {expected_req_id}"
|
||||||
req_id,
|
)));
|
||||||
});
|
|
||||||
}
|
}
|
||||||
self.status = Status::Downloaded(peer_id, data_column, seen_timestamp);
|
self.status = Status::Downloaded(peer_id, data_column, seen_timestamp);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
other => Err(Error::BadState(format!(
|
other => Err(Error::InternalError(format!(
|
||||||
"bad state on_download_success expected Downloading got {other:?}"
|
"bad state on_download_success expected Downloading got {}",
|
||||||
|
Into::<&'static str>::into(other),
|
||||||
))),
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn complete(self) -> Result<(PeerId, Arc<DataColumnSidecar<E>>, Duration), Error> {
|
pub fn complete(self) -> Result<(PeerId, T, Duration), Error> {
|
||||||
match self.status {
|
match self.status {
|
||||||
Status::Downloaded(peer_id, data_column, seen_timestamp) => {
|
Status::Downloaded(peer_id, data_column, seen_timestamp) => {
|
||||||
Ok((peer_id, data_column, seen_timestamp))
|
Ok((peer_id, data_column, seen_timestamp))
|
||||||
}
|
}
|
||||||
other => Err(Error::BadState(format!(
|
other => Err(Error::InternalError(format!(
|
||||||
"bad state complete expected Downloaded got {other:?}"
|
"bad state complete expected Downloaded got {}",
|
||||||
|
Into::<&'static str>::into(other),
|
||||||
))),
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -26,7 +26,7 @@ mod blocks_by_root;
|
|||||||
mod data_columns_by_range;
|
mod data_columns_by_range;
|
||||||
mod data_columns_by_root;
|
mod data_columns_by_root;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, IntoStaticStr)]
|
#[derive(Debug, Clone, PartialEq, Eq, IntoStaticStr)]
|
||||||
pub enum LookupVerifyError {
|
pub enum LookupVerifyError {
|
||||||
NotEnoughResponsesReturned {
|
NotEnoughResponsesReturned {
|
||||||
actual: usize,
|
actual: usize,
|
||||||
@@ -177,12 +177,10 @@ impl<K: Eq + Hash, T: ActiveRequestItems> ActiveRequests<K, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn active_requests_of_peer(&self, peer_id: &PeerId) -> Vec<&K> {
|
pub fn active_requests(&self) -> impl Iterator<Item = (&K, &PeerId)> {
|
||||||
self.requests
|
self.requests
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(_, request)| &request.peer_id == peer_id)
|
.map(|(id, request)| (id, &request.peer_id))
|
||||||
.map(|(id, _)| id)
|
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter_request_peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
pub fn iter_request_peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||||
|
|||||||
@@ -98,13 +98,13 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
|||||||
// TODO(das): Should track failed sampling request for some time? Otherwise there's
|
// TODO(das): Should track failed sampling request for some time? Otherwise there's
|
||||||
// a risk of a loop with multiple triggers creating the request, then failing,
|
// a risk of a loop with multiple triggers creating the request, then failing,
|
||||||
// and repeat.
|
// and repeat.
|
||||||
debug!(?id, "Ignoring duplicate sampling request");
|
debug!(%id, "Ignoring duplicate sampling request");
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
?id,
|
%id,
|
||||||
column_selection = ?request.column_selection(),
|
column_selection = ?request.column_selection(),
|
||||||
"Created new sample request"
|
"Created new sample request"
|
||||||
);
|
);
|
||||||
@@ -138,7 +138,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
|||||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||||
let Some(request) = self.requests.get_mut(&id.id) else {
|
let Some(request) = self.requests.get_mut(&id.id) else {
|
||||||
// TODO(das): This log can happen if the request is error'ed early and dropped
|
// TODO(das): This log can happen if the request is error'ed early and dropped
|
||||||
debug!(?id, "Sample downloaded event for unknown request");
|
debug!(%id, "Sample downloaded event for unknown request");
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -167,7 +167,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
|||||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||||
let Some(request) = self.requests.get_mut(&id.id) else {
|
let Some(request) = self.requests.get_mut(&id.id) else {
|
||||||
// TODO(das): This log can happen if the request is error'ed early and dropped
|
// TODO(das): This log can happen if the request is error'ed early and dropped
|
||||||
debug!(?id, "Sample verified event for unknown request");
|
debug!(%id, "Sample verified event for unknown request");
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -191,7 +191,7 @@ impl<T: BeaconChainTypes> Sampling<T> {
|
|||||||
) -> Option<(SamplingRequester, SamplingResult)> {
|
) -> Option<(SamplingRequester, SamplingResult)> {
|
||||||
let result = result.transpose();
|
let result = result.transpose();
|
||||||
if let Some(result) = result {
|
if let Some(result) = result {
|
||||||
debug!(?id, ?result, "Sampling request completed, removing");
|
debug!(%id, ?result, "Sampling request completed, removing");
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::SAMPLING_REQUEST_RESULT,
|
&metrics::SAMPLING_REQUEST_RESULT,
|
||||||
&[metrics::from_result(&result)],
|
&[metrics::from_result(&result)],
|
||||||
@@ -570,7 +570,7 @@ impl<T: BeaconChainTypes> ActiveSamplingRequest<T> {
|
|||||||
// Send requests.
|
// Send requests.
|
||||||
let mut sent_request = false;
|
let mut sent_request = false;
|
||||||
for (peer_id, column_indexes) in column_indexes_to_request {
|
for (peer_id, column_indexes) in column_indexes_to_request {
|
||||||
cx.data_column_lookup_request(
|
cx.data_columns_by_root_request(
|
||||||
DataColumnsByRootRequester::Sampling(SamplingId {
|
DataColumnsByRootRequester::Sampling(SamplingId {
|
||||||
id: self.requester_id,
|
id: self.requester_id,
|
||||||
sampling_request_id: self.current_sampling_request_id,
|
sampling_request_id: self.current_sampling_request_id,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use beacon_chain::block_verification_types::RpcBlock;
|
use beacon_chain::block_verification_types::RpcBlock;
|
||||||
|
use itertools::Itertools;
|
||||||
use lighthouse_network::rpc::methods::BlocksByRangeRequest;
|
use lighthouse_network::rpc::methods::BlocksByRangeRequest;
|
||||||
use lighthouse_network::service::api_types::Id;
|
use lighthouse_network::service::api_types::Id;
|
||||||
use lighthouse_network::PeerId;
|
use lighthouse_network::PeerId;
|
||||||
@@ -17,15 +18,7 @@ const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5;
|
|||||||
/// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
|
/// after `MAX_BATCH_PROCESSING_ATTEMPTS` times, it is considered faulty.
|
||||||
const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;
|
const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3;
|
||||||
|
|
||||||
/// Type of expected batch.
|
// TODO(das): Consider merging with PeerGroup
|
||||||
#[derive(Debug, Copy, Clone, Display)]
|
|
||||||
#[strum(serialize_all = "snake_case")]
|
|
||||||
pub enum ByRangeRequestType {
|
|
||||||
BlocksAndColumns,
|
|
||||||
BlocksAndBlobs,
|
|
||||||
Blocks,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct BatchPeers {
|
pub struct BatchPeers {
|
||||||
block_peer: PeerId,
|
block_peer: PeerId,
|
||||||
@@ -53,6 +46,12 @@ impl BatchPeers {
|
|||||||
pub fn column(&self, index: &ColumnIndex) -> Option<&PeerId> {
|
pub fn column(&self, index: &ColumnIndex) -> Option<&PeerId> {
|
||||||
self.column_peers.get(index)
|
self.column_peers.get(index)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn iter_unique_peers(&self) -> impl Iterator<Item = &PeerId> {
|
||||||
|
std::iter::once(&self.block_peer)
|
||||||
|
.chain(self.column_peers.values())
|
||||||
|
.unique()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Allows customisation of the above constants used in other sync methods such as BackFillSync.
|
/// Allows customisation of the above constants used in other sync methods such as BackFillSync.
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use itertools::Itertools;
|
|||||||
use lighthouse_network::service::api_types::Id;
|
use lighthouse_network::service::api_types::Id;
|
||||||
use lighthouse_network::{PeerAction, PeerId};
|
use lighthouse_network::{PeerAction, PeerId};
|
||||||
use logging::crit;
|
use logging::crit;
|
||||||
use std::collections::{btree_map::Entry, BTreeMap, HashSet};
|
use std::collections::{btree_map::Entry, BTreeMap, HashMap, HashSet};
|
||||||
use strum::IntoStaticStr;
|
use strum::IntoStaticStr;
|
||||||
use tracing::{debug, instrument, warn};
|
use tracing::{debug, instrument, warn};
|
||||||
use types::{Epoch, EthSpec, Hash256, Slot};
|
use types::{Epoch, EthSpec, Hash256, Slot};
|
||||||
@@ -87,9 +87,11 @@ pub struct SyncingChain<T: BeaconChainTypes> {
|
|||||||
batches: BTreeMap<BatchId, BatchInfo<T::EthSpec>>,
|
batches: BTreeMap<BatchId, BatchInfo<T::EthSpec>>,
|
||||||
|
|
||||||
/// The peers that agree on the `target_head_slot` and `target_head_root` as a canonical chain
|
/// The peers that agree on the `target_head_slot` and `target_head_root` as a canonical chain
|
||||||
/// and thus available to download this chain from, as well as the batches we are currently
|
/// and thus available to download this chain from.
|
||||||
/// requesting.
|
///
|
||||||
peers: HashSet<PeerId>,
|
/// Also tracks, for each peer, the total number of requests made to it as part of this SyncingChain:
|
||||||
|
/// `HashMap<peer, total_requests_per_peer>`
|
||||||
|
peers: HashMap<PeerId, usize>,
|
||||||
|
|
||||||
/// Starting epoch of the next batch that needs to be downloaded.
|
/// Starting epoch of the next batch that needs to be downloaded.
|
||||||
to_be_downloaded: BatchId,
|
to_be_downloaded: BatchId,
|
||||||
@@ -121,7 +123,40 @@ pub enum ChainSyncingState {
|
|||||||
Syncing,
|
Syncing,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
#[derive(Debug, Eq, PartialEq)]
|
||||||
|
pub enum BatchStateSummary {
|
||||||
|
Downloading,
|
||||||
|
Processing,
|
||||||
|
AwaitingProcessing,
|
||||||
|
AwaitingValidation,
|
||||||
|
Unexpected(&'static str),
|
||||||
|
}
|
||||||
|
|
||||||
impl<T: BeaconChainTypes> SyncingChain<T> {
|
impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||||
|
/// Returns a summary of batch states for assertions in tests.
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn batches_state(&self) -> Vec<(BatchId, BatchStateSummary)> {
|
||||||
|
self.batches
|
||||||
|
.iter()
|
||||||
|
.map(|(id, batch)| {
|
||||||
|
let state = match batch.state() {
|
||||||
|
// A batch is never left in this state, it's only the initial value
|
||||||
|
BatchState::AwaitingDownload => {
|
||||||
|
BatchStateSummary::Unexpected("AwaitingDownload")
|
||||||
|
}
|
||||||
|
BatchState::Downloading { .. } => BatchStateSummary::Downloading,
|
||||||
|
BatchState::AwaitingProcessing { .. } => BatchStateSummary::AwaitingProcessing,
|
||||||
|
BatchState::Poisoned => BatchStateSummary::Unexpected("Poisoned"),
|
||||||
|
BatchState::Processing { .. } => BatchStateSummary::Processing,
|
||||||
|
BatchState::Failed => BatchStateSummary::Unexpected("Failed"),
|
||||||
|
BatchState::AwaitingValidation { .. } => BatchStateSummary::AwaitingValidation,
|
||||||
|
};
|
||||||
|
(*id, state)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
id: Id,
|
id: Id,
|
||||||
@@ -138,7 +173,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
target_head_slot,
|
target_head_slot,
|
||||||
target_head_root,
|
target_head_root,
|
||||||
batches: BTreeMap::new(),
|
batches: BTreeMap::new(),
|
||||||
peers: HashSet::from_iter([peer_id]),
|
peers: HashMap::from_iter([(peer_id, <_>::default())]),
|
||||||
to_be_downloaded: start_epoch,
|
to_be_downloaded: start_epoch,
|
||||||
processing_target: start_epoch,
|
processing_target: start_epoch,
|
||||||
optimistic_start: None,
|
optimistic_start: None,
|
||||||
@@ -168,7 +203,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
/// Peers currently syncing this chain.
|
/// Peers currently syncing this chain.
|
||||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||||
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
pub fn peers(&self) -> impl Iterator<Item = PeerId> + '_ {
|
||||||
self.peers.iter().cloned()
|
self.peers.keys().cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Progress in epochs made by the chain
|
/// Progress in epochs made by the chain
|
||||||
@@ -221,6 +256,12 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
request_id: Id,
|
request_id: Id,
|
||||||
blocks: Vec<RpcBlock<T::EthSpec>>,
|
blocks: Vec<RpcBlock<T::EthSpec>>,
|
||||||
) -> ProcessingResult {
|
) -> ProcessingResult {
|
||||||
|
// Account for one more request to each unique peer in `batch_peers`
|
||||||
|
// TODO(das): this code assumes that we do a single request per peer per RpcBlock
|
||||||
|
for peer in batch_peers.iter_unique_peers() {
|
||||||
|
*self.peers.entry(*peer).or_default() += 1;
|
||||||
|
}
|
||||||
|
|
||||||
// check if we have this batch
|
// check if we have this batch
|
||||||
let batch = match self.batches.get_mut(&batch_id) {
|
let batch = match self.batches.get_mut(&batch_id) {
|
||||||
None => {
|
None => {
|
||||||
@@ -400,11 +441,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
self.request_batches(network)?;
|
self.request_batches(network)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if !self.good_peers_on_sampling_subnets(self.processing_target, network) {
|
|
||||||
// This is to handle the case where no batch was sent for the current processing
|
|
||||||
// target when there is no sampling peers available. This is a valid state and should not
|
|
||||||
// return an error.
|
|
||||||
return Ok(KeepChain);
|
|
||||||
} else {
|
} else {
|
||||||
return Err(RemoveChain::WrongChainState(format!(
|
return Err(RemoveChain::WrongChainState(format!(
|
||||||
"Batch not found for current processing target {}",
|
"Batch not found for current processing target {}",
|
||||||
@@ -577,7 +613,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
"Batch failed to download. Dropping chain scoring peers"
|
"Batch failed to download. Dropping chain scoring peers"
|
||||||
);
|
);
|
||||||
|
|
||||||
for peer in self.peers.drain() {
|
for (peer, _) in self.peers.drain() {
|
||||||
network.report_peer(peer, penalty, "faulty_chain");
|
network.report_peer(peer, penalty, "faulty_chain");
|
||||||
}
|
}
|
||||||
Err(RemoveChain::ChainFailed {
|
Err(RemoveChain::ChainFailed {
|
||||||
@@ -842,7 +878,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
network: &mut SyncNetworkContext<T>,
|
network: &mut SyncNetworkContext<T>,
|
||||||
peer_id: PeerId,
|
peer_id: PeerId,
|
||||||
) -> ProcessingResult {
|
) -> ProcessingResult {
|
||||||
self.peers.insert(peer_id);
|
self.peers.insert(peer_id, <_>::default());
|
||||||
self.request_batches(network)
|
self.request_batches(network)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -854,7 +890,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
network: &mut SyncNetworkContext<T>,
|
network: &mut SyncNetworkContext<T>,
|
||||||
batch_id: BatchId,
|
batch_id: BatchId,
|
||||||
peer_id: &PeerId,
|
|
||||||
request_id: Id,
|
request_id: Id,
|
||||||
err: RpcResponseError,
|
err: RpcResponseError,
|
||||||
) -> ProcessingResult {
|
) -> ProcessingResult {
|
||||||
@@ -869,7 +904,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
debug!(
|
debug!(
|
||||||
batch_epoch = %batch_id,
|
batch_epoch = %batch_id,
|
||||||
batch_state = ?batch.state(),
|
batch_state = ?batch.state(),
|
||||||
%peer_id,
|
|
||||||
%request_id,
|
%request_id,
|
||||||
?batch_state,
|
?batch_state,
|
||||||
"Batch not expecting block"
|
"Batch not expecting block"
|
||||||
@@ -880,12 +914,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
batch_epoch = %batch_id,
|
batch_epoch = %batch_id,
|
||||||
batch_state = ?batch.state(),
|
batch_state = ?batch.state(),
|
||||||
error = ?err,
|
error = ?err,
|
||||||
%peer_id,
|
|
||||||
%request_id,
|
%request_id,
|
||||||
"Batch download error"
|
"Batch download error"
|
||||||
);
|
);
|
||||||
if let BatchOperationOutcome::Failed { blacklist } =
|
if let BatchOperationOutcome::Failed { blacklist } =
|
||||||
batch.download_failed(Some(*peer_id))?
|
// TODO(das): Is it necessary for the batch to track failed peers? Can we make this
|
||||||
|
// mechanism compatible with PeerDAS and before PeerDAS?
|
||||||
|
batch.download_failed(None)?
|
||||||
{
|
{
|
||||||
return Err(RemoveChain::ChainFailed {
|
return Err(RemoveChain::ChainFailed {
|
||||||
blacklist,
|
blacklist,
|
||||||
@@ -896,7 +931,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
} else {
|
} else {
|
||||||
debug!(
|
debug!(
|
||||||
batch_epoch = %batch_id,
|
batch_epoch = %batch_id,
|
||||||
%peer_id,
|
|
||||||
%request_id,
|
%request_id,
|
||||||
batch_state,
|
batch_state,
|
||||||
"Batch not found"
|
"Batch not found"
|
||||||
@@ -937,6 +971,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
},
|
},
|
||||||
&synced_peers,
|
&synced_peers,
|
||||||
&failed_peers,
|
&failed_peers,
|
||||||
|
&self.peers,
|
||||||
) {
|
) {
|
||||||
Ok(request_id) => {
|
Ok(request_id) => {
|
||||||
// inform the batch about the new request
|
// inform the batch about the new request
|
||||||
@@ -953,14 +988,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
return Ok(KeepChain);
|
return Ok(KeepChain);
|
||||||
}
|
}
|
||||||
Err(e) => match e {
|
Err(e) => match e {
|
||||||
// TODO(das): Handle the NoPeer case explicitly and don't drop the batch. For
|
RpcRequestSendError::InternalError(e) => {
|
||||||
// sync to work properly it must be okay to have "stalled" batches in
|
|
||||||
// AwaitingDownload state. Currently it will error with invalid state if
|
|
||||||
// that happens. Sync manager must periodically prune stalled batches like
|
|
||||||
// we do for lookup sync. Then we can deprecate the redundant
|
|
||||||
// `good_peers_on_sampling_subnets` checks.
|
|
||||||
e
|
|
||||||
@ (RpcRequestSendError::NoPeer(_) | RpcRequestSendError::InternalError(_)) => {
|
|
||||||
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
// NOTE: under normal conditions this shouldn't happen but we handle it anyway
|
||||||
warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request");
|
warn!(%batch_id, error = ?e, "batch_id" = %batch_id, %batch, "Could not send batch request");
|
||||||
// register the failed download and check if the batch can be retried
|
// register the failed download and check if the batch can be retried
|
||||||
@@ -1019,11 +1047,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
// check if we have the batch for our optimistic start. If not, request it first.
|
// check if we have the batch for our optimistic start. If not, request it first.
|
||||||
// We wait for this batch before requesting any other batches.
|
// We wait for this batch before requesting any other batches.
|
||||||
if let Some(epoch) = self.optimistic_start {
|
if let Some(epoch) = self.optimistic_start {
|
||||||
if !self.good_peers_on_sampling_subnets(epoch, network) {
|
|
||||||
debug!("Waiting for peers to be available on sampling column subnets");
|
|
||||||
return Ok(KeepChain);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Entry::Vacant(entry) = self.batches.entry(epoch) {
|
if let Entry::Vacant(entry) = self.batches.entry(epoch) {
|
||||||
let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH);
|
let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH);
|
||||||
entry.insert(optimistic_batch);
|
entry.insert(optimistic_batch);
|
||||||
@@ -1046,35 +1069,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
Ok(KeepChain)
|
Ok(KeepChain)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks all sampling column subnets for peers. Returns `true` if there is at least one peer in
|
|
||||||
/// every sampling column subnet.
|
|
||||||
fn good_peers_on_sampling_subnets(
|
|
||||||
&self,
|
|
||||||
epoch: Epoch,
|
|
||||||
network: &SyncNetworkContext<T>,
|
|
||||||
) -> bool {
|
|
||||||
if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) {
|
|
||||||
// Require peers on all sampling column subnets before sending batches
|
|
||||||
let peers_on_all_custody_subnets = network
|
|
||||||
.network_globals()
|
|
||||||
.sampling_subnets
|
|
||||||
.iter()
|
|
||||||
.all(|subnet_id| {
|
|
||||||
let peer_count = network
|
|
||||||
.network_globals()
|
|
||||||
.peers
|
|
||||||
.read()
|
|
||||||
.good_custody_subnet_peer(*subnet_id)
|
|
||||||
.count();
|
|
||||||
|
|
||||||
peer_count > 0
|
|
||||||
});
|
|
||||||
peers_on_all_custody_subnets
|
|
||||||
} else {
|
|
||||||
true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates the next required batch from the chain. If there are no more batches required,
|
/// Creates the next required batch from the chain. If there are no more batches required,
|
||||||
/// `false` is returned.
|
/// `false` is returned.
|
||||||
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
#[instrument(parent = None,level = "info", fields(chain = self.id , service = "range_sync"), skip_all)]
|
||||||
@@ -1107,15 +1101,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
// don't send batch requests until we have peers on sampling subnets
|
|
||||||
// TODO(das): this is a workaround to avoid sending out excessive block requests because
|
|
||||||
// block and data column requests are currently coupled. This can be removed once we find a
|
|
||||||
// way to decouple the requests and do retries individually, see issue #6258.
|
|
||||||
if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) {
|
|
||||||
debug!("Waiting for peers to be available on custody column subnets");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no batch needs a retry, attempt to send the batch of the next epoch to download
|
// If no batch needs a retry, attempt to send the batch of the next epoch to download
|
||||||
let next_batch_id = self.to_be_downloaded;
|
let next_batch_id = self.to_be_downloaded;
|
||||||
// this batch could have been included already being an optimistic batch
|
// this batch could have been included already being an optimistic batch
|
||||||
|
|||||||
@@ -54,6 +54,13 @@ pub struct ChainCollection<T: BeaconChainTypes> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: BeaconChainTypes> ChainCollection<T> {
|
impl<T: BeaconChainTypes> ChainCollection<T> {
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn iter(&self) -> impl Iterator<Item = &SyncingChain<T>> {
|
||||||
|
self.finalized_chains
|
||||||
|
.values()
|
||||||
|
.chain(self.head_chains.values())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
pub fn new(beacon_chain: Arc<BeaconChain<T>>) -> Self {
|
||||||
ChainCollection {
|
ChainCollection {
|
||||||
beacon_chain,
|
beacon_chain,
|
||||||
|
|||||||
@@ -9,10 +9,9 @@ mod sync_type;
|
|||||||
|
|
||||||
pub use batch::{
|
pub use batch::{
|
||||||
BatchConfig, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, BatchState,
|
BatchConfig, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, BatchState,
|
||||||
ByRangeRequestType,
|
|
||||||
};
|
};
|
||||||
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub use chain_collection::SyncChainStatus;
|
pub use chain::BatchStateSummary;
|
||||||
|
pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
|
||||||
pub use range::RangeSync;
|
pub use range::RangeSync;
|
||||||
pub use sync_type::RangeSyncType;
|
pub use sync_type::RangeSyncType;
|
||||||
|
|||||||
@@ -39,6 +39,8 @@
|
|||||||
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
|
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
|
||||||
//! and further batches are requested as current blocks are being processed.
|
//! and further batches are requested as current blocks are being processed.
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
use super::chain::BatchStateSummary;
|
||||||
use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
|
use super::chain::{BatchId, ChainId, RemoveChain, SyncingChain};
|
||||||
use super::chain_collection::{ChainCollection, SyncChainStatus};
|
use super::chain_collection::{ChainCollection, SyncChainStatus};
|
||||||
use super::sync_type::RangeSyncType;
|
use super::sync_type::RangeSyncType;
|
||||||
@@ -100,10 +102,23 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) fn __failed_chains(&mut self) -> Vec<Hash256> {
|
pub(crate) fn failed_chains(&mut self) -> Vec<Hash256> {
|
||||||
self.failed_chains.keys().copied().collect()
|
self.failed_chains.keys().copied().collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn batches_state(&self) -> Vec<(ChainId, BatchId, BatchStateSummary)> {
|
||||||
|
self.chains
|
||||||
|
.iter()
|
||||||
|
.flat_map(|chain| {
|
||||||
|
chain
|
||||||
|
.batches_state()
|
||||||
|
.into_iter()
|
||||||
|
.map(|(batch_id, state)| (chain.id(), batch_id, state))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
#[instrument(parent = None,
|
#[instrument(parent = None,
|
||||||
level = "info",
|
level = "info",
|
||||||
fields(component = "range_sync"),
|
fields(component = "range_sync"),
|
||||||
@@ -344,7 +359,6 @@ where
|
|||||||
pub fn inject_error(
|
pub fn inject_error(
|
||||||
&mut self,
|
&mut self,
|
||||||
network: &mut SyncNetworkContext<T>,
|
network: &mut SyncNetworkContext<T>,
|
||||||
peer_id: PeerId,
|
|
||||||
batch_id: BatchId,
|
batch_id: BatchId,
|
||||||
chain_id: ChainId,
|
chain_id: ChainId,
|
||||||
request_id: Id,
|
request_id: Id,
|
||||||
@@ -352,7 +366,7 @@ where
|
|||||||
) {
|
) {
|
||||||
// check that this request is pending
|
// check that this request is pending
|
||||||
match self.chains.call_by_id(chain_id, |chain| {
|
match self.chains.call_by_id(chain_id, |chain| {
|
||||||
chain.inject_error(network, batch_id, &peer_id, request_id, err)
|
chain.inject_error(network, batch_id, request_id, err)
|
||||||
}) {
|
}) {
|
||||||
Ok((removed_chain, sync_type)) => {
|
Ok((removed_chain, sync_type)) => {
|
||||||
if let Some((removed_chain, remove_reason)) = removed_chain {
|
if let Some((removed_chain, remove_reason)) = removed_chain {
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ use lighthouse_network::{
|
|||||||
SamplingRequester, SingleLookupReqId, SyncRequestId,
|
SamplingRequester, SingleLookupReqId, SyncRequestId,
|
||||||
},
|
},
|
||||||
types::SyncState,
|
types::SyncState,
|
||||||
NetworkConfig, NetworkGlobals, PeerId,
|
NetworkConfig, NetworkGlobals, PeerId, SyncInfo,
|
||||||
};
|
};
|
||||||
use slot_clock::{SlotClock, TestingSlotClock};
|
use slot_clock::{SlotClock, TestingSlotClock};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
@@ -53,8 +53,21 @@ const SAMPLING_REQUIRED_SUCCESSES: usize = 2;
|
|||||||
type DCByRootIds = Vec<DCByRootId>;
|
type DCByRootIds = Vec<DCByRootId>;
|
||||||
type DCByRootId = (SyncRequestId, Vec<ColumnIndex>);
|
type DCByRootId = (SyncRequestId, Vec<ColumnIndex>);
|
||||||
|
|
||||||
|
pub enum PeersConfig {
|
||||||
|
SupernodeAndRandom,
|
||||||
|
SupernodeOnly,
|
||||||
|
}
|
||||||
|
|
||||||
impl TestRig {
|
impl TestRig {
|
||||||
pub fn test_setup() -> Self {
|
pub fn test_setup() -> Self {
|
||||||
|
Self::test_setup_with_options(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn test_setup_as_supernode() -> Self {
|
||||||
|
Self::test_setup_with_options(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_setup_with_options(is_supernode: bool) -> Self {
|
||||||
// Use `fork_from_env` logic to set correct fork epochs
|
// Use `fork_from_env` logic to set correct fork epochs
|
||||||
let spec = test_spec::<E>();
|
let spec = test_spec::<E>();
|
||||||
|
|
||||||
@@ -83,10 +96,11 @@ impl TestRig {
|
|||||||
// TODO(das): make the generation of the ENR use the deterministic rng to have consistent
|
// TODO(das): make the generation of the ENR use the deterministic rng to have consistent
|
||||||
// column assignments
|
// column assignments
|
||||||
let network_config = Arc::new(NetworkConfig::default());
|
let network_config = Arc::new(NetworkConfig::default());
|
||||||
let globals = Arc::new(NetworkGlobals::new_test_globals(
|
let globals = Arc::new(NetworkGlobals::new_test_globals_as_supernode(
|
||||||
Vec::new(),
|
Vec::new(),
|
||||||
network_config,
|
network_config,
|
||||||
chain.spec.clone(),
|
chain.spec.clone(),
|
||||||
|
is_supernode,
|
||||||
));
|
));
|
||||||
let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing(
|
let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing(
|
||||||
globals,
|
globals,
|
||||||
@@ -113,6 +127,7 @@ impl TestRig {
|
|||||||
network_rx,
|
network_rx,
|
||||||
network_rx_queue: vec![],
|
network_rx_queue: vec![],
|
||||||
sync_rx,
|
sync_rx,
|
||||||
|
sent_blocks_by_range: <_>::default(),
|
||||||
rng,
|
rng,
|
||||||
network_globals: beacon_processor.network_globals.clone(),
|
network_globals: beacon_processor.network_globals.clone(),
|
||||||
sync_manager: SyncManager::new(
|
sync_manager: SyncManager::new(
|
||||||
@@ -244,8 +259,8 @@ impl TestRig {
|
|||||||
self.sync_manager.active_parent_lookups().len()
|
self.sync_manager.active_parent_lookups().len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn active_range_sync_chain(&self) -> (RangeSyncType, Slot, Slot) {
|
fn active_range_sync_chain(&mut self) -> (RangeSyncType, Slot, Slot) {
|
||||||
self.sync_manager.get_range_sync_chains().unwrap().unwrap()
|
self.sync_manager.range_sync().state().unwrap().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn assert_single_lookups_count(&self, count: usize) {
|
fn assert_single_lookups_count(&self, count: usize) {
|
||||||
@@ -355,29 +370,63 @@ impl TestRig {
|
|||||||
self.expect_empty_network();
|
self.expect_empty_network();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_connected_peer(&mut self) -> PeerId {
|
// Don't make pub, use `add_connected_peer_testing_only`
|
||||||
|
fn new_connected_peer(&mut self) -> PeerId {
|
||||||
|
self.add_connected_peer_testing_only(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't make pub, use `add_connected_peer_testing_only`
|
||||||
|
fn new_connected_supernode_peer(&mut self) -> PeerId {
|
||||||
|
self.add_connected_peer_testing_only(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_connected_peer_testing_only(&mut self, supernode: bool) -> PeerId {
|
||||||
let key = self.determinstic_key();
|
let key = self.determinstic_key();
|
||||||
let peer_id = self
|
let peer_id = self
|
||||||
.network_globals
|
.network_globals
|
||||||
.peers
|
.peers
|
||||||
.write()
|
.write()
|
||||||
.__add_connected_peer_testing_only(false, &self.harness.spec, key);
|
.__add_connected_peer_testing_only(supernode, &self.harness.spec, key);
|
||||||
self.log(&format!("Added new peer for testing {peer_id:?}"));
|
let mut peer_custody_subnets = self
|
||||||
|
.network_globals
|
||||||
|
.peers
|
||||||
|
.read()
|
||||||
|
.peer_info(&peer_id)
|
||||||
|
.expect("peer was just added")
|
||||||
|
.custody_subnets_iter()
|
||||||
|
.map(|subnet| **subnet)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
peer_custody_subnets.sort_unstable();
|
||||||
|
self.log(&format!(
|
||||||
|
"Added new peer for testing {peer_id:?} custody subnets {peer_custody_subnets:?}"
|
||||||
|
));
|
||||||
peer_id
|
peer_id
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_connected_supernode_peer(&mut self) -> PeerId {
|
pub fn add_sync_peer(&mut self, supernode: bool, remote_info: SyncInfo) -> PeerId {
|
||||||
let key = self.determinstic_key();
|
let peer_id = self.add_connected_peer_testing_only(supernode);
|
||||||
self.network_globals
|
self.send_sync_message(SyncMessage::AddPeer(peer_id, remote_info));
|
||||||
.peers
|
peer_id
|
||||||
.write()
|
|
||||||
.__add_connected_peer_testing_only(true, &self.harness.spec, key)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn determinstic_key(&mut self) -> CombinedKey {
|
fn determinstic_key(&mut self) -> CombinedKey {
|
||||||
k256::ecdsa::SigningKey::random(&mut self.rng).into()
|
k256::ecdsa::SigningKey::random(&mut self.rng).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn add_sync_peers(&mut self, config: PeersConfig, remote_info: SyncInfo) {
|
||||||
|
match config {
|
||||||
|
PeersConfig::SupernodeAndRandom => {
|
||||||
|
for _ in 0..100 {
|
||||||
|
self.add_sync_peer(false, remote_info.clone());
|
||||||
|
}
|
||||||
|
self.add_sync_peer(true, remote_info);
|
||||||
|
}
|
||||||
|
PeersConfig::SupernodeOnly => {
|
||||||
|
self.add_sync_peer(true, remote_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn new_connected_peers_for_peerdas(&mut self) {
|
pub fn new_connected_peers_for_peerdas(&mut self) {
|
||||||
// Enough sampling peers with few columns
|
// Enough sampling peers with few columns
|
||||||
for _ in 0..100 {
|
for _ in 0..100 {
|
||||||
@@ -840,6 +889,19 @@ impl TestRig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find, not pop
|
||||||
|
pub fn filter_received_network_events<T, F: Fn(&NetworkMessage<E>) -> Option<T>>(
|
||||||
|
&mut self,
|
||||||
|
predicate_transform: F,
|
||||||
|
) -> Vec<T> {
|
||||||
|
self.drain_network_rx();
|
||||||
|
|
||||||
|
self.network_rx_queue
|
||||||
|
.iter()
|
||||||
|
.filter_map(predicate_transform)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
|
pub fn pop_received_processor_event<T, F: Fn(&WorkEvent<E>) -> Option<T>>(
|
||||||
&mut self,
|
&mut self,
|
||||||
predicate_transform: F,
|
predicate_transform: F,
|
||||||
@@ -1088,6 +1150,21 @@ impl TestRig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn expect_no_penalty_for_anyone(&mut self) {
|
||||||
|
self.drain_network_rx();
|
||||||
|
let downscore_events = self
|
||||||
|
.network_rx_queue
|
||||||
|
.iter()
|
||||||
|
.filter_map(|ev| match ev {
|
||||||
|
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((peer_id, msg)),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
if !downscore_events.is_empty() {
|
||||||
|
panic!("Expected no downscoring events but found: {downscore_events:?}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
fn expect_parent_chain_process(&mut self) {
|
fn expect_parent_chain_process(&mut self) {
|
||||||
match self.beacon_processor_rx.try_recv() {
|
match self.beacon_processor_rx.try_recv() {
|
||||||
@@ -1123,6 +1200,25 @@ impl TestRig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
pub fn expect_penalties(&mut self, expected_penalty_msg: &'static str) {
|
||||||
|
let all_penalties = self.filter_received_network_events(|ev| match ev {
|
||||||
|
NetworkMessage::ReportPeer { peer_id, msg, .. } => Some((*peer_id, *msg)),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
if all_penalties
|
||||||
|
.iter()
|
||||||
|
.any(|(_, msg)| *msg != expected_penalty_msg)
|
||||||
|
{
|
||||||
|
panic!(
|
||||||
|
"Expected penalties only of {expected_penalty_msg}, but found {all_penalties:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
self.log(&format!(
|
||||||
|
"Found expected penalties {expected_penalty_msg}: {all_penalties:?}"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) {
|
pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) {
|
||||||
let penalty_msg = self
|
let penalty_msg = self
|
||||||
|
|||||||
@@ -6,13 +6,17 @@ use beacon_chain::builder::Witness;
|
|||||||
use beacon_chain::eth1_chain::CachingEth1Backend;
|
use beacon_chain::eth1_chain::CachingEth1Backend;
|
||||||
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
|
use beacon_chain::test_utils::{BeaconChainHarness, EphemeralHarnessType};
|
||||||
use beacon_processor::WorkEvent;
|
use beacon_processor::WorkEvent;
|
||||||
|
use lighthouse_network::service::api_types::ComponentsByRangeRequestId;
|
||||||
use lighthouse_network::NetworkGlobals;
|
use lighthouse_network::NetworkGlobals;
|
||||||
use rand_chacha::ChaCha20Rng;
|
use rand_chacha::ChaCha20Rng;
|
||||||
use slot_clock::ManualSlotClock;
|
use slot_clock::ManualSlotClock;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use store::MemoryStore;
|
use store::MemoryStore;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use types::{ChainSpec, ForkName, MinimalEthSpec as E};
|
use types::{ChainSpec, ForkName, MinimalEthSpec as E, SignedBeaconBlock};
|
||||||
|
|
||||||
|
pub use lookups::PeersConfig;
|
||||||
|
|
||||||
mod lookups;
|
mod lookups;
|
||||||
mod range;
|
mod range;
|
||||||
@@ -64,4 +68,7 @@ struct TestRig {
|
|||||||
rng: ChaCha20Rng,
|
rng: ChaCha20Rng,
|
||||||
fork_name: ForkName,
|
fork_name: ForkName,
|
||||||
spec: Arc<ChainSpec>,
|
spec: Arc<ChainSpec>,
|
||||||
|
|
||||||
|
// Cache of sent blocks for PeerDAS responses
|
||||||
|
sent_blocks_by_range: HashMap<ComponentsByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -321,6 +321,10 @@ impl<E: EthSpec, Payload: AbstractExecPayload<E>> SignedBeaconBlock<E, Payload>
|
|||||||
.unwrap_or(0)
|
.unwrap_or(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn has_data(&self) -> bool {
|
||||||
|
self.num_expected_blobs() > 0
|
||||||
|
}
|
||||||
|
|
||||||
/// Used for displaying commitments in logs.
|
/// Used for displaying commitments in logs.
|
||||||
pub fn commitments_formatted(&self) -> String {
|
pub fn commitments_formatted(&self) -> String {
|
||||||
let Ok(commitments) = self.message().body().blob_kzg_commitments() else {
|
let Ok(commitments) = self.message().body().blob_kzg_commitments() else {
|
||||||
|
|||||||
Reference in New Issue
Block a user