mirror of
https://github.com/sigp/lighthouse.git
synced 2026-05-31 05:07:12 +00:00
Sync peer attribution (#7733)
Which issue # does this PR address? Closes #7604 Improvements to range sync including: 1. Contain column requests only to peers that are part of the SyncingChain 2. Attribute the fault to the correct peer and downscore them if they don't return the data columns for the request 3. Improve sync performance by retrying only the failed columns from other peers instead of failing the entire batch 4. Uses the earliest_available_slot to make requests to peers that claim to have the epoch. Note: if no earliest_available_slot info is available, fallback to using previous logic i.e. assume peer has everything backfilled upto WS checkpoint/da boundary Tested this on fusaka-devnet-2 with a full node and supernode and the recovering logic seems to works well. Also tested this a little on mainnet. Need to do more testing and possibly add some unit tests.
This commit is contained in:
@@ -1,15 +1,17 @@
|
||||
use beacon_chain::{
|
||||
block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root,
|
||||
};
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId,
|
||||
use lighthouse_network::{
|
||||
service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId,
|
||||
},
|
||||
PeerAction, PeerId,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use types::{
|
||||
BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec,
|
||||
Hash256, RuntimeVariableList, SignedBeaconBlock,
|
||||
};
|
||||
|
||||
pub struct RangeBlockComponentsRequest<E: EthSpec> {
|
||||
/// Blocks we have received awaiting for their corresponding sidecar.
|
||||
blocks_request: ByRangeRequest<BlocksByRangeRequestId, Vec<Arc<SignedBeaconBlock<E>>>>,
|
||||
@@ -30,24 +32,38 @@ enum RangeBlockDataRequest<E: EthSpec> {
|
||||
DataColumnsByRangeRequestId,
|
||||
ByRangeRequest<DataColumnsByRangeRequestId, DataColumnSidecarList<E>>,
|
||||
>,
|
||||
/// The column indices corresponding to the request
|
||||
column_peers: HashMap<DataColumnsByRangeRequestId, Vec<ColumnIndex>>,
|
||||
expected_custody_columns: Vec<ColumnIndex>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CouplingError {
|
||||
pub(crate) msg: String,
|
||||
pub(crate) column_and_peer: Option<(Vec<(ColumnIndex, PeerId)>, PeerAction)>,
|
||||
}
|
||||
|
||||
impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn new(
|
||||
blocks_req_id: BlocksByRangeRequestId,
|
||||
blobs_req_id: Option<BlobsByRangeRequestId>,
|
||||
data_columns: Option<(Vec<DataColumnsByRangeRequestId>, Vec<ColumnIndex>)>,
|
||||
data_columns: Option<(
|
||||
Vec<(DataColumnsByRangeRequestId, Vec<ColumnIndex>)>,
|
||||
Vec<ColumnIndex>,
|
||||
)>,
|
||||
) -> Self {
|
||||
let block_data_request = if let Some(blobs_req_id) = blobs_req_id {
|
||||
RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id))
|
||||
} else if let Some((requests, expected_custody_columns)) = data_columns {
|
||||
let column_peers: HashMap<_, _> = requests.into_iter().collect();
|
||||
RangeBlockDataRequest::DataColumns {
|
||||
requests: requests
|
||||
.into_iter()
|
||||
.map(|id| (id, ByRangeRequest::Active(id)))
|
||||
requests: column_peers
|
||||
.keys()
|
||||
.map(|id| (*id, ByRangeRequest::Active(*id)))
|
||||
.collect(),
|
||||
column_peers,
|
||||
expected_custody_columns,
|
||||
}
|
||||
} else {
|
||||
@@ -60,6 +76,28 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Modifies `self` by inserting a new `DataColumnsByRangeRequestId` for a formerly failed
|
||||
/// request for some columns.
|
||||
pub fn reinsert_failed_column_requests(
|
||||
&mut self,
|
||||
failed_column_requests: Vec<(DataColumnsByRangeRequestId, Vec<u64>)>,
|
||||
) -> Result<(), String> {
|
||||
match &mut self.block_data_request {
|
||||
RangeBlockDataRequest::DataColumns {
|
||||
requests,
|
||||
expected_custody_columns: _,
|
||||
column_peers,
|
||||
} => {
|
||||
for (request, columns) in failed_column_requests.into_iter() {
|
||||
requests.insert(request, ByRangeRequest::Active(request));
|
||||
column_peers.insert(request, columns);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ => Err("not a column request".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_blocks(
|
||||
&mut self,
|
||||
req_id: BlocksByRangeRequestId,
|
||||
@@ -105,12 +143,15 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn responses(&self, spec: &ChainSpec) -> Option<Result<Vec<RpcBlock<E>>, String>> {
|
||||
pub fn responses(
|
||||
&mut self,
|
||||
spec: &ChainSpec,
|
||||
) -> Option<Result<Vec<RpcBlock<E>>, CouplingError>> {
|
||||
let Some(blocks) = self.blocks_request.to_finished() else {
|
||||
return None;
|
||||
};
|
||||
|
||||
match &self.block_data_request {
|
||||
match &mut self.block_data_request {
|
||||
RangeBlockDataRequest::NoData => {
|
||||
Some(Self::responses_with_blobs(blocks.to_vec(), vec![], spec))
|
||||
}
|
||||
@@ -127,8 +168,10 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
RangeBlockDataRequest::DataColumns {
|
||||
requests,
|
||||
expected_custody_columns,
|
||||
column_peers,
|
||||
} => {
|
||||
let mut data_columns = vec![];
|
||||
let mut column_to_peer_id: HashMap<u64, PeerId> = HashMap::new();
|
||||
for req in requests.values() {
|
||||
let Some(data) = req.to_finished() else {
|
||||
return None;
|
||||
@@ -136,12 +179,33 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
data_columns.extend(data.clone())
|
||||
}
|
||||
|
||||
Some(Self::responses_with_custody_columns(
|
||||
// Note: this assumes that only 1 peer is responsible for a column
|
||||
// with a batch.
|
||||
for (id, columns) in column_peers {
|
||||
for column in columns {
|
||||
column_to_peer_id.insert(*column, id.peer);
|
||||
}
|
||||
}
|
||||
|
||||
let resp = Self::responses_with_custody_columns(
|
||||
blocks.to_vec(),
|
||||
data_columns,
|
||||
column_to_peer_id,
|
||||
expected_custody_columns,
|
||||
spec,
|
||||
))
|
||||
);
|
||||
|
||||
if let Err(err) = &resp {
|
||||
if let Some((peers, _)) = &err.column_and_peer {
|
||||
for (_, peer) in peers.iter() {
|
||||
// find the req id associated with the peer and
|
||||
// delete it from the entries
|
||||
requests.retain(|&k, _| k.peer != *peer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -150,7 +214,7 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
blobs: Vec<Arc<BlobSidecar<E>>>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Vec<RpcBlock<E>>, String> {
|
||||
) -> Result<Vec<RpcBlock<E>>, CouplingError> {
|
||||
// There can't be more more blobs than blocks. i.e. sending any blob (empty
|
||||
// included) for a skipped slot is not permitted.
|
||||
let mut responses = Vec::with_capacity(blocks.len());
|
||||
@@ -165,17 +229,26 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
.unwrap_or(false);
|
||||
pair_next_blob
|
||||
} {
|
||||
blob_list.push(blob_iter.next().ok_or("Missing next blob".to_string())?);
|
||||
blob_list.push(blob_iter.next().ok_or_else(|| CouplingError {
|
||||
msg: "Missing next blob".to_string(),
|
||||
column_and_peer: None,
|
||||
})?);
|
||||
}
|
||||
|
||||
let mut blobs_buffer = vec![None; max_blobs_per_block];
|
||||
for blob in blob_list {
|
||||
let blob_index = blob.index as usize;
|
||||
let Some(blob_opt) = blobs_buffer.get_mut(blob_index) else {
|
||||
return Err("Invalid blob index".to_string());
|
||||
return Err(CouplingError {
|
||||
msg: "Invalid blob index".to_string(),
|
||||
column_and_peer: None,
|
||||
});
|
||||
};
|
||||
if blob_opt.is_some() {
|
||||
return Err("Repeat blob index".to_string());
|
||||
return Err(CouplingError {
|
||||
msg: "Repeat blob index".to_string(),
|
||||
column_and_peer: None,
|
||||
});
|
||||
} else {
|
||||
*blob_opt = Some(blob);
|
||||
}
|
||||
@@ -184,13 +257,24 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
blobs_buffer.into_iter().flatten().collect::<Vec<_>>(),
|
||||
max_blobs_per_block,
|
||||
)
|
||||
.map_err(|_| "Blobs returned exceeds max length".to_string())?;
|
||||
responses.push(RpcBlock::new(None, block, Some(blobs)).map_err(|e| format!("{e:?}"))?)
|
||||
.map_err(|_| CouplingError {
|
||||
msg: "Blobs returned exceeds max length".to_string(),
|
||||
column_and_peer: None,
|
||||
})?;
|
||||
responses.push(
|
||||
RpcBlock::new(None, block, Some(blobs)).map_err(|e| CouplingError {
|
||||
msg: format!("{e:?}"),
|
||||
column_and_peer: None,
|
||||
})?,
|
||||
)
|
||||
}
|
||||
|
||||
// if accumulated sidecars is not empty, throw an error.
|
||||
if blob_iter.next().is_some() {
|
||||
return Err("Received sidecars that don't pair well".to_string());
|
||||
return Err(CouplingError {
|
||||
msg: "Received sidecars that don't pair well".to_string(),
|
||||
column_and_peer: None,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(responses)
|
||||
@@ -199,9 +283,10 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
fn responses_with_custody_columns(
|
||||
blocks: Vec<Arc<SignedBeaconBlock<E>>>,
|
||||
data_columns: DataColumnSidecarList<E>,
|
||||
column_to_peer: HashMap<u64, PeerId>,
|
||||
expects_custody_columns: &[ColumnIndex],
|
||||
spec: &ChainSpec,
|
||||
) -> Result<Vec<RpcBlock<E>>, String> {
|
||||
) -> Result<Vec<RpcBlock<E>>, CouplingError> {
|
||||
// Group data columns by block_root and index
|
||||
let mut data_columns_by_block =
|
||||
HashMap::<Hash256, HashMap<ColumnIndex, Arc<DataColumnSidecar<E>>>>::new();
|
||||
@@ -215,9 +300,10 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
.insert(index, column)
|
||||
.is_some()
|
||||
{
|
||||
return Err(format!(
|
||||
"Repeated column block_root {block_root:?} index {index}"
|
||||
));
|
||||
return Err(CouplingError {
|
||||
msg: format!("Repeated column block_root {block_root:?} index {index}"),
|
||||
column_and_peer: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,30 +321,61 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
// TODO(das): on the initial version of PeerDAS the beacon chain does not check
|
||||
// rpc custody requirements and dropping this check can allow the block to have
|
||||
// an inconsistent DB.
|
||||
return Err(format!("No columns for block {block_root:?} with data"));
|
||||
|
||||
// For now, we always assume that the block peer is right.
|
||||
// This is potentially dangerous as we can get isolated on a chain with a
|
||||
// malicious block peer.
|
||||
// TODO: fix this by checking the proposer signature before downloading columns.
|
||||
let responsible_peers = column_to_peer.iter().map(|c| (*c.0, *c.1)).collect();
|
||||
return Err(CouplingError {
|
||||
msg: format!("No columns for block {block_root:?} with data"),
|
||||
column_and_peer: Some((responsible_peers, PeerAction::LowToleranceError)),
|
||||
});
|
||||
};
|
||||
|
||||
let mut custody_columns = vec![];
|
||||
let mut naughty_peers = vec![];
|
||||
for index in expects_custody_columns {
|
||||
let Some(data_column) = data_columns_by_index.remove(index) else {
|
||||
return Err(format!("No column for block {block_root:?} index {index}"));
|
||||
};
|
||||
// Safe to convert to `CustodyDataColumn`: we have asserted that the index of
|
||||
// this column is in the set of `expects_custody_columns` and with the expected
|
||||
// block root, so for the expected epoch of this batch.
|
||||
custody_columns.push(CustodyDataColumn::from_asserted_custody(data_column));
|
||||
if let Some(data_column) = data_columns_by_index.remove(index) {
|
||||
// Safe to convert to `CustodyDataColumn`: we have asserted that the index of
|
||||
// this column is in the set of `expects_custody_columns` and with the expected
|
||||
// block root, so for the expected epoch of this batch.
|
||||
custody_columns.push(CustodyDataColumn::from_asserted_custody(data_column));
|
||||
} else {
|
||||
// Penalize the peer for claiming to have the columns but not returning
|
||||
// them
|
||||
let Some(responsible_peer) = column_to_peer.get(index) else {
|
||||
return Err(CouplingError {
|
||||
msg: format!("Internal error, no request made for column {}", index),
|
||||
column_and_peer: None,
|
||||
});
|
||||
};
|
||||
naughty_peers.push((*index, *responsible_peer));
|
||||
}
|
||||
}
|
||||
if !naughty_peers.is_empty() {
|
||||
return Err(CouplingError {
|
||||
msg: format!("Peers did not return column for block_root {block_root:?} {naughty_peers:?}"),
|
||||
column_and_peer: Some((naughty_peers, PeerAction::LowToleranceError)),
|
||||
});
|
||||
}
|
||||
|
||||
// Assert that there are no columns left
|
||||
if !data_columns_by_index.is_empty() {
|
||||
let remaining_indices = data_columns_by_index.keys().collect::<Vec<_>>();
|
||||
return Err(format!(
|
||||
"Not all columns consumed for block {block_root:?}: {remaining_indices:?}"
|
||||
));
|
||||
// log the error but don't return an error, we can still progress with extra columns.
|
||||
tracing::error!(
|
||||
?block_root,
|
||||
?remaining_indices,
|
||||
"Not all columns consumed for block"
|
||||
);
|
||||
}
|
||||
|
||||
RpcBlock::new_with_custody_columns(Some(block_root), block, custody_columns, spec)
|
||||
.map_err(|e| format!("{e:?}"))?
|
||||
.map_err(|e| CouplingError {
|
||||
msg: format!("{:?}", e),
|
||||
column_and_peer: None,
|
||||
})?
|
||||
} else {
|
||||
// Block has no data, expects zero columns
|
||||
RpcBlock::new_without_blobs(Some(block_root), block)
|
||||
@@ -268,7 +385,9 @@ impl<E: EthSpec> RangeBlockComponentsRequest<E> {
|
||||
// Assert that there are no columns left for other blocks
|
||||
if !data_columns_by_block.is_empty() {
|
||||
let remaining_roots = data_columns_by_block.keys().collect::<Vec<_>>();
|
||||
return Err(format!("Not all columns consumed: {remaining_roots:?}"));
|
||||
// log the error but don't return an error, we can still progress with responses.
|
||||
// this is most likely an internal error with overrequesting or a client bug.
|
||||
tracing::error!(?remaining_roots, "Not all columns consumed for block");
|
||||
}
|
||||
|
||||
Ok(rpc_blocks)
|
||||
@@ -303,9 +422,12 @@ mod tests {
|
||||
use beacon_chain::test_utils::{
|
||||
generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, NumBlobs,
|
||||
};
|
||||
use lighthouse_network::service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
DataColumnsByRangeRequestId, Id, RangeRequestId,
|
||||
use lighthouse_network::{
|
||||
service::api_types::{
|
||||
BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId,
|
||||
DataColumnsByRangeRequestId, Id, RangeRequestId,
|
||||
},
|
||||
PeerId,
|
||||
};
|
||||
use rand::SeedableRng;
|
||||
use std::sync::Arc;
|
||||
@@ -342,10 +464,11 @@ mod tests {
|
||||
DataColumnsByRangeRequestId {
|
||||
id,
|
||||
parent_request_id,
|
||||
peer: PeerId::random(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_finished(info: &RangeBlockComponentsRequest<E>) -> bool {
|
||||
fn is_finished(info: &mut RangeBlockComponentsRequest<E>) -> bool {
|
||||
let spec = test_spec::<E>();
|
||||
info.responses(&spec).is_some()
|
||||
}
|
||||
@@ -428,7 +551,7 @@ mod tests {
|
||||
let columns_req_id = expects_custody_columns
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, _)| columns_id(i as Id, components_id))
|
||||
.map(|(i, column)| (columns_id(i as Id, components_id), vec![*column]))
|
||||
.collect::<Vec<_>>();
|
||||
let mut info = RangeBlockComponentsRequest::<E>::new(
|
||||
blocks_req_id,
|
||||
@@ -442,12 +565,13 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
// Assert response is not finished
|
||||
assert!(!is_finished(&info));
|
||||
assert!(!is_finished(&mut info));
|
||||
|
||||
// Send data columns
|
||||
for (i, &column_index) in expects_custody_columns.iter().enumerate() {
|
||||
let (req, _columns) = columns_req_id.get(i).unwrap();
|
||||
info.add_custody_columns(
|
||||
columns_req_id.get(i).copied().unwrap(),
|
||||
*req,
|
||||
blocks
|
||||
.iter()
|
||||
.flat_map(|b| b.1.iter().filter(|d| d.index == column_index).cloned())
|
||||
@@ -457,7 +581,7 @@ mod tests {
|
||||
|
||||
if i < expects_custody_columns.len() - 1 {
|
||||
assert!(
|
||||
!is_finished(&info),
|
||||
!is_finished(&mut info),
|
||||
"requested should not be finished at loop {i}"
|
||||
);
|
||||
}
|
||||
@@ -485,7 +609,7 @@ mod tests {
|
||||
let columns_req_id = batched_column_requests
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, _)| columns_id(i as Id, components_id))
|
||||
.map(|(i, columns)| (columns_id(i as Id, components_id), columns.clone()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut info = RangeBlockComponentsRequest::<E>::new(
|
||||
@@ -513,12 +637,13 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
// Assert response is not finished
|
||||
assert!(!is_finished(&info));
|
||||
assert!(!is_finished(&mut info));
|
||||
|
||||
for (i, column_indices) in batched_column_requests.iter().enumerate() {
|
||||
let (req, _columns) = columns_req_id.get(i).unwrap();
|
||||
// Send the set of columns in the same batch request
|
||||
info.add_custody_columns(
|
||||
columns_req_id.get(i).copied().unwrap(),
|
||||
*req,
|
||||
blocks
|
||||
.iter()
|
||||
.flat_map(|b| {
|
||||
@@ -532,7 +657,7 @@ mod tests {
|
||||
|
||||
if i < num_of_data_column_requests - 1 {
|
||||
assert!(
|
||||
!is_finished(&info),
|
||||
!is_finished(&mut info),
|
||||
"requested should not be finished at loop {i}"
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user