mirror of
https://github.com/sigp/lighthouse.git
synced 2026-06-30 11:24:31 +00:00
Fix and improve handling of empty columns after getBlobs response (#9361)
This PR fixes two issues:

1. This condition is inverted: dfb259171a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs (L1507-L1508). We are supposed to filter out incomplete columns when we DON'T have local blobs yet!
2. When the EL returns no blobs, we never store a partial in the assembler, and this code fails to publish our need to the network, as no partials are returned: dfb259171a/beacon_node/network/src/network_beacon_processor/mod.rs (L1038-L1050).

The simple fix for 1 would be to invert the condition, but we can improve the flow here: instead of not publishing anything, we can publish what we got, but not request anything. This ties into the fix for 2: after getBlobs completes, we publish not only everything in the partial assembler but also, for every custody column missing from it, an empty column together with a request for all cells.

In particular:

- When sending a partial message to `network`, allow specifying a request bitmap instead of hardcoding an all-ones bitmap.
- For clarity and to prepare for Gloas integration, add a `PubsubPartialMessage` enum with a `DataColumnFulu` variant.
- On republishing after merging a gossip column: always publish, but only request cells if local blobs are known or getBlobs is disabled. This also prepares us to request only *some* cells, e.g. in cases where we already know which blobs the EL is going to send us (for instance via `engine_hasBlobs`).
- Move guards in `fetch_engine_blobs_and_publish` to ensure everything works fine if there are no blobs or if getBlobs is disabled.

Co-Authored-By: Daniel Knopik <daniel@dknopik.de>
This commit is contained in:
@@ -33,7 +33,7 @@ use beacon_chain::{
|
||||
use beacon_processor::{Work, WorkEvent};
|
||||
use lighthouse_network::{
|
||||
Client, GossipTopic, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage,
|
||||
ReportSource,
|
||||
PubsubPartialMessage, ReportSource,
|
||||
};
|
||||
use logging::crit;
|
||||
use operation_pool::ReceivedPreCapella;
|
||||
@@ -937,9 +937,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
Ok(mut column) => {
|
||||
let header = column.sidecar.header.take();
|
||||
if let Some(header) = header {
|
||||
// Requesting cells is irrelevant as all cells are available, simply clone
|
||||
// the `cells_present_bitmap`.
|
||||
let request_cells = column.sidecar.cells_present_bitmap.clone();
|
||||
self.send_network_message(NetworkMessage::PublishPartialColumns {
|
||||
columns: vec![Arc::new(column)],
|
||||
header: Arc::new(header),
|
||||
messages: vec![PubsubPartialMessage::DataColumnFulu {
|
||||
column: Arc::new(column),
|
||||
request_cells,
|
||||
header: Arc::new(header),
|
||||
}],
|
||||
});
|
||||
} else {
|
||||
crit!("Converting from full to partial yielded headerless partial")
|
||||
@@ -1077,8 +1083,10 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
debug!(block = %block_root, "Triggering getBlobs after receiving partial header");
|
||||
// We want to publish immediately when this finishes
|
||||
let publish_blobs = true;
|
||||
self.fetch_engine_blobs_and_publish(header.into_header(), block_root, publish_blobs)
|
||||
.await
|
||||
let header = header.into_header();
|
||||
self.fetch_engine_blobs_and_publish_full(header.clone(), block_root, publish_blobs)
|
||||
.await;
|
||||
self.publish_partial_data_columns(header, block_root).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1311,28 +1319,31 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
});
|
||||
}
|
||||
|
||||
let only_send_completed_partials =
|
||||
merge_result.local_blobs || self.chain.config.disable_get_blobs;
|
||||
let columns = merge_result
|
||||
.updated_partials
|
||||
.into_iter()
|
||||
.map(|partial| partial.into_inner())
|
||||
.filter(|partial| {
|
||||
!only_send_completed_partials || partial.sidecar.is_complete()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !columns.is_empty() {
|
||||
if only_send_completed_partials {
|
||||
debug!(
|
||||
block = %block_root,
|
||||
"Not publishing incomplete partials before getBlobs"
|
||||
);
|
||||
}
|
||||
self.send_network_message(NetworkMessage::PublishPartialColumns {
|
||||
columns,
|
||||
header: verified_header.into_header(),
|
||||
});
|
||||
if !merge_result.updated_partials.is_empty() {
|
||||
let header = verified_header.into_header();
|
||||
let messages = merge_result
|
||||
.updated_partials
|
||||
.into_iter()
|
||||
.map(|partial| {
|
||||
let column = partial.into_inner();
|
||||
let present_cells = &column.sidecar.cells_present_bitmap;
|
||||
let request_cells = if merge_result.local_blobs {
|
||||
// Request all cells that are not available locally.
|
||||
let mut all_one = present_cells.clone_zeroed();
|
||||
all_one.not_inplace();
|
||||
all_one
|
||||
} else {
|
||||
// Do not request cells if we don't know the local blobs yet.
|
||||
present_cells.clone_zeroed()
|
||||
};
|
||||
PubsubPartialMessage::DataColumnFulu {
|
||||
column,
|
||||
request_cells,
|
||||
header: header.clone(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
self.send_network_message(NetworkMessage::PublishPartialColumns { messages });
|
||||
}
|
||||
Ok(avail)
|
||||
}
|
||||
@@ -1803,8 +1814,16 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
self.executor.spawn(
|
||||
async move {
|
||||
if let Ok(header) = PartialDataColumnHeader::try_from(block_clone.as_ref()) {
|
||||
let header = Arc::new(header);
|
||||
self_clone
|
||||
.fetch_engine_blobs_and_publish(Arc::new(header), block_root, publish_blobs)
|
||||
.fetch_engine_blobs_and_publish_full(
|
||||
header.clone(),
|
||||
block_root,
|
||||
publish_blobs,
|
||||
)
|
||||
.await;
|
||||
self_clone
|
||||
.publish_partial_data_columns(header, block_root)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,9 +22,13 @@ use lighthouse_network::rpc::methods::{
|
||||
use lighthouse_network::service::api_types::CustodyBackfillBatchId;
|
||||
use lighthouse_network::{
|
||||
Client, GossipTopic, MessageId, NetworkConfig, NetworkGlobals, PeerId, PubsubMessage,
|
||||
PubsubPartialMessage,
|
||||
rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage},
|
||||
};
|
||||
use logging::crit;
|
||||
use rand::prelude::SliceRandom;
|
||||
use ssz_types::VariableList;
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -907,7 +911,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
});
|
||||
}
|
||||
|
||||
pub async fn fetch_engine_blobs_and_publish(
|
||||
pub async fn fetch_engine_blobs_and_publish_full(
|
||||
self: &Arc<Self>,
|
||||
header: Arc<PartialDataColumnHeader<T::EthSpec>>,
|
||||
block_root: Hash256,
|
||||
@@ -931,7 +935,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
match fetch_and_process_engine_blobs(
|
||||
self.chain.clone(),
|
||||
block_root,
|
||||
header.clone(),
|
||||
header,
|
||||
custody_columns,
|
||||
publish_fn,
|
||||
)
|
||||
@@ -975,44 +979,108 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Publish partial columns without eager send
|
||||
// TODO(gloas): implement publish partial columns without eager send
|
||||
if let Some(assembler) = self.chain.data_availability_checker.partial_assembler() {
|
||||
let columns = assembler.get_columns_and_mark_as_local_fetched(block_root, &header);
|
||||
pub async fn publish_partial_data_columns(
|
||||
self: &Arc<Self>,
|
||||
header: Arc<PartialDataColumnHeader<T::EthSpec>>,
|
||||
block_root: Hash256,
|
||||
) {
|
||||
if header.kzg_commitments.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(gloas): implement publish partial columns
|
||||
let Some(assembler) = self.chain.data_availability_checker.partial_assembler() else {
|
||||
// Partials are disabled.
|
||||
return;
|
||||
};
|
||||
let epoch = header.slot().epoch(T::EthSpec::slots_per_epoch());
|
||||
let custody_columns = self.chain.sampling_columns_for_epoch(epoch);
|
||||
let columns = assembler.get_columns_and_mark_as_local_fetched(block_root, &header);
|
||||
|
||||
let mut present_indices: HashSet<ColumnIndex> = HashSet::with_capacity(columns.len());
|
||||
let mut messages: Vec<PubsubPartialMessage<T::EthSpec>> = Vec::with_capacity(columns.len());
|
||||
for column in columns {
|
||||
// Republish both complete and incomplete columns as partials
|
||||
let columns: Vec<_> = columns
|
||||
.into_iter()
|
||||
.filter_map(|column| match column {
|
||||
AssemblyColumn::Incomplete(partial) => Some(partial.into_inner()),
|
||||
AssemblyColumn::Complete(full) => {
|
||||
let DataColumnSidecar::Fulu(fulu) = full.as_data_column() else {
|
||||
return None;
|
||||
};
|
||||
match fulu.to_partial() {
|
||||
Ok(partial) => Some(Arc::new(partial)),
|
||||
Err(err) => {
|
||||
error!(
|
||||
%block_root,
|
||||
column_index = %full.index(),
|
||||
?err,
|
||||
"Failed to convert complete column to partial for re-seeding"
|
||||
);
|
||||
None
|
||||
}
|
||||
let partial_column = match column {
|
||||
AssemblyColumn::Incomplete(partial) => partial.into_inner(),
|
||||
AssemblyColumn::Complete(full) => {
|
||||
let DataColumnSidecar::Fulu(fulu) = full.as_data_column() else {
|
||||
continue;
|
||||
};
|
||||
match fulu.to_partial() {
|
||||
Ok(partial) => Arc::new(partial),
|
||||
Err(err) => {
|
||||
error!(
|
||||
%block_root,
|
||||
column_index = %full.index(),
|
||||
?err,
|
||||
"Failed to convert complete column to partial for re-seeding"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if !columns.is_empty() {
|
||||
debug!(block = %block_root, "Publishing all partials after getBlobs");
|
||||
self.send_network_message(NetworkMessage::PublishPartialColumns {
|
||||
columns,
|
||||
header,
|
||||
});
|
||||
} else {
|
||||
debug!(block = %block_root, "No partials to publish after getBlobs");
|
||||
}
|
||||
};
|
||||
|
||||
present_indices.insert(partial_column.index);
|
||||
let mut request_cells = partial_column.sidecar.cells_present_bitmap.clone_zeroed();
|
||||
request_cells.not_inplace();
|
||||
messages.push(PubsubPartialMessage::DataColumnFulu {
|
||||
column: partial_column,
|
||||
request_cells,
|
||||
header: header.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
// For each custody column without any local partial, send an empty placeholder
|
||||
// that requests all cells.
|
||||
let num_cells = header.kzg_commitments.len();
|
||||
for col_idx in custody_columns {
|
||||
if present_indices.contains(col_idx) {
|
||||
continue;
|
||||
}
|
||||
// `kzg_commitments.len()` is bounded by `MaxBlobCommitmentsPerBlock`, so the
|
||||
// bitmap constructor is infallible.
|
||||
let Ok(cells_present_bitmap) = CellBitmap::<T::EthSpec>::with_capacity(num_cells)
|
||||
else {
|
||||
crit!(
|
||||
%block_root,
|
||||
num_cells,
|
||||
column_index = %col_idx,
|
||||
"CellBitmap construction failed despite being bounded by MaxBlobCommitmentsPerBlock"
|
||||
);
|
||||
continue;
|
||||
};
|
||||
let request_cells = cells_present_bitmap.not();
|
||||
messages.push(PubsubPartialMessage::DataColumnFulu {
|
||||
column: Arc::new(PartialDataColumn {
|
||||
block_root,
|
||||
index: *col_idx,
|
||||
sidecar: PartialDataColumnSidecar {
|
||||
cells_present_bitmap,
|
||||
column: VariableList::empty(),
|
||||
kzg_proofs: VariableList::empty(),
|
||||
header: None.into(),
|
||||
},
|
||||
}),
|
||||
request_cells,
|
||||
header: header.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if !messages.is_empty() {
|
||||
debug!(
|
||||
block = %block_root,
|
||||
count = messages.len(),
|
||||
"Publishing all partials"
|
||||
);
|
||||
self.send_network_message(NetworkMessage::PublishPartialColumns { messages });
|
||||
} else {
|
||||
// This should not happen, as any custody columns will have at least an empty
|
||||
// partial published.
|
||||
warn!(block = %block_root, "No partials to publish");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -222,7 +222,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
|
||||
// to be sent from the peers if we already have them.
|
||||
if let Ok(header) = signed_beacon_block.as_ref().try_into() {
|
||||
let publish_blobs = false;
|
||||
self.fetch_engine_blobs_and_publish(
|
||||
self.fetch_engine_blobs_and_publish_full(
|
||||
Arc::new(header),
|
||||
block_root,
|
||||
publish_blobs,
|
||||
|
||||
@@ -19,7 +19,7 @@ use lighthouse_network::rpc::methods::RpcResponse;
|
||||
use lighthouse_network::service::Network;
|
||||
use lighthouse_network::types::GossipKind;
|
||||
use lighthouse_network::{
|
||||
Context, PeerAction, PubsubMessage, ReportSource, Response, Subnet,
|
||||
Context, PeerAction, PubsubMessage, PubsubPartialMessage, ReportSource, Response, Subnet,
|
||||
rpc::{GoodbyeReason, RpcErrorResponse},
|
||||
};
|
||||
use lighthouse_network::{MessageAcceptance, prometheus_client::registry::Registry};
|
||||
@@ -39,8 +39,8 @@ use tokio::time::Sleep;
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
use typenum::Unsigned;
|
||||
use types::{
|
||||
EthSpec, ForkContext, PartialDataColumn, PartialDataColumnHeader, Slot, SubnetId,
|
||||
SyncCommitteeSubscription, SyncSubnetId, ValidatorSubscription,
|
||||
EthSpec, ForkContext, Slot, SubnetId, SyncCommitteeSubscription, SyncSubnetId,
|
||||
ValidatorSubscription,
|
||||
};
|
||||
|
||||
mod tests;
|
||||
@@ -85,8 +85,7 @@ pub enum NetworkMessage<E: EthSpec> {
|
||||
Publish { messages: Vec<PubsubMessage<E>> },
|
||||
/// Publish partial data column sidecars via the partial gossipsub protocol.
|
||||
PublishPartialColumns {
|
||||
columns: Vec<Arc<PartialDataColumn<E>>>,
|
||||
header: Arc<PartialDataColumnHeader<E>>,
|
||||
messages: Vec<PubsubPartialMessage<E>>,
|
||||
},
|
||||
/// Validates a received gossipsub message. This will propagate the message on the network.
|
||||
ValidationResult {
|
||||
@@ -683,8 +682,8 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
||||
);
|
||||
self.libp2p.publish(messages);
|
||||
}
|
||||
NetworkMessage::PublishPartialColumns { columns, header } => {
|
||||
self.libp2p.publish_partial(columns, header);
|
||||
NetworkMessage::PublishPartialColumns { messages } => {
|
||||
self.libp2p.publish_partial(messages);
|
||||
}
|
||||
NetworkMessage::ReportPeer {
|
||||
peer_id,
|
||||
|
||||
Reference in New Issue
Block a user