Fix and improve handling of empty columns after getBlobs response (#9361)

This PR fixes two issues:

1. This condition is inverted: dfb259171a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs (L1507-L1508)
We are supposed to filter out incomplete columns when we DON'T have local blobs yet!
2. When the EL returns no blobs, we never store a partial in the assembler, and this code fails to publish our need to the network, as no partials are returned: dfb259171a/beacon_node/network/src/network_beacon_processor/mod.rs (L1038-L1050)


  The simple fix for 1 would be to invert the condition, but we can improve the flow here: instead of not publishing anything, we publish what we have but do not request anything. This ties into the fix for 2: after getBlobs completes, we not only publish everything in the partial assembler but also, for every custody column missing from it, publish an empty column together with a request for all cells.

In particular:
- When sending a partial message to `network`, allow specifying a request bitmap instead of hardcoding an all-ones bitmap.
- For clarity and to prepare for Gloas integration, add a `PubsubPartialMessage` enum with a `DataColumnFulu` variant.
- On republishing after merging a gossip column: always publish, but only request cells if local blobs are known or getBlobs is disabled. This also prepares us to request only *some* cells, for example when we already know which blobs the EL is going to send us (e.g. via `engine_hasBlobs`).
- Move guards in `fetch_engine_blobs_and_publish` to ensure everything works fine if there are no blobs or if get_blobs is disabled.


Co-Authored-By: Daniel Knopik <daniel@dknopik.de>
This commit is contained in:
Daniel Knopik
2026-06-19 02:50:24 +02:00
committed by GitHub
parent ddfc265123
commit 560f90611e
15 changed files with 313 additions and 148 deletions

View File

@@ -33,7 +33,7 @@ use beacon_chain::{
use beacon_processor::{Work, WorkEvent};
use lighthouse_network::{
Client, GossipTopic, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage,
ReportSource,
PubsubPartialMessage, ReportSource,
};
use logging::crit;
use operation_pool::ReceivedPreCapella;
@@ -937,9 +937,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
Ok(mut column) => {
let header = column.sidecar.header.take();
if let Some(header) = header {
// Requesting cells is irrelevant as all cells are available, simply clone
// the `cells_present_bitmap`.
let request_cells = column.sidecar.cells_present_bitmap.clone();
self.send_network_message(NetworkMessage::PublishPartialColumns {
columns: vec![Arc::new(column)],
header: Arc::new(header),
messages: vec![PubsubPartialMessage::DataColumnFulu {
column: Arc::new(column),
request_cells,
header: Arc::new(header),
}],
});
} else {
crit!("Converting from full to partial yielded headerless partial")
@@ -1077,8 +1083,10 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
debug!(block = %block_root, "Triggering getBlobs after receiving partial header");
// We want to publish immediately when this finishes
let publish_blobs = true;
self.fetch_engine_blobs_and_publish(header.into_header(), block_root, publish_blobs)
.await
let header = header.into_header();
self.fetch_engine_blobs_and_publish_full(header.clone(), block_root, publish_blobs)
.await;
self.publish_partial_data_columns(header, block_root).await;
}
}
}
@@ -1311,28 +1319,31 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
});
}
let only_send_completed_partials =
merge_result.local_blobs || self.chain.config.disable_get_blobs;
let columns = merge_result
.updated_partials
.into_iter()
.map(|partial| partial.into_inner())
.filter(|partial| {
!only_send_completed_partials || partial.sidecar.is_complete()
})
.collect::<Vec<_>>();
if !columns.is_empty() {
if only_send_completed_partials {
debug!(
block = %block_root,
"Not publishing incomplete partials before getBlobs"
);
}
self.send_network_message(NetworkMessage::PublishPartialColumns {
columns,
header: verified_header.into_header(),
});
if !merge_result.updated_partials.is_empty() {
let header = verified_header.into_header();
let messages = merge_result
.updated_partials
.into_iter()
.map(|partial| {
let column = partial.into_inner();
let present_cells = &column.sidecar.cells_present_bitmap;
let request_cells = if merge_result.local_blobs {
// Request all cells that are not available locally.
let mut all_one = present_cells.clone_zeroed();
all_one.not_inplace();
all_one
} else {
// Do not request cells if we don't know the local blobs yet.
present_cells.clone_zeroed()
};
PubsubPartialMessage::DataColumnFulu {
column,
request_cells,
header: header.clone(),
}
})
.collect();
self.send_network_message(NetworkMessage::PublishPartialColumns { messages });
}
Ok(avail)
}
@@ -1803,8 +1814,16 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self.executor.spawn(
async move {
if let Ok(header) = PartialDataColumnHeader::try_from(block_clone.as_ref()) {
let header = Arc::new(header);
self_clone
.fetch_engine_blobs_and_publish(Arc::new(header), block_root, publish_blobs)
.fetch_engine_blobs_and_publish_full(
header.clone(),
block_root,
publish_blobs,
)
.await;
self_clone
.publish_partial_data_columns(header, block_root)
.await
}
}

View File

@@ -22,9 +22,13 @@ use lighthouse_network::rpc::methods::{
use lighthouse_network::service::api_types::CustodyBackfillBatchId;
use lighthouse_network::{
Client, GossipTopic, MessageId, NetworkConfig, NetworkGlobals, PeerId, PubsubMessage,
PubsubPartialMessage,
rpc::{BlocksByRangeRequest, BlocksByRootRequest, LightClientBootstrapRequest, StatusMessage},
};
use logging::crit;
use rand::prelude::SliceRandom;
use ssz_types::VariableList;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
@@ -907,7 +911,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
});
}
pub async fn fetch_engine_blobs_and_publish(
pub async fn fetch_engine_blobs_and_publish_full(
self: &Arc<Self>,
header: Arc<PartialDataColumnHeader<T::EthSpec>>,
block_root: Hash256,
@@ -931,7 +935,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
match fetch_and_process_engine_blobs(
self.chain.clone(),
block_root,
header.clone(),
header,
custody_columns,
publish_fn,
)
@@ -975,44 +979,108 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
);
}
}
}
// Publish partial columns without eager send
// TODO(gloas): implement publish partial columns without eager send
if let Some(assembler) = self.chain.data_availability_checker.partial_assembler() {
let columns = assembler.get_columns_and_mark_as_local_fetched(block_root, &header);
pub async fn publish_partial_data_columns(
self: &Arc<Self>,
header: Arc<PartialDataColumnHeader<T::EthSpec>>,
block_root: Hash256,
) {
if header.kzg_commitments.is_empty() {
return;
}
// TODO(gloas): implement publish partial columns
let Some(assembler) = self.chain.data_availability_checker.partial_assembler() else {
// Partials are disabled.
return;
};
let epoch = header.slot().epoch(T::EthSpec::slots_per_epoch());
let custody_columns = self.chain.sampling_columns_for_epoch(epoch);
let columns = assembler.get_columns_and_mark_as_local_fetched(block_root, &header);
let mut present_indices: HashSet<ColumnIndex> = HashSet::with_capacity(columns.len());
let mut messages: Vec<PubsubPartialMessage<T::EthSpec>> = Vec::with_capacity(columns.len());
for column in columns {
// Republish both complete and incomplete columns as partials
let columns: Vec<_> = columns
.into_iter()
.filter_map(|column| match column {
AssemblyColumn::Incomplete(partial) => Some(partial.into_inner()),
AssemblyColumn::Complete(full) => {
let DataColumnSidecar::Fulu(fulu) = full.as_data_column() else {
return None;
};
match fulu.to_partial() {
Ok(partial) => Some(Arc::new(partial)),
Err(err) => {
error!(
%block_root,
column_index = %full.index(),
?err,
"Failed to convert complete column to partial for re-seeding"
);
None
}
let partial_column = match column {
AssemblyColumn::Incomplete(partial) => partial.into_inner(),
AssemblyColumn::Complete(full) => {
let DataColumnSidecar::Fulu(fulu) = full.as_data_column() else {
continue;
};
match fulu.to_partial() {
Ok(partial) => Arc::new(partial),
Err(err) => {
error!(
%block_root,
column_index = %full.index(),
?err,
"Failed to convert complete column to partial for re-seeding"
);
continue;
}
}
})
.collect();
if !columns.is_empty() {
debug!(block = %block_root, "Publishing all partials after getBlobs");
self.send_network_message(NetworkMessage::PublishPartialColumns {
columns,
header,
});
} else {
debug!(block = %block_root, "No partials to publish after getBlobs");
}
};
present_indices.insert(partial_column.index);
let mut request_cells = partial_column.sidecar.cells_present_bitmap.clone_zeroed();
request_cells.not_inplace();
messages.push(PubsubPartialMessage::DataColumnFulu {
column: partial_column,
request_cells,
header: header.clone(),
});
}
// For each custody column without any local partial, send an empty placeholder
// that requests all cells.
let num_cells = header.kzg_commitments.len();
for col_idx in custody_columns {
if present_indices.contains(col_idx) {
continue;
}
// `kzg_commitments.len()` is bounded by `MaxBlobCommitmentsPerBlock`, so the
// bitmap constructor is infallible.
let Ok(cells_present_bitmap) = CellBitmap::<T::EthSpec>::with_capacity(num_cells)
else {
crit!(
%block_root,
num_cells,
column_index = %col_idx,
"CellBitmap construction failed despite being bounded by MaxBlobCommitmentsPerBlock"
);
continue;
};
let request_cells = cells_present_bitmap.not();
messages.push(PubsubPartialMessage::DataColumnFulu {
column: Arc::new(PartialDataColumn {
block_root,
index: *col_idx,
sidecar: PartialDataColumnSidecar {
cells_present_bitmap,
column: VariableList::empty(),
kzg_proofs: VariableList::empty(),
header: None.into(),
},
}),
request_cells,
header: header.clone(),
});
}
if !messages.is_empty() {
debug!(
block = %block_root,
count = messages.len(),
"Publishing all partials"
);
self.send_network_message(NetworkMessage::PublishPartialColumns { messages });
} else {
// This should not happen, as any custody columns will have at least an empty
// partial published.
warn!(block = %block_root, "No partials to publish");
}
}

View File

@@ -222,7 +222,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
// to be sent from the peers if we already have them.
if let Ok(header) = signed_beacon_block.as_ref().try_into() {
let publish_blobs = false;
self.fetch_engine_blobs_and_publish(
self.fetch_engine_blobs_and_publish_full(
Arc::new(header),
block_root,
publish_blobs,

View File

@@ -19,7 +19,7 @@ use lighthouse_network::rpc::methods::RpcResponse;
use lighthouse_network::service::Network;
use lighthouse_network::types::GossipKind;
use lighthouse_network::{
Context, PeerAction, PubsubMessage, ReportSource, Response, Subnet,
Context, PeerAction, PubsubMessage, PubsubPartialMessage, ReportSource, Response, Subnet,
rpc::{GoodbyeReason, RpcErrorResponse},
};
use lighthouse_network::{MessageAcceptance, prometheus_client::registry::Registry};
@@ -39,8 +39,8 @@ use tokio::time::Sleep;
use tracing::{debug, error, info, trace, warn};
use typenum::Unsigned;
use types::{
EthSpec, ForkContext, PartialDataColumn, PartialDataColumnHeader, Slot, SubnetId,
SyncCommitteeSubscription, SyncSubnetId, ValidatorSubscription,
EthSpec, ForkContext, Slot, SubnetId, SyncCommitteeSubscription, SyncSubnetId,
ValidatorSubscription,
};
mod tests;
@@ -85,8 +85,7 @@ pub enum NetworkMessage<E: EthSpec> {
Publish { messages: Vec<PubsubMessage<E>> },
/// Publish partial data column sidecars via the partial gossipsub protocol.
PublishPartialColumns {
columns: Vec<Arc<PartialDataColumn<E>>>,
header: Arc<PartialDataColumnHeader<E>>,
messages: Vec<PubsubPartialMessage<E>>,
},
/// Validates a received gossipsub message. This will propagate the message on the network.
ValidationResult {
@@ -683,8 +682,8 @@ impl<T: BeaconChainTypes> NetworkService<T> {
);
self.libp2p.publish(messages);
}
NetworkMessage::PublishPartialColumns { columns, header } => {
self.libp2p.publish_partial(columns, header);
NetworkMessage::PublishPartialColumns { messages } => {
self.libp2p.publish_partial(messages);
}
NetworkMessage::ReportPeer {
peer_id,