Fix and improve handling of empty columns after getBlobs response (#9361)

This PR fixes two issues:

1. This condition is inverted: dfb259171a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs (L1507-L1508)
We are supposed to filter out incomplete columns when we DON'T have local blobs yet!
2. When the EL returns no blobs, we never store a partial in the assembler, and this code fails to publish our need to the network, as no partials are returned: dfb259171a/beacon_node/network/src/network_beacon_processor/mod.rs (L1038-L1050)


  The simple fix for 1 would be to invert the condition, but we can improve the flow here: instead of publishing nothing, we can publish what we have without requesting any cells. This ties into the fix for 2: after get blobs completes, we publish everything in the partial assembler and, for every custody column still missing from it, also publish an empty column together with a request for all cells.

In particular:
- When sending a partial message to `network`, allow specifying a request bitmap instead of hardcoding an all-ones bitmap.
- For clarity and to prepare for Gloas integration, add a `PubsubPartialMessage` enum with a `DataColumnFulu` variant.
- On republishing after merging a gossip column: always publish, but only request cells if local blobs are known or get blobs is disabled. This also prepares us to request only *some* cells, for example when we already know which blobs the EL is going to send us (e.g. via `engine_hasBlobs`).
- Move guards in `fetch_engine_blobs_and_publish` to ensure everything works fine if there are no blobs or if get_blobs is disabled.


Co-Authored-By: Daniel Knopik <daniel@dknopik.de>
This commit is contained in:
Daniel Knopik
2026-06-19 02:50:24 +02:00
committed by GitHub
parent ddfc265123
commit 560f90611e
15 changed files with 313 additions and 148 deletions

View File

@@ -98,8 +98,8 @@ impl std::fmt::Display for ClearDialError<'_> {
}
pub use crate::types::{
Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage, Subnet,
SubnetDiscovery, decode_partial,
Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage,
PubsubPartialMessage, Subnet, SubnetDiscovery, decode_partial,
};
pub use prometheus_client;

View File

@@ -20,7 +20,9 @@ use crate::types::{
SubnetDiscovery, all_topics_at_fork, core_topics_to_subscribe, is_fork_non_core_topic,
subnet_from_topic_hash,
};
use crate::{Enr, NetworkGlobals, PubsubMessage, TopicHash, decode_partial, metrics};
use crate::{
Enr, NetworkGlobals, PubsubMessage, PubsubPartialMessage, TopicHash, decode_partial, metrics,
};
use api_types::{AppRequestId, Response};
use futures::stream::StreamExt;
use gossipsub_scoring_parameters::{PeerScoreSettings, lighthouse_gossip_thresholds};
@@ -43,8 +45,9 @@ use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, error, info, trace, warn};
use types::{
ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, ForkName, PartialDataColumn,
PartialDataColumnHeader, Slot, SubnetId, consts::altair::SYNC_COMMITTEE_SUBNET_COUNT,
CellBitmap, ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, ForkName,
PartialDataColumn, PartialDataColumnHeader, Slot, SubnetId,
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT,
};
use utils::{Context as ServiceContext, build_transport, strip_peer_id};
@@ -920,65 +923,73 @@ impl<E: EthSpec> Network<E> {
}
/// Publishes partial data column sidecars to the gossipsub network.
pub fn publish_partial(
&mut self,
columns: Vec<Arc<PartialDataColumn<E>>>,
header: Arc<PartialDataColumnHeader<E>>,
) {
pub fn publish_partial(&mut self, messages: Vec<PubsubPartialMessage<E>>) {
if !self.network_globals.config.enable_partial_columns {
return;
}
debug!(
count = columns.len(),
"Sending partial data column sidecars"
);
debug!(count = messages.len(), "Sending partial messages");
for column in columns {
let subnet =
DataColumnSubnetId::from_column_index(column.index, &self.fork_context.spec);
let topic = GossipTopic::new(
GossipKind::DataColumnSidecar(subnet),
GossipEncoding::default(),
self.enr_fork_id.fork_digest,
);
let header_sent_set = self
.partial_column_header_tracker
.get_for_block(column.block_root);
let partial_message = OutgoingPartialColumn::new(column, &header, header_sent_set);
let publish_topic: Topic = topic.clone().into();
if let Err(e) = self
.gossipsub_mut()
.publish_partial(publish_topic, partial_message)
{
match e {
PublishError::NoPeersSubscribedToTopic => {
debug!(
kind = %topic.kind(),
"No peers supporting partial messages"
);
}
ref e => {
warn!(
error = ?e,
kind = %topic.kind(),
"Could not publish partial message"
);
}
}
// add to metrics
if let Some(v) = metrics::get_int_gauge(
&metrics::FAILED_PARTIAL_PUBLISHES_PER_MAIN_TOPIC,
&[&format!("{:?}", topic.kind())],
) {
v.inc()
};
for message in messages {
match message {
PubsubPartialMessage::DataColumnFulu {
column,
request_cells,
header,
} => self.publish_partial_data_column_fulu(column, request_cells, header),
}
}
}
/// Publishes a single Fulu partial data column on the gossip topic of its column subnet.
///
/// `request_cells` is handed to `OutgoingPartialColumn::new` as the cells to request, and
/// `header` is passed alongside the column when the outgoing message is built.
///
/// Publish failures are not returned to the caller: they are logged and counted in
/// `FAILED_PARTIAL_PUBLISHES_PER_MAIN_TOPIC`.
fn publish_partial_data_column_fulu(
    &mut self,
    column: Arc<PartialDataColumn<E>>,
    request_cells: CellBitmap<E>,
    header: Arc<PartialDataColumnHeader<E>>,
) {
    // The topic is derived from the subnet that this column index maps to.
    let topic = GossipTopic::new(
        GossipKind::DataColumnSidecar(DataColumnSubnetId::from_column_index(
            column.index,
            &self.fork_context.spec,
        )),
        GossipEncoding::default(),
        self.enr_fork_id.fork_digest,
    );

    // The header-sent set is looked up per block root and handed to the outgoing message.
    let sent_headers = self
        .partial_column_header_tracker
        .get_for_block(column.block_root);
    let outgoing = OutgoingPartialColumn::new(column, &header, sent_headers, request_cells);

    let gossipsub_topic: Topic = topic.clone().into();
    let outcome = self
        .gossipsub_mut()
        .publish_partial(gossipsub_topic, outgoing);

    if let Err(failure) = outcome {
        // Having no subscribed peers is logged at debug level; every other error is a warning.
        if matches!(failure, PublishError::NoPeersSubscribedToTopic) {
            debug!(
                kind = %topic.kind(),
                "No peers supporting partial messages"
            );
        } else {
            warn!(
                error = ?failure,
                kind = %topic.kind(),
                "Could not publish partial message"
            );
        }

        // Every failure, including the "no peers" case, is counted per topic kind.
        let kind_label = format!("{:?}", topic.kind());
        if let Some(gauge) = metrics::get_int_gauge(
            &metrics::FAILED_PARTIAL_PUBLISHES_PER_MAIN_TOPIC,
            &[&kind_label],
        ) {
            gauge.inc();
        }
    }
}
/// Informs the gossipsub about the result of a message validation.
/// If the message is valid it will get propagated by gossipsub.
pub fn report_message_validation_result(

View File

@@ -16,7 +16,7 @@ pub use eth2::lighthouse::sync_state::{BackFillState, CustodyBackFillState, Sync
pub use globals::NetworkGlobals;
pub use partial::HeaderSentSet;
pub use partial::OutgoingPartialColumn;
pub use pubsub::{PubsubMessage, SnappyTransform, decode_partial};
pub use pubsub::{PubsubMessage, PubsubPartialMessage, SnappyTransform, decode_partial};
pub use subnet::{Subnet, SubnetDiscovery};
pub use topics::{
GossipEncoding, GossipKind, GossipTopic, TopicConfig, all_topics_at_fork,

View File

@@ -9,7 +9,7 @@ use std::sync::Arc;
use tracing::{error, trace};
use types::core::{EthSpec, Hash256};
use types::data::{
PartialDataColumn, PartialDataColumnHeader, PartialDataColumnPartsMetadata,
CellBitmap, PartialDataColumn, PartialDataColumnHeader, PartialDataColumnPartsMetadata,
PartialDataColumnSidecar, PartialDataColumnSidecarRef,
};
@@ -30,10 +30,29 @@ impl<E: EthSpec> OutgoingPartialColumn<E> {
partial_column: Arc<PartialDataColumn<E>>,
header: &PartialDataColumnHeader<E>,
header_sent_set: HeaderSentSet,
requests: CellBitmap<E>,
) -> Self {
// For now, always request all cells
let mut requests = partial_column.sidecar.cells_present_bitmap.clone_zeroed();
requests.not_inplace();
// Always set the request bit for available cells.
//
// Gossipsub applies certain optimisations to avoid sending redundant messages. This
// requires that we stay consistent with our metadata. Gossipsub uses the `Metadata` trait
// impl below to determine whether it can perform these optimisations.
//
// If we request a cell and then receive it, un-setting the request bit in the next
// published message may cause issues:
// Gossipsub tries to avoid the impact of application race conditions by checking newly
// published metadata against previously published metadata. This no longer functions
// correctly if request bits are unset between calls, as Gossipsub will consider a message
// with new requests as new info to be propagated, possibly overwriting previous messages
// with more cells (but fewer request bits). This is because gossipsub will see that both
// metadata have some bits that are not set in the other metadata and therefore cannot
// decide which actually carries more data. By always setting request bits for available
// cells, we avoid this issue, as requests will never be unset between calls.
//
// In other words, gossipsub relies on the fact that metadata is additive. The request bit
// is, therefore, to be seen as a "request if not available" bit.
let requests = requests.union(&partial_column.sidecar.cells_present_bitmap);
let metadata = PartialDataColumnPartsMetadata::<E> {
available: partial_column.sidecar.cells_present_bitmap.clone(),
requests,
@@ -322,6 +341,14 @@ mod tests {
})
}
/// Test helper: creates a `CellBitmap` via `with_capacity(len)` and sets every bit in
/// `0..bitmap.len()` to `true`.
///
/// Panics if the bitmap cannot be created or if setting a bit fails.
fn make_all_one_bitmap(len: usize) -> CellBitmap<E> {
    let mut all_ones = CellBitmap::<E>::with_capacity(len).unwrap();
    (0..all_ones.len()).for_each(|bit| {
        all_ones.set(bit, true).unwrap();
    });
    all_ones
}
fn random_peer_id() -> PeerId {
let keypair = Keypair::generate_ed25519();
PeerId::from(keypair.public())
@@ -422,7 +449,8 @@ mod tests {
let header = make_header(4);
let partial = make_partial_column(root, 4, &[0, 1]);
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
let requests = make_all_one_bitmap(4);
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
let peer = random_peer_id();
@@ -442,7 +470,8 @@ mod tests {
// We have cells [0, 2, 3]
let partial = make_partial_column(root, 4, &[0, 2, 3]);
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
let requests = make_all_one_bitmap(4);
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
let peer = random_peer_id();
@@ -474,7 +503,8 @@ mod tests {
// We have cells [0]
let partial = make_partial_column(root, 4, &[0]);
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
let requests = make_all_one_bitmap(4);
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
let peer = random_peer_id();

View File

@@ -7,10 +7,10 @@ use ssz::{Decode, Encode};
use std::io::{Error, ErrorKind};
use std::sync::Arc;
use types::{
AttesterSlashing, AttesterSlashingBase, AttesterSlashingElectra, DataColumnSidecar,
AttesterSlashing, AttesterSlashingBase, AttesterSlashingElectra, CellBitmap, DataColumnSidecar,
DataColumnSubnetId, EthSpec, ForkContext, ForkName, Hash256, LightClientFinalityUpdate,
LightClientOptimisticUpdate, PartialDataColumn, PartialDataColumnSidecar,
PayloadAttestationMessage, ProposerSlashing, SignedAggregateAndProof,
LightClientOptimisticUpdate, PartialDataColumn, PartialDataColumnHeader,
PartialDataColumnSidecar, PayloadAttestationMessage, ProposerSlashing, SignedAggregateAndProof,
SignedAggregateAndProofBase, SignedAggregateAndProofElectra, SignedBeaconBlock,
SignedBeaconBlockAltair, SignedBeaconBlockBase, SignedBeaconBlockBellatrix,
SignedBeaconBlockCapella, SignedBeaconBlockDeneb, SignedBeaconBlockElectra,
@@ -56,6 +56,24 @@ pub enum PubsubMessage<E: EthSpec> {
LightClientOptimisticUpdate(Box<LightClientOptimisticUpdate<E>>),
}
/// A message published via the partial gossipsub protocol.
///
/// Each variant bundles everything needed to publish one partial message;
/// `Network::publish_partial` dispatches on the variant.
#[derive(Debug, Clone)]
pub enum PubsubPartialMessage<E: EthSpec> {
/// A partial data column sidecar from the Fulu fork.
DataColumnFulu {
/// The column to publish. Libp2p will cache it and treat it as the data to send if any peer
/// asks for data within it.
column: Arc<PartialDataColumn<E>>,
/// The cells we are requesting from peers. Usually, this will be all-ones, as we need all
/// cells. However, while get_blobs is still in progress, blobs we expect from the EL should
/// not be requested, to conserve bandwidth.
///
/// Bits for cells already present in `column` are additionally set when the outgoing
/// message is built (`OutgoingPartialColumn::new` unions this bitmap with the column's
/// present-cells bitmap), so callers do not need to set them here.
request_cells: CellBitmap<E>,
/// The header associated with the column above. It is passed separately because the
/// column to be published does not contain the header - it is stored without one.
header: Arc<PartialDataColumnHeader<E>>,
},
}
// Implements the `DataTransform` trait of gossipsub to employ snappy compression
pub struct SnappyTransform {
/// Sets the maximum size we allow gossipsub messages to decompress to.