mirror of
https://github.com/sigp/lighthouse.git
synced 2026-06-29 19:04:27 +00:00
Fix and improve handling of empty columns after getBlobs response (#9361)
This PR fixes two issues:
1. This condition is inverted: dfb259171a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs (L1507-L1508). We are supposed to filter out incomplete columns when we DON'T have local blobs yet!
2. When the EL returns no blobs, we never store a partial in the assembler, and this code fails to publish our need to the network, as no partials are returned: dfb259171a/beacon_node/network/src/network_beacon_processor/mod.rs (L1038-L1050).
The simple fix for 1 would be to invert the condition, but we can improve the flow here: instead of not publishing anything, we can publish what we got, but not request anything. This ties into the fix for 2: after get blobs completes, we not only publish everything in the partial assembler but also, for every custody column missing from it, publish an empty column and a request for all cells.
In particular:
- When sending a partial message to `network`, allow specifying a request bitmap instead of hardcoding an all-ones bitmap.
- For clarity and to prepare for Gloas integration, add a `PubsubPartialMessage` enum with a `DataColumnFulu` variant.
- On republishing after merging a gossip column: always publish, but only request cells if local blobs are known or get blobs is disabled. This also prepares us to request only *some* cells, e.g. in cases where we are aware of the blobs that the EL is going to send us, e.g. via `engine_hasBlobs`.
- Move guards in `fetch_engine_blobs_and_publish` to ensure everything works fine if there are no blobs or if get_blobs is disabled.
Co-Authored-By: Daniel Knopik <daniel@dknopik.de>
This commit is contained in:
@@ -98,8 +98,8 @@ impl std::fmt::Display for ClearDialError<'_> {
|
||||
}
|
||||
|
||||
pub use crate::types::{
|
||||
Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage, Subnet,
|
||||
SubnetDiscovery, decode_partial,
|
||||
Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage,
|
||||
PubsubPartialMessage, Subnet, SubnetDiscovery, decode_partial,
|
||||
};
|
||||
|
||||
pub use prometheus_client;
|
||||
|
||||
@@ -20,7 +20,9 @@ use crate::types::{
|
||||
SubnetDiscovery, all_topics_at_fork, core_topics_to_subscribe, is_fork_non_core_topic,
|
||||
subnet_from_topic_hash,
|
||||
};
|
||||
use crate::{Enr, NetworkGlobals, PubsubMessage, TopicHash, decode_partial, metrics};
|
||||
use crate::{
|
||||
Enr, NetworkGlobals, PubsubMessage, PubsubPartialMessage, TopicHash, decode_partial, metrics,
|
||||
};
|
||||
use api_types::{AppRequestId, Response};
|
||||
use futures::stream::StreamExt;
|
||||
use gossipsub_scoring_parameters::{PeerScoreSettings, lighthouse_gossip_thresholds};
|
||||
@@ -43,8 +45,9 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
use types::{
|
||||
ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, ForkName, PartialDataColumn,
|
||||
PartialDataColumnHeader, Slot, SubnetId, consts::altair::SYNC_COMMITTEE_SUBNET_COUNT,
|
||||
CellBitmap, ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, ForkName,
|
||||
PartialDataColumn, PartialDataColumnHeader, Slot, SubnetId,
|
||||
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT,
|
||||
};
|
||||
use utils::{Context as ServiceContext, build_transport, strip_peer_id};
|
||||
|
||||
@@ -920,65 +923,73 @@ impl<E: EthSpec> Network<E> {
|
||||
}
|
||||
|
||||
/// Publishes partial data column sidecars to the gossipsub network.
|
||||
pub fn publish_partial(
|
||||
&mut self,
|
||||
columns: Vec<Arc<PartialDataColumn<E>>>,
|
||||
header: Arc<PartialDataColumnHeader<E>>,
|
||||
) {
|
||||
pub fn publish_partial(&mut self, messages: Vec<PubsubPartialMessage<E>>) {
|
||||
if !self.network_globals.config.enable_partial_columns {
|
||||
return;
|
||||
}
|
||||
|
||||
debug!(
|
||||
count = columns.len(),
|
||||
"Sending partial data column sidecars"
|
||||
);
|
||||
debug!(count = messages.len(), "Sending partial messages");
|
||||
|
||||
for column in columns {
|
||||
let subnet =
|
||||
DataColumnSubnetId::from_column_index(column.index, &self.fork_context.spec);
|
||||
let topic = GossipTopic::new(
|
||||
GossipKind::DataColumnSidecar(subnet),
|
||||
GossipEncoding::default(),
|
||||
self.enr_fork_id.fork_digest,
|
||||
);
|
||||
let header_sent_set = self
|
||||
.partial_column_header_tracker
|
||||
.get_for_block(column.block_root);
|
||||
let partial_message = OutgoingPartialColumn::new(column, &header, header_sent_set);
|
||||
let publish_topic: Topic = topic.clone().into();
|
||||
|
||||
if let Err(e) = self
|
||||
.gossipsub_mut()
|
||||
.publish_partial(publish_topic, partial_message)
|
||||
{
|
||||
match e {
|
||||
PublishError::NoPeersSubscribedToTopic => {
|
||||
debug!(
|
||||
kind = %topic.kind(),
|
||||
"No peers supporting partial messages"
|
||||
);
|
||||
}
|
||||
ref e => {
|
||||
warn!(
|
||||
error = ?e,
|
||||
kind = %topic.kind(),
|
||||
"Could not publish partial message"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// add to metrics
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::FAILED_PARTIAL_PUBLISHES_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", topic.kind())],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
for message in messages {
|
||||
match message {
|
||||
PubsubPartialMessage::DataColumnFulu {
|
||||
column,
|
||||
request_cells,
|
||||
header,
|
||||
} => self.publish_partial_data_column_fulu(column, request_cells, header),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn publish_partial_data_column_fulu(
|
||||
&mut self,
|
||||
column: Arc<PartialDataColumn<E>>,
|
||||
request_cells: CellBitmap<E>,
|
||||
header: Arc<PartialDataColumnHeader<E>>,
|
||||
) {
|
||||
let subnet = DataColumnSubnetId::from_column_index(column.index, &self.fork_context.spec);
|
||||
let topic = GossipTopic::new(
|
||||
GossipKind::DataColumnSidecar(subnet),
|
||||
GossipEncoding::default(),
|
||||
self.enr_fork_id.fork_digest,
|
||||
);
|
||||
let header_sent_set = self
|
||||
.partial_column_header_tracker
|
||||
.get_for_block(column.block_root);
|
||||
let partial_message =
|
||||
OutgoingPartialColumn::new(column, &header, header_sent_set, request_cells);
|
||||
let publish_topic: Topic = topic.clone().into();
|
||||
|
||||
if let Err(e) = self
|
||||
.gossipsub_mut()
|
||||
.publish_partial(publish_topic, partial_message)
|
||||
{
|
||||
match e {
|
||||
PublishError::NoPeersSubscribedToTopic => {
|
||||
debug!(
|
||||
kind = %topic.kind(),
|
||||
"No peers supporting partial messages"
|
||||
);
|
||||
}
|
||||
ref e => {
|
||||
warn!(
|
||||
error = ?e,
|
||||
kind = %topic.kind(),
|
||||
"Could not publish partial message"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// add to metrics
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
&metrics::FAILED_PARTIAL_PUBLISHES_PER_MAIN_TOPIC,
|
||||
&[&format!("{:?}", topic.kind())],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Informs the gossipsub about the result of a message validation.
|
||||
/// If the message is valid it will get propagated by gossipsub.
|
||||
pub fn report_message_validation_result(
|
||||
|
||||
@@ -16,7 +16,7 @@ pub use eth2::lighthouse::sync_state::{BackFillState, CustodyBackFillState, Sync
|
||||
pub use globals::NetworkGlobals;
|
||||
pub use partial::HeaderSentSet;
|
||||
pub use partial::OutgoingPartialColumn;
|
||||
pub use pubsub::{PubsubMessage, SnappyTransform, decode_partial};
|
||||
pub use pubsub::{PubsubMessage, PubsubPartialMessage, SnappyTransform, decode_partial};
|
||||
pub use subnet::{Subnet, SubnetDiscovery};
|
||||
pub use topics::{
|
||||
GossipEncoding, GossipKind, GossipTopic, TopicConfig, all_topics_at_fork,
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::sync::Arc;
|
||||
use tracing::{error, trace};
|
||||
use types::core::{EthSpec, Hash256};
|
||||
use types::data::{
|
||||
PartialDataColumn, PartialDataColumnHeader, PartialDataColumnPartsMetadata,
|
||||
CellBitmap, PartialDataColumn, PartialDataColumnHeader, PartialDataColumnPartsMetadata,
|
||||
PartialDataColumnSidecar, PartialDataColumnSidecarRef,
|
||||
};
|
||||
|
||||
@@ -30,10 +30,29 @@ impl<E: EthSpec> OutgoingPartialColumn<E> {
|
||||
partial_column: Arc<PartialDataColumn<E>>,
|
||||
header: &PartialDataColumnHeader<E>,
|
||||
header_sent_set: HeaderSentSet,
|
||||
requests: CellBitmap<E>,
|
||||
) -> Self {
|
||||
// For now, always request all cells
|
||||
let mut requests = partial_column.sidecar.cells_present_bitmap.clone_zeroed();
|
||||
requests.not_inplace();
|
||||
// Always set the request bit for available cells.
|
||||
//
|
||||
// Gossipsub applies certain optimisations to avoid sending redundant messages. This
|
||||
// requires that we stay consistent with our metadata. Gossipsub uses the `Metadata` trait
|
||||
// impl below to determine whether it can perform these optimisations.
|
||||
//
|
||||
// If we request a cell and then receive it, un-setting the request bit in the next
|
||||
// published message may cause issues:
|
||||
// Gossipsub tries to avoid the impact of application race conditions by checking newly
|
||||
// published metadata against previously published metadata. This no longer functions
|
||||
// correctly if request bits are unset between calls, as Gossipsub will consider a message
|
||||
// with new requests as new info to be propagated, possibly overwriting previous messages
|
||||
// with more cells (but fewer request bits). This is because gossipsub will see that both
|
||||
// metadata have some bits that are not set in the other metadata and therefore cannot
|
||||
// decide which actually carries more data. By always setting request bits for available
|
||||
// cells, we avoid this issue, as requests will never be unset between calls.
|
||||
//
|
||||
// In other words, gossipsub relies on the fact that metadata is additive. The request bit
|
||||
// is, therefore, to be seen as a "request if not available" bit.
|
||||
let requests = requests.union(&partial_column.sidecar.cells_present_bitmap);
|
||||
|
||||
let metadata = PartialDataColumnPartsMetadata::<E> {
|
||||
available: partial_column.sidecar.cells_present_bitmap.clone(),
|
||||
requests,
|
||||
@@ -322,6 +341,14 @@ mod tests {
|
||||
})
|
||||
}
|
||||
|
||||
fn make_all_one_bitmap(len: usize) -> CellBitmap<E> {
|
||||
let mut request_cells = CellBitmap::<E>::with_capacity(len).unwrap();
|
||||
for idx in 0..request_cells.len() {
|
||||
request_cells.set(idx, true).unwrap();
|
||||
}
|
||||
request_cells
|
||||
}
|
||||
|
||||
fn random_peer_id() -> PeerId {
|
||||
let keypair = Keypair::generate_ed25519();
|
||||
PeerId::from(keypair.public())
|
||||
@@ -422,7 +449,8 @@ mod tests {
|
||||
let header = make_header(4);
|
||||
let partial = make_partial_column(root, 4, &[0, 1]);
|
||||
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
|
||||
let requests = make_all_one_bitmap(4);
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
|
||||
|
||||
let peer = random_peer_id();
|
||||
|
||||
@@ -442,7 +470,8 @@ mod tests {
|
||||
// We have cells [0, 2, 3]
|
||||
let partial = make_partial_column(root, 4, &[0, 2, 3]);
|
||||
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
|
||||
let requests = make_all_one_bitmap(4);
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
|
||||
|
||||
let peer = random_peer_id();
|
||||
|
||||
@@ -474,7 +503,8 @@ mod tests {
|
||||
// We have cells [0]
|
||||
let partial = make_partial_column(root, 4, &[0]);
|
||||
let header_sent_set: HeaderSentSet = Arc::new(Mutex::new(HashSet::new()));
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set);
|
||||
let requests = make_all_one_bitmap(4);
|
||||
let outgoing = OutgoingPartialColumn::new(partial, &header, header_sent_set, requests);
|
||||
|
||||
let peer = random_peer_id();
|
||||
|
||||
|
||||
@@ -7,10 +7,10 @@ use ssz::{Decode, Encode};
|
||||
use std::io::{Error, ErrorKind};
|
||||
use std::sync::Arc;
|
||||
use types::{
|
||||
AttesterSlashing, AttesterSlashingBase, AttesterSlashingElectra, DataColumnSidecar,
|
||||
AttesterSlashing, AttesterSlashingBase, AttesterSlashingElectra, CellBitmap, DataColumnSidecar,
|
||||
DataColumnSubnetId, EthSpec, ForkContext, ForkName, Hash256, LightClientFinalityUpdate,
|
||||
LightClientOptimisticUpdate, PartialDataColumn, PartialDataColumnSidecar,
|
||||
PayloadAttestationMessage, ProposerSlashing, SignedAggregateAndProof,
|
||||
LightClientOptimisticUpdate, PartialDataColumn, PartialDataColumnHeader,
|
||||
PartialDataColumnSidecar, PayloadAttestationMessage, ProposerSlashing, SignedAggregateAndProof,
|
||||
SignedAggregateAndProofBase, SignedAggregateAndProofElectra, SignedBeaconBlock,
|
||||
SignedBeaconBlockAltair, SignedBeaconBlockBase, SignedBeaconBlockBellatrix,
|
||||
SignedBeaconBlockCapella, SignedBeaconBlockDeneb, SignedBeaconBlockElectra,
|
||||
@@ -56,6 +56,24 @@ pub enum PubsubMessage<E: EthSpec> {
|
||||
LightClientOptimisticUpdate(Box<LightClientOptimisticUpdate<E>>),
|
||||
}
|
||||
|
||||
/// A message published via the partial gossipsub protocol.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum PubsubPartialMessage<E: EthSpec> {
|
||||
/// A partial data column sidecar from the Fulu fork.
|
||||
DataColumnFulu {
|
||||
/// The column to publish. Libp2p will cache it and treat it as the data to send if any peer
|
||||
/// asks for data within it.
|
||||
column: Arc<PartialDataColumn<E>>,
|
||||
/// The cells we are requesting. Usually, this will be all-ones, as we need all cells.
|
||||
/// However, while get_blobs is still in progress, blobs we expect from the EL should not be
|
||||
/// requested to conserve bandwidth.
|
||||
request_cells: CellBitmap<E>,
|
||||
/// The header associated with the column above. This is set separately here, as the column
|
||||
/// to be published does not contain the header - it is stored without.
|
||||
header: Arc<PartialDataColumnHeader<E>>,
|
||||
},
|
||||
}
|
||||
|
||||
// Implements the `DataTransform` trait of gossipsub to employ snappy compression
|
||||
pub struct SnappyTransform {
|
||||
/// Sets the maximum size we allow gossipsub messages to decompress to.
|
||||
|
||||
Reference in New Issue
Block a user