mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-09 11:41:51 +00:00
Separate committee subscriptions queue (#3508)
## Issue Addressed
NA
## Proposed Changes
As we've seen on Prater, there seems to be a correlation between these messages
```
WARN Not enough time for a discovery search subnet_id: ExactSubnet { subnet_id: SubnetId(19), slot: Slot(3742336) }, service: attestation_service
```
... and nodes falling 20-30 slots behind the head for short periods. These nodes are running ~20k Prater validators.
After running some metrics, I can see that the `network_recv` channel is processing ~250k `AttestationSubscribe` messages per minute. It occurred to me that perhaps the `AttestationSubscribe` messages are "washing out" the `SendRequest` and `SendResponse` messages. In this PR I separate the `AttestationSubscribe` and `SyncCommitteeSubscribe` messages into their own queue so the `tokio::select!` in the `NetworkService` can still process the other messages in the `network_recv` channel without necessarily having to clear all the subscription messages first.
~~I've also added filter to the HTTP API to prevent duplicate subscriptions going to the network service.~~
## Additional Info
- Currently being tested on Prater
This commit is contained in:
@@ -25,11 +25,10 @@ use beacon_chain::{
|
||||
BeaconChainTypes, ProduceBlockVerification, WhenSlotSkipped,
|
||||
};
|
||||
pub use block_id::BlockId;
|
||||
use eth2::types::ValidatorStatus;
|
||||
use eth2::types::{self as api_types, EndpointVersion, ValidatorId};
|
||||
use eth2::types::{self as api_types, EndpointVersion, ValidatorId, ValidatorStatus};
|
||||
use lighthouse_network::{types::SyncState, EnrExt, NetworkGlobals, PeerId, PubsubMessage};
|
||||
use lighthouse_version::version_with_platform;
|
||||
use network::NetworkMessage;
|
||||
use network::{NetworkMessage, NetworkSenders, ValidatorSubscriptionMessage};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slog::{crit, debug, error, info, warn, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
@@ -42,7 +41,7 @@ use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::mpsc::UnboundedSender;
|
||||
use tokio::sync::mpsc::{Sender, UnboundedSender};
|
||||
use tokio_stream::{wrappers::BroadcastStream, StreamExt};
|
||||
use types::{
|
||||
Attestation, AttestationData, AttesterSlashing, BeaconStateError, BlindedPayload,
|
||||
@@ -93,7 +92,7 @@ pub struct TlsConfig {
|
||||
pub struct Context<T: BeaconChainTypes> {
|
||||
pub config: Config,
|
||||
pub chain: Option<Arc<BeaconChain<T>>>,
|
||||
pub network_tx: Option<UnboundedSender<NetworkMessage<T::EthSpec>>>,
|
||||
pub network_senders: Option<NetworkSenders<T::EthSpec>>,
|
||||
pub network_globals: Option<Arc<NetworkGlobals<T::EthSpec>>>,
|
||||
pub eth1_service: Option<eth1::Service>,
|
||||
pub log: Logger,
|
||||
@@ -337,14 +336,35 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
});
|
||||
|
||||
// Create a `warp` filter that provides access to the network sender channel.
|
||||
let inner_ctx = ctx.clone();
|
||||
let network_tx_filter = warp::any()
|
||||
.map(move || inner_ctx.network_tx.clone())
|
||||
.and_then(|network_tx| async move {
|
||||
match network_tx {
|
||||
Some(network_tx) => Ok(network_tx),
|
||||
let network_tx = ctx
|
||||
.network_senders
|
||||
.as_ref()
|
||||
.map(|senders| senders.network_send());
|
||||
let network_tx_filter =
|
||||
warp::any()
|
||||
.map(move || network_tx.clone())
|
||||
.and_then(|network_tx| async move {
|
||||
match network_tx {
|
||||
Some(network_tx) => Ok(network_tx),
|
||||
None => Err(warp_utils::reject::custom_not_found(
|
||||
"The networking stack has not yet started (network_tx).".to_string(),
|
||||
)),
|
||||
}
|
||||
});
|
||||
|
||||
// Create a `warp` filter that provides access to the network attestation subscription channel.
|
||||
let validator_subscriptions_tx = ctx
|
||||
.network_senders
|
||||
.as_ref()
|
||||
.map(|senders| senders.validator_subscription_send());
|
||||
let validator_subscription_tx_filter = warp::any()
|
||||
.map(move || validator_subscriptions_tx.clone())
|
||||
.and_then(|validator_subscriptions_tx| async move {
|
||||
match validator_subscriptions_tx {
|
||||
Some(validator_subscriptions_tx) => Ok(validator_subscriptions_tx),
|
||||
None => Err(warp_utils::reject::custom_not_found(
|
||||
"The networking stack has not yet started.".to_string(),
|
||||
"The networking stack has not yet started (validator_subscription_tx)."
|
||||
.to_string(),
|
||||
)),
|
||||
}
|
||||
});
|
||||
@@ -2083,7 +2103,7 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.to_ref()
|
||||
.fork_name(&chain.spec)
|
||||
.map_err(inconsistent_fork_rejection)?;
|
||||
// Pose as a V2 endpoint so we return the fork `version`.
|
||||
// Pose as a V2 endpoint so we return the fork `version`.
|
||||
fork_versioned_response(V2, fork_name, block)
|
||||
.map(|response| warp::reply::json(&response))
|
||||
},
|
||||
@@ -2345,7 +2365,7 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.and(not_while_syncing_filter.clone())
|
||||
.and(chain_filter.clone())
|
||||
.and(warp::body::json())
|
||||
.and(network_tx_filter.clone())
|
||||
.and(network_tx_filter)
|
||||
.and(log_filter.clone())
|
||||
.and_then(
|
||||
|chain: Arc<BeaconChain<T>>,
|
||||
@@ -2370,12 +2390,14 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.and(warp::path("beacon_committee_subscriptions"))
|
||||
.and(warp::path::end())
|
||||
.and(warp::body::json())
|
||||
.and(network_tx_filter.clone())
|
||||
.and(validator_subscription_tx_filter.clone())
|
||||
.and(chain_filter.clone())
|
||||
.and(log_filter.clone())
|
||||
.and_then(
|
||||
|subscriptions: Vec<api_types::BeaconCommitteeSubscription>,
|
||||
network_tx: UnboundedSender<NetworkMessage<T::EthSpec>>,
|
||||
chain: Arc<BeaconChain<T>>| {
|
||||
validator_subscription_tx: Sender<ValidatorSubscriptionMessage>,
|
||||
chain: Arc<BeaconChain<T>>,
|
||||
log: Logger| {
|
||||
blocking_json_task(move || {
|
||||
for subscription in &subscriptions {
|
||||
chain
|
||||
@@ -2383,7 +2405,7 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.write()
|
||||
.auto_register_local_validator(subscription.validator_index);
|
||||
|
||||
let subscription = api_types::ValidatorSubscription {
|
||||
let validator_subscription = api_types::ValidatorSubscription {
|
||||
validator_index: subscription.validator_index,
|
||||
attestation_committee_index: subscription.committee_index,
|
||||
slot: subscription.slot,
|
||||
@@ -2391,12 +2413,20 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
is_aggregator: subscription.is_aggregator,
|
||||
};
|
||||
|
||||
publish_network_message(
|
||||
&network_tx,
|
||||
NetworkMessage::AttestationSubscribe {
|
||||
subscriptions: vec![subscription],
|
||||
},
|
||||
)?;
|
||||
let message = ValidatorSubscriptionMessage::AttestationSubscribe {
|
||||
subscriptions: vec![validator_subscription],
|
||||
};
|
||||
if let Err(e) = validator_subscription_tx.try_send(message) {
|
||||
warn!(
|
||||
log,
|
||||
"Unable to process committee subscriptions";
|
||||
"info" => "the host may be overloaded or resource-constrained",
|
||||
"error" => ?e,
|
||||
);
|
||||
return Err(warp_utils::reject::custom_server_error(
|
||||
"unable to queue subscription, host may be overloaded or shutting down".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -2581,12 +2611,15 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.and(warp::path("sync_committee_subscriptions"))
|
||||
.and(warp::path::end())
|
||||
.and(warp::body::json())
|
||||
.and(network_tx_filter)
|
||||
.and(validator_subscription_tx_filter)
|
||||
.and(chain_filter.clone())
|
||||
.and(log_filter.clone())
|
||||
.and_then(
|
||||
|subscriptions: Vec<types::SyncCommitteeSubscription>,
|
||||
network_tx: UnboundedSender<NetworkMessage<T::EthSpec>>,
|
||||
chain: Arc<BeaconChain<T>>| {
|
||||
validator_subscription_tx: Sender<ValidatorSubscriptionMessage>,
|
||||
chain: Arc<BeaconChain<T>>,
|
||||
log: Logger
|
||||
| {
|
||||
blocking_json_task(move || {
|
||||
for subscription in subscriptions {
|
||||
chain
|
||||
@@ -2594,12 +2627,20 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.write()
|
||||
.auto_register_local_validator(subscription.validator_index);
|
||||
|
||||
publish_network_message(
|
||||
&network_tx,
|
||||
NetworkMessage::SyncCommitteeSubscribe {
|
||||
let message = ValidatorSubscriptionMessage::SyncCommitteeSubscribe {
|
||||
subscriptions: vec![subscription],
|
||||
},
|
||||
)?;
|
||||
};
|
||||
if let Err(e) = validator_subscription_tx.try_send(message) {
|
||||
warn!(
|
||||
log,
|
||||
"Unable to process sync subscriptions";
|
||||
"info" => "the host may be overloaded or resource-constrained",
|
||||
"error" => ?e
|
||||
);
|
||||
return Err(warp_utils::reject::custom_server_error(
|
||||
"unable to queue subscription, host may be overloaded or shutting down".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user