mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-23 23:04:53 +00:00
Fix custody context initialization race condition that caused panic (#8391)
Take 2 of #8390. Fixes the race condition properly instead of propagating the error. I think this is a better alternative, and doesn't seem to look that bad. * Lift node id loading or generation from `NetworkService ` startup to the `ClientBuilder`, so that it can be used to compute custody columns for the beacon chain without waiting for Network bootstrap. I've considered and implemented a few alternatives: 1. passing `node_id` to beacon chain builder and compute columns when creating `CustodyContext`. This approach isn't good for separation of concerns and isn't great for testability 2. passing `ordered_custody_groups` to beacon chain. `CustodyContext` only uses this to compute ordered custody columns, so we might as well lift this logic out, so we don't have to do error handling in `CustodyContext` construction. Less tests to update;. Co-Authored-By: Jimmy Chen <jchen.tc@gmail.com>
This commit is contained in:
@@ -2,13 +2,11 @@ use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use ssz_derive::{Decode, Encode};
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::OnceLock;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap},
|
||||
sync::atomic::{AtomicU64, Ordering},
|
||||
};
|
||||
use tracing::{debug, warn};
|
||||
use types::data_column_custody_group::{CustodyIndex, compute_columns_for_custody_group};
|
||||
use types::{ChainSpec, ColumnIndex, Epoch, EthSpec, Slot};
|
||||
|
||||
/// A delay before making the CGC change effective to the data availability checker.
|
||||
@@ -206,7 +204,7 @@ fn get_validators_custody_requirement(validator_custody_units: u64, spec: &Chain
|
||||
/// Therefore, the custody count at any point in time is calculated as the max of
|
||||
/// the validator custody at that time and the current cli params.
|
||||
///
|
||||
/// Choosing the max ensures that we always have the minimum required columns and
|
||||
/// Choosing the max ensures that we always have the minimum required columns, and
|
||||
/// we can adjust the `status.earliest_available_slot` value to indicate to our peers
|
||||
/// the columns that we can guarantee to serve.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default, Deserialize, Serialize)]
|
||||
@@ -218,7 +216,7 @@ pub enum NodeCustodyType {
|
||||
/// wants to subscribe to the minimum number of columns to enable
|
||||
/// reconstruction (50%) of the full blob data on demand.
|
||||
SemiSupernode,
|
||||
/// The node isn't running with with any explicit cli parameters
|
||||
/// The node isn't running with any explicit cli parameters
|
||||
/// or is running with cli parameters to indicate that it wants
|
||||
/// to only subscribe to the minimal custody requirements.
|
||||
#[default]
|
||||
@@ -248,9 +246,9 @@ pub struct CustodyContext<E: EthSpec> {
|
||||
validator_custody_count: AtomicU64,
|
||||
/// Maintains all the validators that this node is connected to currently
|
||||
validator_registrations: RwLock<ValidatorRegistrations>,
|
||||
/// Stores an immutable, ordered list of all custody columns as determined by the node's NodeID
|
||||
/// on startup.
|
||||
all_custody_columns_ordered: OnceLock<Box<[ColumnIndex]>>,
|
||||
/// Stores an immutable, ordered list of all data column indices as determined by the node's NodeID
|
||||
/// on startup. This used to determine the node's custody columns.
|
||||
ordered_custody_column_indices: Vec<ColumnIndex>,
|
||||
_phantom_data: PhantomData<E>,
|
||||
}
|
||||
|
||||
@@ -259,7 +257,11 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
/// exists.
|
||||
///
|
||||
/// The `node_custody_type` value is based on current cli parameters.
|
||||
pub fn new(node_custody_type: NodeCustodyType, spec: &ChainSpec) -> Self {
|
||||
pub fn new(
|
||||
node_custody_type: NodeCustodyType,
|
||||
ordered_custody_column_indices: Vec<ColumnIndex>,
|
||||
spec: &ChainSpec,
|
||||
) -> Self {
|
||||
let cgc_override = node_custody_type.get_custody_count_override(spec);
|
||||
// If there's no override, we initialise `validator_custody_count` to 0. This has been the
|
||||
// existing behaviour and we maintain this for now to avoid a semantic schema change until
|
||||
@@ -267,7 +269,7 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
Self {
|
||||
validator_custody_count: AtomicU64::new(cgc_override.unwrap_or(0)),
|
||||
validator_registrations: RwLock::new(ValidatorRegistrations::new(cgc_override)),
|
||||
all_custody_columns_ordered: OnceLock::new(),
|
||||
ordered_custody_column_indices,
|
||||
_phantom_data: PhantomData,
|
||||
}
|
||||
}
|
||||
@@ -290,6 +292,7 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
ssz_context: CustodyContextSsz,
|
||||
node_custody_type: NodeCustodyType,
|
||||
head_epoch: Epoch,
|
||||
ordered_custody_column_indices: Vec<ColumnIndex>,
|
||||
spec: &ChainSpec,
|
||||
) -> (Self, Option<CustodyCountChanged>) {
|
||||
let CustodyContextSsz {
|
||||
@@ -355,39 +358,13 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
.into_iter()
|
||||
.collect(),
|
||||
}),
|
||||
all_custody_columns_ordered: OnceLock::new(),
|
||||
ordered_custody_column_indices,
|
||||
_phantom_data: PhantomData,
|
||||
};
|
||||
|
||||
(custody_context, custody_count_changed)
|
||||
}
|
||||
|
||||
/// Initializes an ordered list of data columns based on provided custody groups.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `all_custody_groups_ordered` - Vector of custody group indices to map to columns
|
||||
/// * `spec` - Chain specification containing custody parameters
|
||||
///
|
||||
/// # Returns
|
||||
/// Ok(()) if initialization succeeds, Err with description string if it fails
|
||||
pub fn init_ordered_data_columns_from_custody_groups(
|
||||
&self,
|
||||
all_custody_groups_ordered: Vec<CustodyIndex>,
|
||||
spec: &ChainSpec,
|
||||
) -> Result<(), String> {
|
||||
let mut ordered_custody_columns = vec![];
|
||||
for custody_index in all_custody_groups_ordered {
|
||||
let columns = compute_columns_for_custody_group::<E>(custody_index, spec)
|
||||
.map_err(|e| format!("Failed to compute columns for custody group {e:?}"))?;
|
||||
ordered_custody_columns.extend(columns);
|
||||
}
|
||||
self.all_custody_columns_ordered
|
||||
.set(ordered_custody_columns.into_boxed_slice())
|
||||
.map_err(|_| {
|
||||
"Failed to initialise CustodyContext with computed custody columns".to_string()
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new validator index and updates the list of validators if required.
|
||||
///
|
||||
/// Also modifies the internal structures if the validator custody has changed to
|
||||
@@ -497,11 +474,7 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
/// A slice of ordered column indices that should be sampled for this epoch based on the node's custody configuration
|
||||
pub fn sampling_columns_for_epoch(&self, epoch: Epoch, spec: &ChainSpec) -> &[ColumnIndex] {
|
||||
let num_of_columns_to_sample = self.num_of_data_columns_to_sample(epoch, spec);
|
||||
let all_columns_ordered = self
|
||||
.all_custody_columns_ordered
|
||||
.get()
|
||||
.expect("all_custody_columns_ordered should be initialized");
|
||||
&all_columns_ordered[..num_of_columns_to_sample]
|
||||
&self.ordered_custody_column_indices[..num_of_columns_to_sample]
|
||||
}
|
||||
|
||||
/// Returns the ordered list of column indices that the node is assigned to custody
|
||||
@@ -528,12 +501,11 @@ impl<E: EthSpec> CustodyContext<E> {
|
||||
self.custody_group_count_at_head(spec) as usize
|
||||
};
|
||||
|
||||
let all_columns_ordered = self
|
||||
.all_custody_columns_ordered
|
||||
.get()
|
||||
.expect("all_custody_columns_ordered should be initialized");
|
||||
// This is an unnecessary conversion for spec compliance, basically just multiplying by 1.
|
||||
let columns_per_custody_group = spec.data_columns_per_group::<E>() as usize;
|
||||
let custody_column_count = columns_per_custody_group * custody_group_count;
|
||||
|
||||
&all_columns_ordered[..custody_group_count]
|
||||
&self.ordered_custody_column_indices[..custody_column_count]
|
||||
}
|
||||
|
||||
/// The node has completed backfill for this epoch. Update the internal records so the function
|
||||
@@ -599,11 +571,9 @@ impl<E: EthSpec> From<&CustodyContext<E>> for CustodyContextSsz {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rand::rng;
|
||||
use rand::seq::SliceRandom;
|
||||
use types::MainnetEthSpec;
|
||||
|
||||
use super::*;
|
||||
use crate::test_utils::generate_data_column_indices_rand_order;
|
||||
use types::MainnetEthSpec;
|
||||
|
||||
type E = MainnetEthSpec;
|
||||
|
||||
@@ -623,13 +593,10 @@ mod tests {
|
||||
ssz_context,
|
||||
NodeCustodyType::Fullnode,
|
||||
head_epoch,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
spec,
|
||||
);
|
||||
|
||||
let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, spec)
|
||||
.expect("should initialise ordered data columns");
|
||||
custody_context
|
||||
}
|
||||
|
||||
@@ -668,6 +635,7 @@ mod tests {
|
||||
ssz_context,
|
||||
target_node_custody_type,
|
||||
head_epoch,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
spec,
|
||||
);
|
||||
|
||||
@@ -738,6 +706,7 @@ mod tests {
|
||||
ssz_context,
|
||||
target_node_custody_type,
|
||||
head_epoch,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
spec,
|
||||
);
|
||||
|
||||
@@ -759,7 +728,11 @@ mod tests {
|
||||
#[test]
|
||||
fn no_validators_supernode_default() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Supernode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Supernode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
assert_eq!(
|
||||
custody_context.custody_group_count_at_head(&spec),
|
||||
spec.number_of_custody_groups
|
||||
@@ -773,7 +746,11 @@ mod tests {
|
||||
#[test]
|
||||
fn no_validators_semi_supernode_default() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::SemiSupernode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::SemiSupernode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
assert_eq!(
|
||||
custody_context.custody_group_count_at_head(&spec),
|
||||
spec.number_of_custody_groups / 2
|
||||
@@ -787,7 +764,11 @@ mod tests {
|
||||
#[test]
|
||||
fn no_validators_fullnode_default() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
assert_eq!(
|
||||
custody_context.custody_group_count_at_head(&spec),
|
||||
spec.custody_requirement,
|
||||
@@ -802,7 +783,11 @@ mod tests {
|
||||
#[test]
|
||||
fn register_single_validator_should_update_cgc() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let bal_per_additional_group = spec.balance_per_additional_custody_group;
|
||||
let min_val_custody_requirement = spec.validator_custody_requirement;
|
||||
// One single node increases its balance over 3 epochs.
|
||||
@@ -826,7 +811,11 @@ mod tests {
|
||||
#[test]
|
||||
fn register_multiple_validators_should_update_cgc() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let bal_per_additional_group = spec.balance_per_additional_custody_group;
|
||||
let min_val_custody_requirement = spec.validator_custody_requirement;
|
||||
// Add 3 validators over 3 epochs.
|
||||
@@ -863,7 +852,11 @@ mod tests {
|
||||
#[test]
|
||||
fn register_validators_should_not_update_cgc_for_supernode() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Supernode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Supernode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let bal_per_additional_group = spec.balance_per_additional_custody_group;
|
||||
|
||||
// Add 3 validators over 3 epochs.
|
||||
@@ -901,7 +894,11 @@ mod tests {
|
||||
#[test]
|
||||
fn cgc_change_should_be_effective_to_sampling_after_delay() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let current_slot = Slot::new(10);
|
||||
let current_epoch = current_slot.epoch(E::slots_per_epoch());
|
||||
let default_sampling_size =
|
||||
@@ -932,7 +929,11 @@ mod tests {
|
||||
#[test]
|
||||
fn validator_dropped_after_no_registrations_within_expiry_should_not_reduce_cgc() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let current_slot = Slot::new(10);
|
||||
let val_custody_units_1 = 10;
|
||||
let val_custody_units_2 = 5;
|
||||
@@ -974,7 +975,11 @@ mod tests {
|
||||
#[test]
|
||||
fn validator_dropped_after_no_registrations_within_expiry() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
let current_slot = Slot::new(10);
|
||||
let val_custody_units_1 = 10;
|
||||
let val_custody_units_2 = 5;
|
||||
@@ -1021,37 +1026,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_init_ordered_data_columns_and_return_sampling_columns() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let sampling_size = custody_context.num_of_data_columns_to_sample(Epoch::new(0), &spec);
|
||||
|
||||
// initialise ordered columns
|
||||
let mut all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
all_custody_groups_ordered.shuffle(&mut rng());
|
||||
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(
|
||||
all_custody_groups_ordered.clone(),
|
||||
&spec,
|
||||
)
|
||||
.expect("should initialise ordered data columns");
|
||||
|
||||
let actual_sampling_columns =
|
||||
custody_context.sampling_columns_for_epoch(Epoch::new(0), &spec);
|
||||
|
||||
let expected_sampling_columns = &all_custody_groups_ordered
|
||||
.iter()
|
||||
.flat_map(|custody_index| {
|
||||
compute_columns_for_custody_group::<E>(*custody_index, &spec)
|
||||
.expect("should compute columns for custody group")
|
||||
})
|
||||
.collect::<Vec<_>>()[0..sampling_size];
|
||||
|
||||
assert_eq!(actual_sampling_columns, expected_sampling_columns)
|
||||
}
|
||||
|
||||
/// Update the validator every epoch and assert cgc against expected values.
|
||||
fn register_validators_and_assert_cgc<E: EthSpec>(
|
||||
custody_context: &CustodyContext<E>,
|
||||
@@ -1077,12 +1051,12 @@ mod tests {
|
||||
#[test]
|
||||
fn custody_columns_for_epoch_no_validators_fullnode() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec)
|
||||
.expect("should initialise ordered data columns");
|
||||
let ordered_custody_column_indices = generate_data_column_indices_rand_order::<E>();
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
ordered_custody_column_indices,
|
||||
&spec,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
custody_context.custody_columns_for_epoch(None, &spec).len(),
|
||||
@@ -1093,12 +1067,12 @@ mod tests {
|
||||
#[test]
|
||||
fn custody_columns_for_epoch_no_validators_supernode() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Supernode, &spec);
|
||||
let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec)
|
||||
.expect("should initialise ordered data columns");
|
||||
let ordered_custody_column_indices = generate_data_column_indices_rand_order::<E>();
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Supernode,
|
||||
ordered_custody_column_indices,
|
||||
&spec,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
custody_context.custody_columns_for_epoch(None, &spec).len(),
|
||||
@@ -1109,14 +1083,14 @@ mod tests {
|
||||
#[test]
|
||||
fn custody_columns_for_epoch_with_validators_should_match_cgc() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
let ordered_custody_column_indices = generate_data_column_indices_rand_order::<E>();
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
ordered_custody_column_indices,
|
||||
&spec,
|
||||
);
|
||||
let val_custody_units = 10;
|
||||
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec)
|
||||
.expect("should initialise ordered data columns");
|
||||
|
||||
let _ = custody_context.register_validators(
|
||||
vec![(
|
||||
0,
|
||||
@@ -1135,14 +1109,14 @@ mod tests {
|
||||
#[test]
|
||||
fn custody_columns_for_epoch_specific_epoch_uses_epoch_cgc() {
|
||||
let spec = E::default_spec();
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::Fullnode, &spec);
|
||||
let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::<Vec<_>>();
|
||||
let ordered_custody_column_indices = generate_data_column_indices_rand_order::<E>();
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::Fullnode,
|
||||
ordered_custody_column_indices,
|
||||
&spec,
|
||||
);
|
||||
let test_epoch = Epoch::new(5);
|
||||
|
||||
custody_context
|
||||
.init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, &spec)
|
||||
.expect("should initialise ordered data columns");
|
||||
|
||||
let expected_cgc = custody_context.custody_group_count_at_epoch(test_epoch, &spec);
|
||||
assert_eq!(
|
||||
custody_context
|
||||
@@ -1165,6 +1139,7 @@ mod tests {
|
||||
ssz_context,
|
||||
NodeCustodyType::Fullnode,
|
||||
Epoch::new(0),
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
|
||||
@@ -1198,7 +1173,11 @@ mod tests {
|
||||
fn restore_semi_supernode_with_validators_can_exceed_64() {
|
||||
let spec = E::default_spec();
|
||||
let semi_supernode_cgc = spec.number_of_custody_groups / 2; // 64
|
||||
let custody_context = CustodyContext::<E>::new(NodeCustodyType::SemiSupernode, &spec);
|
||||
let custody_context = CustodyContext::<E>::new(
|
||||
NodeCustodyType::SemiSupernode,
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
|
||||
// Verify initial CGC is 64 (semi-supernode)
|
||||
assert_eq!(
|
||||
@@ -1348,6 +1327,7 @@ mod tests {
|
||||
ssz_context,
|
||||
NodeCustodyType::Fullnode,
|
||||
Epoch::new(20),
|
||||
generate_data_column_indices_rand_order::<E>(),
|
||||
&spec,
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user