De-duplicate attestations in the slasher (#2767)

## Issue Addressed

Closes https://github.com/sigp/lighthouse/issues/2112
Closes https://github.com/sigp/lighthouse/issues/1861

## Proposed Changes

Collect attestations by validator index in the slasher, and use the magic of reference counting to automatically discard redundant attestations. This results in us storing only 1-2% of the attestations observed when subscribed to all subnets, which carries over to a 50-100x reduction in data stored 🎉 

## Additional Info

There's some nuance to the configuration of the `slot-offset`. It has a profound effect on the effectiveness of de-duplication, see the docs added to the book for an explanation: 5442e695e5/book/src/slasher.md (slot-offset)
This commit is contained in:
Michael Sproul
2021-11-08 00:01:09 +00:00
parent fadb8b2b2b
commit df02639b71
13 changed files with 252 additions and 93 deletions

View File

@@ -55,11 +55,18 @@ impl<T: BeaconChainTypes> SlasherService<T> {
// don't need to burden them with more work (we can wait).
let (notif_sender, notif_receiver) = sync_channel(1);
let update_period = slasher.config().update_period;
let slot_offset = slasher.config().slot_offset;
let beacon_chain = self.beacon_chain.clone();
let network_sender = self.network_sender.clone();
executor.spawn(
Self::run_notifier(beacon_chain.clone(), update_period, notif_sender, log),
Self::run_notifier(
beacon_chain.clone(),
update_period,
slot_offset,
notif_sender,
log,
),
"slasher_server_notifier",
);
@@ -75,12 +82,19 @@ impl<T: BeaconChainTypes> SlasherService<T> {
async fn run_notifier(
beacon_chain: Arc<BeaconChain<T>>,
update_period: u64,
slot_offset: f64,
notif_sender: SyncSender<Epoch>,
log: Logger,
) {
// NOTE: could align each run to some fixed point in each slot, see:
// https://github.com/sigp/lighthouse/issues/1861
let mut interval = interval_at(Instant::now(), Duration::from_secs(update_period));
let slot_offset = Duration::from_secs_f64(slot_offset);
let start_instant =
if let Some(duration_to_next_slot) = beacon_chain.slot_clock.duration_to_next_slot() {
Instant::now() + duration_to_next_slot + slot_offset
} else {
error!(log, "Error aligning slasher to slot clock");
Instant::now()
};
let mut interval = interval_at(start_instant, Duration::from_secs(update_period));
loop {
interval.tick().await;

View File

@@ -1,5 +1,5 @@
use crate::metrics::{self, SLASHER_COMPRESSION_RATIO, SLASHER_NUM_CHUNKS_UPDATED};
use crate::{AttesterRecord, AttesterSlashingStatus, Config, Error, SlasherDB};
use crate::{AttesterSlashingStatus, Config, Error, IndexedAttesterRecord, SlasherDB};
use flate2::bufread::{ZlibDecoder, ZlibEncoder};
use lmdb::{RwTransaction, Transaction};
use serde_derive::{Deserialize, Serialize};
@@ -486,7 +486,7 @@ pub fn update<E: EthSpec>(
db: &SlasherDB<E>,
txn: &mut RwTransaction<'_>,
validator_chunk_index: usize,
batch: Vec<Arc<(IndexedAttestation<E>, AttesterRecord)>>,
batch: Vec<Arc<IndexedAttesterRecord<E>>>,
current_epoch: Epoch,
config: &Config,
) -> Result<HashSet<AttesterSlashing<E>>, Error> {
@@ -496,7 +496,7 @@ pub fn update<E: EthSpec>(
let mut chunk_attestations = BTreeMap::new();
for attestation in batch {
chunk_attestations
.entry(config.chunk_index(attestation.0.data.source.epoch))
.entry(config.chunk_index(attestation.indexed.data.source.epoch))
.or_insert_with(Vec::new)
.push(attestation);
}
@@ -573,7 +573,7 @@ pub fn update_array<E: EthSpec, T: TargetArrayChunk>(
db: &SlasherDB<E>,
txn: &mut RwTransaction<'_>,
validator_chunk_index: usize,
chunk_attestations: &BTreeMap<usize, Vec<Arc<(IndexedAttestation<E>, AttesterRecord)>>>,
chunk_attestations: &BTreeMap<usize, Vec<Arc<IndexedAttesterRecord<E>>>>,
current_epoch: Epoch,
config: &Config,
) -> Result<HashSet<AttesterSlashing<E>>, Error> {
@@ -597,7 +597,7 @@ pub fn update_array<E: EthSpec, T: TargetArrayChunk>(
for attestations in chunk_attestations.values() {
for attestation in attestations {
for validator_index in
config.attesting_validators_in_chunk(&attestation.0, validator_chunk_index)
config.attesting_validators_in_chunk(&attestation.indexed, validator_chunk_index)
{
let slashing_status = apply_attestation_for_validator::<E, T>(
db,
@@ -605,11 +605,11 @@ pub fn update_array<E: EthSpec, T: TargetArrayChunk>(
&mut updated_chunks,
validator_chunk_index,
validator_index,
&attestation.0,
&attestation.indexed,
current_epoch,
config,
)?;
if let Some(slashing) = slashing_status.into_slashing(&attestation.0) {
if let Some(slashing) = slashing_status.into_slashing(&attestation.indexed) {
slashings.insert(slashing);
}
}

View File

@@ -1,64 +1,85 @@
use crate::{AttesterRecord, Config};
use crate::{AttesterRecord, Config, IndexedAttesterRecord};
use parking_lot::Mutex;
use std::collections::BTreeSet;
use std::sync::Arc;
use types::{EthSpec, IndexedAttestation};
use std::collections::BTreeMap;
use std::sync::{Arc, Weak};
use types::{EthSpec, Hash256, IndexedAttestation};
/// Staging area for attestations received from the network.
///
/// To be added to the database in batches, for efficiency and to prevent data races.
/// Attestations are not grouped by validator index at this stage so that they can be easily
/// filtered for timeliness.
#[derive(Debug, Default)]
pub struct AttestationQueue<E: EthSpec> {
/// All attestations (unique) for storage on disk.
pub queue: Mutex<AttestationBatch<E>>,
pub queue: Mutex<SimpleBatch<E>>,
}
pub type SimpleBatch<E> = Vec<Arc<IndexedAttesterRecord<E>>>;
/// Attestations dequeued from the queue and in preparation for processing.
///
/// This struct is responsible for mapping validator indices to attestations and performing
/// de-duplication to remove redundant attestations.
#[derive(Debug, Default)]
pub struct AttestationBatch<E: EthSpec> {
/// Map from (`validator_index`, `attestation_data_hash`) to indexed attester record.
///
/// This mapping is used for de-duplication, see:
///
/// https://github.com/sigp/lighthouse/issues/2112
pub attesters: BTreeMap<(u64, Hash256), Arc<IndexedAttesterRecord<E>>>,
/// Vec of all unique indexed attester records.
///
/// The weak references account for the fact that some records might prove useless after
/// de-duplication.
pub attestations: Vec<Weak<IndexedAttesterRecord<E>>>,
}
/// Attestations grouped by validator index range.
#[derive(Debug)]
pub struct GroupedAttestations<E: EthSpec> {
pub subqueues: Vec<AttestationBatch<E>>,
}
/// A queue of attestations for a range of validator indices.
#[derive(Debug, Default)]
pub struct AttestationBatch<E: EthSpec> {
pub attestations: Vec<Arc<(IndexedAttestation<E>, AttesterRecord)>>,
pub subqueues: Vec<SimpleBatch<E>>,
}
impl<E: EthSpec> AttestationBatch<E> {
pub fn len(&self) -> usize {
self.attestations.len()
/// Add an attestation to the queue.
pub fn queue(&mut self, indexed_record: Arc<IndexedAttesterRecord<E>>) {
self.attestations.push(Arc::downgrade(&indexed_record));
let attestation_data_hash = indexed_record.record.attestation_data_hash;
for &validator_index in &indexed_record.indexed.attesting_indices {
self.attesters
.entry((validator_index, attestation_data_hash))
.and_modify(|existing_entry| {
// If the new record is for the same attestation data but with more bits set
// then replace the existing record so that we might avoid storing the
// smaller indexed attestation. Single-bit attestations will usually be removed
// completely by this process, and aggregates will only be retained if they
// are not redundant with respect to a larger aggregate seen in the same batch.
if existing_entry.indexed.attesting_indices.len()
< indexed_record.indexed.attesting_indices.len()
{
*existing_entry = indexed_record.clone();
}
})
.or_insert_with(|| indexed_record.clone());
}
}
pub fn is_empty(&self) -> bool {
self.attestations.is_empty()
}
/// Group the attestations by validator index.
pub fn group_by_validator_index(self, config: &Config) -> GroupedAttestations<E> {
/// Group the attestations by validator chunk index.
pub fn group_by_validator_chunk_index(self, config: &Config) -> GroupedAttestations<E> {
let mut grouped_attestations = GroupedAttestations { subqueues: vec![] };
for attestation in self.attestations {
let subqueue_ids = attestation
.0
.attesting_indices
.iter()
.map(|validator_index| config.validator_chunk_index(*validator_index))
.collect::<BTreeSet<_>>();
for ((validator_index, _), indexed_record) in self.attesters {
let subqueue_id = config.validator_chunk_index(validator_index);
if let Some(max_subqueue_id) = subqueue_ids.iter().next_back() {
if *max_subqueue_id >= grouped_attestations.subqueues.len() {
grouped_attestations
.subqueues
.resize_with(max_subqueue_id + 1, AttestationBatch::default);
}
if subqueue_id >= grouped_attestations.subqueues.len() {
grouped_attestations
.subqueues
.resize_with(subqueue_id + 1, SimpleBatch::default);
}
for subqueue_id in subqueue_ids {
grouped_attestations.subqueues[subqueue_id]
.attestations
.push(attestation.clone());
}
grouped_attestations.subqueues[subqueue_id].push(indexed_record);
}
grouped_attestations
@@ -66,21 +87,18 @@ impl<E: EthSpec> AttestationBatch<E> {
}
impl<E: EthSpec> AttestationQueue<E> {
/// Add an attestation to the queue.
pub fn queue(&self, attestation: IndexedAttestation<E>) {
let attester_record = AttesterRecord::from(attestation.clone());
self.queue
.lock()
.attestations
.push(Arc::new((attestation, attester_record)));
let indexed_record = IndexedAttesterRecord::new(attestation, attester_record);
self.queue.lock().push(indexed_record);
}
pub fn dequeue(&self) -> AttestationBatch<E> {
pub fn dequeue(&self) -> SimpleBatch<E> {
std::mem::take(&mut self.queue.lock())
}
pub fn requeue(&self, batch: AttestationBatch<E>) {
self.queue.lock().attestations.extend(batch.attestations);
pub fn requeue(&self, batch: SimpleBatch<E>) {
self.queue.lock().extend(batch);
}
pub fn len(&self) -> usize {

View File

@@ -1,4 +1,5 @@
use ssz_derive::{Decode, Encode};
use std::sync::Arc;
use tree_hash::TreeHash as _;
use tree_hash_derive::TreeHash;
use types::{AggregateSignature, EthSpec, Hash256, IndexedAttestation, VariableList};
@@ -11,6 +12,21 @@ pub struct AttesterRecord {
pub indexed_attestation_hash: Hash256,
}
/// Bundling of an `IndexedAttestation` with an `AttesterRecord`.
///
/// This struct gets `Arc`d and passed around between each stage of queueing and processing.
#[derive(Debug)]
pub struct IndexedAttesterRecord<E: EthSpec> {
/// The indexed attestation as received from the network.
pub indexed: IndexedAttestation<E>,
/// Precomputed metadata (e.g. `attestation_data_hash`, `indexed_attestation_hash`)
/// used for de-duplication and database storage.
pub record: AttesterRecord,
}
impl<E: EthSpec> IndexedAttesterRecord<E> {
    /// Bundle an indexed attestation with its attester record, wrapped in an
    /// `Arc` ready to be shared between the queueing and processing stages.
    pub fn new(indexed: IndexedAttestation<E>, record: AttesterRecord) -> Arc<Self> {
        let bundled = Self { indexed, record };
        Arc::new(bundled)
    }
}
#[derive(Debug, Clone, Encode, Decode, TreeHash)]
struct IndexedAttestationHeader<T: EthSpec> {
pub attesting_indices: VariableList<u64, T::MaxValidatorsPerCommittee>,

View File

@@ -7,6 +7,7 @@ pub const DEFAULT_CHUNK_SIZE: usize = 16;
pub const DEFAULT_VALIDATOR_CHUNK_SIZE: usize = 256;
pub const DEFAULT_HISTORY_LENGTH: usize = 4096;
pub const DEFAULT_UPDATE_PERIOD: u64 = 12;
pub const DEFAULT_SLOT_OFFSET: f64 = 10.5;
pub const DEFAULT_MAX_DB_SIZE: usize = 256 * 1024; // 256 GiB
pub const DEFAULT_BROADCAST: bool = false;
@@ -26,12 +27,19 @@ pub struct Config {
pub history_length: usize,
/// Update frequency in seconds.
pub update_period: u64,
/// Offset from the start of the slot to begin processing.
#[serde(skip, default = "default_slot_offset")]
pub slot_offset: f64,
/// Maximum size of the LMDB database in megabytes.
pub max_db_size_mbs: usize,
/// Whether to broadcast slashings found to the network.
pub broadcast: bool,
}
// Serde `default` function for `Config::slot_offset`: the field is marked
// `#[serde(skip, default = "default_slot_offset")]`, so deserialized configs
// always receive this value.
fn default_slot_offset() -> f64 {
DEFAULT_SLOT_OFFSET
}
impl Config {
pub fn new(database_path: PathBuf) -> Self {
Self {
@@ -40,6 +48,7 @@ impl Config {
validator_chunk_size: DEFAULT_VALIDATOR_CHUNK_SIZE,
history_length: DEFAULT_HISTORY_LENGTH,
update_period: DEFAULT_UPDATE_PERIOD,
slot_offset: DEFAULT_SLOT_OFFSET,
max_db_size_mbs: DEFAULT_MAX_DB_SIZE,
broadcast: DEFAULT_BROADCAST,
}

View File

@@ -15,8 +15,8 @@ pub mod test_utils;
mod utils;
pub use crate::slasher::Slasher;
pub use attestation_queue::{AttestationBatch, AttestationQueue};
pub use attester_record::AttesterRecord;
pub use attestation_queue::{AttestationBatch, AttestationQueue, SimpleBatch};
pub use attester_record::{AttesterRecord, IndexedAttesterRecord};
pub use block_queue::BlockQueue;
pub use config::Config;
pub use database::SlasherDB;

View File

@@ -22,6 +22,11 @@ lazy_static! {
"slasher_num_attestations_valid",
"Number of valid attestations per batch"
);
pub static ref SLASHER_NUM_ATTESTATIONS_STORED_PER_BATCH: Result<IntGauge> =
try_create_int_gauge(
"slasher_num_attestations_stored_per_batch",
"Number of attestations stored per batch"
);
pub static ref SLASHER_NUM_BLOCKS_PROCESSED: Result<IntGauge> = try_create_int_gauge(
"slasher_num_blocks_processed",
"Number of blocks processed per batch",

View File

@@ -1,4 +1,8 @@
use crate::{database::CURRENT_SCHEMA_VERSION, Config, Error, SlasherDB};
use crate::{
config::{DEFAULT_BROADCAST, DEFAULT_SLOT_OFFSET},
database::CURRENT_SCHEMA_VERSION,
Config, Error, SlasherDB,
};
use lmdb::RwTransaction;
use serde_derive::{Deserialize, Serialize};
use std::path::PathBuf;
@@ -25,8 +29,9 @@ impl Into<ConfigV2> for ConfigV1 {
validator_chunk_size: self.validator_chunk_size,
history_length: self.history_length,
update_period: self.update_period,
slot_offset: DEFAULT_SLOT_OFFSET,
max_db_size_mbs: self.max_db_size_mbs,
broadcast: false,
broadcast: DEFAULT_BROADCAST,
}
}
}

View File

@@ -1,11 +1,12 @@
use crate::batch_stats::{AttestationStats, BatchStats, BlockStats};
use crate::metrics::{
self, SLASHER_NUM_ATTESTATIONS_DEFERRED, SLASHER_NUM_ATTESTATIONS_DROPPED,
SLASHER_NUM_ATTESTATIONS_VALID, SLASHER_NUM_BLOCKS_PROCESSED,
SLASHER_NUM_ATTESTATIONS_STORED_PER_BATCH, SLASHER_NUM_ATTESTATIONS_VALID,
SLASHER_NUM_BLOCKS_PROCESSED,
};
use crate::{
array, AttestationBatch, AttestationQueue, AttesterRecord, BlockQueue, Config, Error,
ProposerSlashingStatus, SlasherDB,
ProposerSlashingStatus, SimpleBatch, SlasherDB,
};
use lmdb::{RwTransaction, Transaction};
use parking_lot::Mutex;
@@ -132,33 +133,56 @@ impl<E: EthSpec> Slasher<E> {
// Filter attestations for relevance.
let (snapshot, deferred, num_dropped) = self.validate(snapshot, current_epoch);
let num_valid = snapshot.len();
let num_deferred = deferred.len();
self.attestation_queue.requeue(deferred);
// Insert attestations into database.
debug!(
self.log,
"Storing attestations in slasher DB";
"num_valid" => snapshot.len(),
"Pre-processing attestations for slasher";
"num_valid" => num_valid,
"num_deferred" => num_deferred,
"num_dropped" => num_dropped,
);
metrics::set_gauge(&SLASHER_NUM_ATTESTATIONS_VALID, snapshot.len() as i64);
metrics::set_gauge(&SLASHER_NUM_ATTESTATIONS_VALID, num_valid as i64);
metrics::set_gauge(&SLASHER_NUM_ATTESTATIONS_DEFERRED, num_deferred as i64);
metrics::set_gauge(&SLASHER_NUM_ATTESTATIONS_DROPPED, num_dropped as i64);
for attestation in snapshot.attestations.iter() {
self.db.store_indexed_attestation(
txn,
attestation.1.indexed_attestation_hash,
&attestation.0,
)?;
// De-duplicate attestations and sort by validator index.
let mut batch = AttestationBatch::default();
for indexed_record in snapshot {
batch.queue(indexed_record);
}
// Group attestations into batches and process them.
let grouped_attestations = snapshot.group_by_validator_index(&self.config);
// Insert relevant attestations into database.
let mut num_stored = 0;
for weak_record in &batch.attestations {
if let Some(indexed_record) = weak_record.upgrade() {
self.db.store_indexed_attestation(
txn,
indexed_record.record.indexed_attestation_hash,
&indexed_record.indexed,
)?;
num_stored += 1;
}
}
debug!(
self.log,
"Stored attestations in slasher DB";
"num_stored" => num_stored,
"num_valid" => num_valid,
);
metrics::set_gauge(
&SLASHER_NUM_ATTESTATIONS_STORED_PER_BATCH,
num_stored as i64,
);
// Group attestations into chunked batches and process them.
let grouped_attestations = batch.group_by_validator_chunk_index(&self.config);
for (subqueue_id, subqueue) in grouped_attestations.subqueues.into_iter().enumerate() {
self.process_batch(txn, subqueue_id, subqueue.attestations, current_epoch)?;
self.process_batch(txn, subqueue_id, subqueue, current_epoch)?;
}
Ok(AttestationStats { num_processed })
}
@@ -168,12 +192,17 @@ impl<E: EthSpec> Slasher<E> {
&self,
txn: &mut RwTransaction<'_>,
subqueue_id: usize,
batch: Vec<Arc<(IndexedAttestation<E>, AttesterRecord)>>,
batch: SimpleBatch<E>,
current_epoch: Epoch,
) -> Result<(), Error> {
// First, check for double votes.
for attestation in &batch {
match self.check_double_votes(txn, subqueue_id, &attestation.0, attestation.1) {
match self.check_double_votes(
txn,
subqueue_id,
&attestation.indexed,
attestation.record,
) {
Ok(slashings) => {
if !slashings.is_empty() {
info!(
@@ -270,15 +299,15 @@ impl<E: EthSpec> Slasher<E> {
/// Returns `(valid, deferred, num_dropped)`.
fn validate(
&self,
batch: AttestationBatch<E>,
batch: SimpleBatch<E>,
current_epoch: Epoch,
) -> (AttestationBatch<E>, AttestationBatch<E>, usize) {
) -> (SimpleBatch<E>, SimpleBatch<E>, usize) {
let mut keep = Vec::with_capacity(batch.len());
let mut defer = vec![];
let mut drop_count = 0;
for tuple in batch.attestations.into_iter() {
let attestation = &tuple.0;
for indexed_record in batch {
let attestation = &indexed_record.indexed;
let target_epoch = attestation.data.target.epoch;
let source_epoch = attestation.data.source.epoch;
@@ -292,20 +321,14 @@ impl<E: EthSpec> Slasher<E> {
// Check that the attestation's target epoch is acceptable, and defer it
// if it's not.
if target_epoch > current_epoch {
defer.push(tuple);
defer.push(indexed_record);
} else {
// Otherwise the attestation is OK to process.
keep.push(tuple);
keep.push(indexed_record);
}
}
(
AttestationBatch { attestations: keep },
AttestationBatch {
attestations: defer,
},
drop_count,
)
(keep, defer, drop_count)
}
/// Prune unnecessary attestations and blocks from the on-disk database.