Add slasher broadcast (#2079)

## Issue Addressed

Closes #2048

## Proposed Changes

* Broadcast slashings when the `--slasher-broadcast` flag is provided.
* In the process of implementing this I refactored the slasher service into its own crate so that it could access the network code without creating a circular dependency. I moved the responsibility for putting slashings into the op pool into the service as well, as it makes sense for it to handle the whole slashing lifecycle.
This commit is contained in:
Michael Sproul
2020-12-16 03:44:01 +00:00
parent 63eeb14a81
commit 0c529b8d52
18 changed files with 414 additions and 193 deletions

View File

@@ -0,0 +1,16 @@
/// Summary statistics for one batch of queued blocks and attestations
/// processed by the slasher.
///
/// Plain-data structs of `usize` counters, so the full set of value-type
/// derives (`Clone`, `Copy`, `PartialEq`, `Eq`, `Default`) is free and
/// backward-compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct BatchStats {
    /// Statistics for the block-processing half of the batch.
    pub block_stats: BlockStats,
    /// Statistics for the attestation-processing half of the batch.
    pub attestation_stats: AttestationStats,
}

/// Statistics for the blocks applied during one batch.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct BlockStats {
    /// Number of blocks dequeued and processed.
    pub num_processed: usize,
    /// Number of proposer slashings detected while processing them.
    pub num_slashings: usize,
}

/// Statistics for the attestations applied during one batch.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AttestationStats {
    /// Number of attestations dequeued for processing.
    pub num_processed: usize,
}

View File

@@ -8,6 +8,7 @@ pub const DEFAULT_VALIDATOR_CHUNK_SIZE: usize = 256;
pub const DEFAULT_HISTORY_LENGTH: usize = 4096;
pub const DEFAULT_UPDATE_PERIOD: u64 = 12;
pub const DEFAULT_MAX_DB_SIZE: usize = 256 * 1024; // 256 GiB
pub const DEFAULT_BROADCAST: bool = false;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
@@ -20,6 +21,8 @@ pub struct Config {
pub update_period: u64,
/// Maximum size of the LMDB database in megabytes.
pub max_db_size_mbs: usize,
/// Whether to broadcast slashings found to the network.
pub broadcast: bool,
}
impl Config {
@@ -31,6 +34,7 @@ impl Config {
history_length: DEFAULT_HISTORY_LENGTH,
update_period: DEFAULT_UPDATE_PERIOD,
max_db_size_mbs: DEFAULT_MAX_DB_SIZE,
broadcast: DEFAULT_BROADCAST,
}
}

View File

@@ -3,13 +3,13 @@
mod array;
mod attestation_queue;
mod attester_record;
mod batch_stats;
mod block_queue;
pub mod config;
mod database;
mod error;
mod metrics;
pub mod metrics;
mod slasher;
mod slasher_server;
pub mod test_utils;
mod utils;
@@ -20,7 +20,6 @@ pub use block_queue::BlockQueue;
pub use config::Config;
pub use database::SlasherDB;
pub use error::Error;
pub use slasher_server::SlasherServer;
use types::{AttesterSlashing, EthSpec, IndexedAttestation, ProposerSlashing};

View File

@@ -1,3 +1,4 @@
use crate::batch_stats::{AttestationStats, BatchStats, BlockStats};
use crate::metrics::{
self, SLASHER_NUM_ATTESTATIONS_DEFERRED, SLASHER_NUM_ATTESTATIONS_DROPPED,
SLASHER_NUM_ATTESTATIONS_VALID, SLASHER_NUM_BLOCKS_PROCESSED,
@@ -18,12 +19,12 @@ use types::{
#[derive(Debug)]
pub struct Slasher<E: EthSpec> {
db: SlasherDB<E>,
pub(crate) attestation_queue: AttestationQueue<E>,
pub(crate) block_queue: BlockQueue,
attestation_queue: AttestationQueue<E>,
block_queue: BlockQueue,
attester_slashings: Mutex<HashSet<AttesterSlashing<E>>>,
proposer_slashings: Mutex<HashSet<ProposerSlashing>>,
config: Arc<Config>,
pub(crate) log: Logger,
log: Logger,
}
impl<E: EthSpec> Slasher<E> {
@@ -60,6 +61,10 @@ impl<E: EthSpec> Slasher<E> {
&self.config
}
pub fn log(&self) -> &Logger {
&self.log
}
/// Accept an attestation from the network and queue it for processing.
pub fn accept_attestation(&self, attestation: IndexedAttestation<E>) {
self.attestation_queue.queue(attestation);
@@ -71,17 +76,23 @@ impl<E: EthSpec> Slasher<E> {
}
/// Apply queued blocks and attestations to the on-disk database, and detect slashings!
pub fn process_queued(&self, current_epoch: Epoch) -> Result<(), Error> {
pub fn process_queued(&self, current_epoch: Epoch) -> Result<BatchStats, Error> {
let mut txn = self.db.begin_rw_txn()?;
self.process_blocks(&mut txn)?;
self.process_attestations(current_epoch, &mut txn)?;
let block_stats = self.process_blocks(&mut txn)?;
let attestation_stats = self.process_attestations(current_epoch, &mut txn)?;
txn.commit()?;
Ok(())
Ok(BatchStats {
block_stats,
attestation_stats,
})
}
/// Apply queued blocks to the on-disk database.
pub fn process_blocks(&self, txn: &mut RwTransaction<'_>) -> Result<(), Error> {
///
/// Return the number of blocks processed and the number of slashings found.
pub fn process_blocks(&self, txn: &mut RwTransaction<'_>) -> Result<BlockStats, Error> {
let blocks = self.block_queue.dequeue();
let num_processed = blocks.len();
let mut slashings = vec![];
metrics::set_gauge(&SLASHER_NUM_BLOCKS_PROCESSED, blocks.len() as i64);
@@ -94,6 +105,7 @@ impl<E: EthSpec> Slasher<E> {
}
}
let num_slashings = slashings.len();
if !slashings.is_empty() {
info!(
self.log,
@@ -103,7 +115,10 @@ impl<E: EthSpec> Slasher<E> {
self.proposer_slashings.lock().extend(slashings);
}
Ok(())
Ok(BlockStats {
num_processed,
num_slashings,
})
}
/// Apply queued attestations to the on-disk database.
@@ -111,8 +126,9 @@ impl<E: EthSpec> Slasher<E> {
&self,
current_epoch: Epoch,
txn: &mut RwTransaction<'_>,
) -> Result<(), Error> {
) -> Result<AttestationStats, Error> {
let snapshot = self.attestation_queue.dequeue();
let num_processed = snapshot.len();
// Filter attestations for relevance.
let (snapshot, deferred, num_dropped) = self.validate(snapshot, current_epoch);
@@ -144,7 +160,7 @@ impl<E: EthSpec> Slasher<E> {
for (subqueue_id, subqueue) in grouped_attestations.subqueues.into_iter().enumerate() {
self.process_batch(txn, subqueue_id, subqueue.attestations, current_epoch)?;
}
Ok(())
Ok(AttestationStats { num_processed })
}
/// Process a batch of attestations for a range of validator indices.

View File

@@ -1,95 +0,0 @@
use crate::metrics::{self, SLASHER_DATABASE_SIZE, SLASHER_RUN_TIME};
use crate::Slasher;
use directory::size_of_dir;
use slog::{debug, error, info, trace};
use slot_clock::SlotClock;
use std::sync::mpsc::{sync_channel, TrySendError};
use std::sync::Arc;
use task_executor::TaskExecutor;
use tokio::stream::StreamExt;
use tokio::time::{interval_at, Duration, Instant};
use types::EthSpec;
/// Background service that periodically drains the slasher's block and
/// attestation queues and processes them against the on-disk database
/// (see `SlasherServer::run`).
#[derive(Debug)]
pub struct SlasherServer;
impl SlasherServer {
    /// Spawn the slasher's background tasks on `executor`.
    ///
    /// Two tasks are spawned:
    /// 1. An async "ticker" that fires every `update_period` seconds and sends
    ///    the current epoch over a bounded channel.
    /// 2. A blocking worker that receives epochs and runs batch processing and
    ///    database pruning for each one.
    ///
    /// Fix vs. previous revision: the `SlotClock` was wrapped in an `Arc` and
    /// cloned on every tick (`slot_clock.clone().now()`), even though it is
    /// owned solely by the ticker task. The `Arc` and the per-iteration clone
    /// are removed; behavior is unchanged.
    pub fn run<E: EthSpec, C: SlotClock + 'static>(
        slasher: Arc<Slasher<E>>,
        slot_clock: C,
        executor: &TaskExecutor,
    ) {
        info!(slasher.log, "Starting slasher to detect misbehaviour");
        // Buffer just a single message in the channel. If the receiver is still processing, we
        // don't need to burden them with more work (we can wait).
        let (sender, receiver) = sync_channel(1);
        let log = slasher.log.clone();
        let update_period = slasher.config().update_period;
        executor.spawn(
            async move {
                // NOTE: could align each run to some fixed point in each slot, see:
                // https://github.com/sigp/lighthouse/issues/1861
                let mut interval = interval_at(Instant::now(), Duration::from_secs(update_period));
                while interval.next().await.is_some() {
                    if let Some(current_slot) = slot_clock.now() {
                        let current_epoch = current_slot.epoch(E::slots_per_epoch());
                        // A `Full` error just means the worker is busy; skip this
                        // tick. Only a disconnected receiver ends the loop.
                        if let Err(TrySendError::Disconnected(_)) = sender.try_send(current_epoch) {
                            break;
                        }
                    } else {
                        trace!(log, "Slasher has nothing to do: we are pre-genesis");
                    }
                }
            },
            "slasher_server",
        );
        executor.spawn_blocking(
            move || {
                while let Ok(current_epoch) = receiver.recv() {
                    let t = Instant::now();
                    // Snapshot queue lengths before processing, for the log below.
                    let num_attestations = slasher.attestation_queue.len();
                    let num_blocks = slasher.block_queue.len();
                    let batch_timer = metrics::start_timer(&SLASHER_RUN_TIME);
                    if let Err(e) = slasher.process_queued(current_epoch) {
                        error!(
                            slasher.log,
                            "Error during scheduled slasher processing";
                            "epoch" => current_epoch,
                            "error" => format!("{:?}", e)
                        );
                    }
                    drop(batch_timer);
                    // Prune the database, even in the case where batch processing failed.
                    // If the LMDB database is full then pruning could help to free it up.
                    if let Err(e) = slasher.prune_database(current_epoch) {
                        error!(
                            slasher.log,
                            "Error during slasher database pruning";
                            "epoch" => current_epoch,
                            "error" => format!("{:?}", e),
                        );
                        continue;
                    }
                    debug!(
                        slasher.log,
                        "Completed slasher update";
                        "epoch" => current_epoch,
                        "time_taken" => format!("{}ms", t.elapsed().as_millis()),
                        "num_attestations" => num_attestations,
                        "num_blocks" => num_blocks,
                    );
                    // Report on-disk database size after each update.
                    let database_size = size_of_dir(&slasher.config().database_path);
                    metrics::set_gauge(&SLASHER_DATABASE_SIZE, database_size as i64);
                }
            },
            "slasher_server_process_queued",
        );
    }
}