Start heavy refactor of validator client

- Block production is working
Paul Hauner
2019-11-22 01:22:05 +11:00
parent 06002f3f6a
commit 114067bb50
20 changed files with 1165 additions and 150 deletions


@@ -1,42 +1,44 @@
-mod attestation_producer;
-mod block_producer;
+mod block_service;
 mod cli;
 mod config;
-mod duties;
+mod duties_service;
 mod error;
-mod service;
+mod fork_service;
-mod signer;
+mod validator_store;
 pub mod validator_directory;
 pub use cli::cli_app;
 pub use config::Config;
+use block_service::{BlockService, BlockServiceBuilder};
 use clap::ArgMatches;
 use config::{Config as ClientConfig, KeySource};
+use duties_service::{DutiesService, DutiesServiceBuilder};
 use environment::RuntimeContext;
 use eth2_config::Eth2Config;
 use exit_future::Signal;
-use futures::Stream;
+use fork_service::{ForkService, ForkServiceBuilder};
+use futures::{Future, IntoFuture};
 use lighthouse_bootstrap::Bootstrapper;
 use parking_lot::RwLock;
-use protos::services_grpc::ValidatorServiceClient;
-use service::Service;
-use slog::{error, info, warn, Logger};
+use remote_beacon_node::RemoteBeaconNode;
+use slog::{info, Logger};
 use slot_clock::SlotClock;
+use slot_clock::SystemTimeSlotClock;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
-use tokio::timer::Interval;
-use types::{EthSpec, Keypair};
-/// A fixed amount of time after a slot to perform operations. This gives the node time to complete
-/// per-slot processes.
-const TIME_DELAY_FROM_SLOT: Duration = Duration::from_millis(100);
+use std::time::Duration;
+use types::EthSpec;
+use validator_store::ValidatorStore;
 #[derive(Clone)]
 pub struct ProductionValidatorClient<T: EthSpec> {
     context: RuntimeContext<T>,
-    service: Arc<Service<ValidatorServiceClient, Keypair, T>>,
+    duties_service: DutiesService<SystemTimeSlotClock, T>,
+    fork_service: ForkService<SystemTimeSlotClock, T>,
+    block_service: BlockService<SystemTimeSlotClock, T>,
     exit_signals: Arc<RwLock<Vec<Signal>>>,
 }
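
The struct above replaces the single gRPC-backed `Service` with three per-concern services (duties, fork, block) sharing a `SystemTimeSlotClock`, and the new `futures::{Future, IntoFuture}` import signals the style adopted in the next hunk: synchronous, fallible setup steps are lifted into a future chain so they compose with the asynchronous HTTP calls to the beacon node. A minimal, self-contained sketch of that `IntoFuture` pattern, assuming the futures 0.1 crate this commit targets (`parse_port` is a hypothetical stand-in for a fallible setup step such as parsing the server address):

use futures::{Future, IntoFuture};

// Hypothetical fallible setup step, standing in for e.g. parsing
// `client_config.server_http_port` in the hunk below.
fn parse_port(s: &str) -> Result<u16, String> {
    s.parse().map_err(|e| format!("invalid port: {:?}", e))
}

fn main() {
    // In futures 0.1, `Result<T, E>` implements `IntoFuture`, so a
    // synchronous result joins the same `and_then` chain as genuinely
    // asynchronous steps (like the beacon node HTTP calls below).
    let fut = parse_port("5052")
        .into_future()
        .and_then(|port| Ok(u32::from(port) + 1));

    // `wait()` drives a futures 0.1 future to completion on this thread.
    assert_eq!(fut.wait(), Ok(5053));
}

This is what lets `get_configs(..)` and the address parsing in `new()` below sit at the head of a chain whose later stages are real network futures.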
@@ -46,97 +48,156 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
     pub fn new_from_cli(
         mut context: RuntimeContext<T>,
         matches: &ArgMatches,
-    ) -> Result<Self, String> {
+    ) -> impl Future<Item = Self, Error = String> {
         let mut log = context.log.clone();
-        let (config, eth2_config) = get_configs(&matches, &mut log)
-            .map_err(|e| format!("Unable to initialize config: {}", e))?;
+        get_configs(&matches, &mut log)
+            .into_future()
+            .map_err(|e| format!("Unable to initialize config: {}", e))
+            .and_then(|(client_config, eth2_config)| {
-        // TODO: the eth2 config in the env is being completely ignored.
-        //
-        // See https://github.com/sigp/lighthouse/issues/602
-        context.eth2_config = eth2_config;
+                // TODO: the eth2 config in the env is being completely ignored.
+                //
+                // See https://github.com/sigp/lighthouse/issues/602
+                context.eth2_config = eth2_config;
-        Self::new(context, config)
+                Self::new(context, client_config)
+            })
     }
     /// Instantiates the validator client, _without_ starting the timers to trigger block
     /// and attestation production.
-    pub fn new(context: RuntimeContext<T>, config: Config) -> Result<Self, String> {
+    pub fn new(
+        mut context: RuntimeContext<T>,
+        client_config: ClientConfig,
+    ) -> impl Future<Item = Self, Error = String> {
         let log = context.log.clone();
         info!(
             log,
             "Starting validator client";
-            "datadir" => config.full_data_dir().expect("Unable to find datadir").to_str(),
+            "datadir" => client_config.full_data_dir().expect("Unable to find datadir").to_str(),
         );
-        let service: Service<ValidatorServiceClient, Keypair, T> =
-            Service::initialize_service(config, context.eth2_config.clone(), log.clone())
-                .map_err(|e| e.to_string())?;
+        format!(
+            "{}:{}",
+            client_config.server, client_config.server_http_port
+        )
+        .parse()
+        .map_err(|e| format!("Unable to parse server address: {:?}", e))
+        .into_future()
+        .and_then(|http_server_addr| {
+            RemoteBeaconNode::new(http_server_addr)
+                .map_err(|e| format!("Unable to init beacon node http client: {}", e))
+        })
+        .and_then(|beacon_node| {
+            // TODO: add loop function to retry if node not online.
+            beacon_node
+                .http
+                .spec()
+                .get_eth2_config()
+                .map(|eth2_config| (beacon_node, eth2_config))
+                .map_err(|e| format!("Unable to read eth2 config from beacon node: {:?}", e))
+        })
+        .and_then(|(beacon_node, eth2_config)| {
+            beacon_node
+                .http
+                .beacon()
+                .get_genesis_time()
+                .map(|genesis_time| (beacon_node, eth2_config, genesis_time))
+                .map_err(|e| format!("Unable to read genesis time from beacon node: {:?}", e))
+        })
+        .and_then(move |(beacon_node, remote_eth2_config, genesis_time)| {
+            // Do not permit a connection to a beacon node using different spec constants.
+            if context.eth2_config.spec_constants != remote_eth2_config.spec_constants {
+                return Err(format!(
+                    "Beacon node is using an incompatible spec. Got {}, expected {}",
+                    remote_eth2_config.spec_constants, context.eth2_config.spec_constants
+                ));
+            }
-        Ok(Self {
-            context,
-            service: Arc::new(service),
-            exit_signals: Arc::new(RwLock::new(vec![])),
+            // Note: here we just assume the spec variables of the remote node. This is very useful
+            // for testnets, but perhaps a security issue when it comes to mainnet.
+            //
+            // A damaging attack would be for a beacon node to convince the validator client of a
+            // different `SLOTS_PER_EPOCH` variable. This could result in slashable messages being
+            // produced. We are safe from this because `SLOTS_PER_EPOCH` is a type-level constant
+            // for Lighthouse.
+            context.eth2_config = remote_eth2_config;
+            let slot_clock = SystemTimeSlotClock::new(
+                context.eth2_config.spec.genesis_slot,
+                Duration::from_secs(genesis_time),
+                Duration::from_millis(context.eth2_config.spec.milliseconds_per_slot),
+            );
+            dbg!(context.eth2_config.spec.milliseconds_per_slot);
+            // TODO: fix expect.
+            let validator_store = ValidatorStore::load_from_disk(
+                client_config.full_data_dir().expect("Get rid of this."),
+                context.eth2_config.spec.clone(),
+                log.clone(),
+            )?;
+            info!(
+                log,
+                "Loaded validator keypair store";
+                "voting_validators" => validator_store.num_voting_validators()
+            );
+            let duties_service = DutiesServiceBuilder::new()
+                .slot_clock(slot_clock.clone())
+                .validator_store(validator_store.clone())
+                .beacon_node(beacon_node.clone())
+                .runtime_context(context.service_context("duties"))
+                .build()?;
+            let fork_service = ForkServiceBuilder::new()
+                .slot_clock(slot_clock.clone())
+                .beacon_node(beacon_node.clone())
+                .runtime_context(context.service_context("fork"))
+                .build()?;
+            let block_service = BlockServiceBuilder::new()
+                .duties_service(duties_service.clone())
+                .fork_service(fork_service.clone())
+                .slot_clock(slot_clock)
+                .validator_store(validator_store)
+                .beacon_node(beacon_node)
+                .runtime_context(context.service_context("block"))
+                .build()?;
+            Ok(Self {
+                context,
+                duties_service,
+                fork_service,
+                block_service,
+                exit_signals: Arc::new(RwLock::new(vec![])),
+            })
         })
     }
     /// Starts the timers to trigger block and attestation production.
     pub fn start_service(&self) -> Result<(), String> {
-        let service = self.clone().service;
-        let log = self.context.log.clone();
+        let duties_exit = self
+            .duties_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start duties service: {}", e))?;
-        let duration_to_next_slot = service
-            .slot_clock
-            .duration_to_next_slot()
-            .ok_or_else(|| "Unable to determine duration to next slot. Exiting.".to_string())?;
+        self.exit_signals.write().push(duties_exit);
-        // set up the validator work interval - start at next slot and proceed every slot
-        let interval = {
-            // Set the interval to start at the next slot, and every slot after
-            let slot_duration = Duration::from_millis(service.spec.milliseconds_per_slot);
-            //TODO: Handle checked add correctly
-            Interval::new(Instant::now() + duration_to_next_slot, slot_duration)
-        };
+        let fork_exit = self
+            .fork_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start fork service: {}", e))?;
-        if service.slot_clock.now().is_none() {
-            warn!(
-                log,
-                "Starting node prior to genesis";
-            );
-        }
+        self.exit_signals.write().push(fork_exit);
-        info!(
-            log,
-            "Waiting for next slot";
-            "seconds_to_wait" => duration_to_next_slot.as_secs()
-        );
+        let block_exit = self
+            .block_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start block service: {}", e))?;
-        let (exit_signal, exit_fut) = exit_future::signal();
-        self.exit_signals.write().push(exit_signal);
-        /* kick off the core service */
-        self.context.executor.spawn(
-            interval
-                .map_err(move |e| {
-                    error! {
-                        log,
-                        "Timer thread failed";
-                        "error" => format!("{}", e)
-                    }
-                })
-                .and_then(move |_| if exit_fut.is_live() { Ok(()) } else { Err(()) })
-                .for_each(move |_| {
-                    // wait for node to process
-                    std::thread::sleep(TIME_DELAY_FROM_SLOT);
-                    // if a non-fatal error occurs, proceed to the next slot.
-                    let _ignore_error = service.per_slot_execution();
-                    // completed a slot process
-                    Ok(())
-                }),
-        );
+        self.exit_signals.write().push(block_exit);
         Ok(())
     }
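
Each `start_update_service` call above returns an `exit_future::Signal` that is pushed into `exit_signals`, replacing the old single interval loop with one shutdown handle per service. A minimal sketch of that shutdown pattern, assuming the exit_future 0.1 API already imported in this file (the worker thread is illustrative; the real services poll their exit future inside the tokio executor):

use exit_future::Signal;
use futures::Future;

fn main() {
    // One shutdown handle per service, as `start_service` collects above.
    let (signal, exit) = exit_future::signal();

    let worker = std::thread::spawn(move || {
        // A real service would select over `exit` in its event loop; here we
        // simply block until the signal fires, then wind down.
        let _ = exit.wait();
        println!("worker stopped");
    });

    let mut exit_signals: Vec<Signal> = vec![signal];

    // Firing a stored signal resolves the matching `exit` future, telling
    // that service to stop.
    if let Some(signal) = exit_signals.pop() {
        let _ = signal.fire();
    }

    worker.join().expect("worker thread panicked");
}

Storing signals rather than task handles keeps shutdown decoupled from the executor: when `ProductionValidatorClient` is torn down, every stored `Signal` goes with it, and each service's future resolves on its next poll.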