Start heavy refactor of validator client

- Block production is working
Paul Hauner
2019-11-22 01:22:05 +11:00
parent 06002f3f6a
commit 114067bb50
20 changed files with 1165 additions and 150 deletions


@@ -1,42 +1,44 @@
-mod attestation_producer;
-mod block_producer;
+mod block_service;
 mod cli;
 mod config;
-mod duties;
+mod duties_service;
 mod error;
-mod service;
+mod fork_service;
-mod signer;
+mod validator_store;
 pub mod validator_directory;
 pub use cli::cli_app;
 pub use config::Config;
+use block_service::{BlockService, BlockServiceBuilder};
 use clap::ArgMatches;
 use config::{Config as ClientConfig, KeySource};
+use duties_service::{DutiesService, DutiesServiceBuilder};
 use environment::RuntimeContext;
 use eth2_config::Eth2Config;
 use exit_future::Signal;
-use futures::Stream;
+use fork_service::{ForkService, ForkServiceBuilder};
+use futures::{Future, IntoFuture};
 use lighthouse_bootstrap::Bootstrapper;
 use parking_lot::RwLock;
-use protos::services_grpc::ValidatorServiceClient;
-use service::Service;
-use slog::{error, info, warn, Logger};
+use remote_beacon_node::RemoteBeaconNode;
+use slog::{info, Logger};
 use slot_clock::SlotClock;
+use slot_clock::SystemTimeSlotClock;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
-use tokio::timer::Interval;
-use types::{EthSpec, Keypair};
-/// A fixed amount of time after a slot to perform operations. This gives the node time to complete
-/// per-slot processes.
-const TIME_DELAY_FROM_SLOT: Duration = Duration::from_millis(100);
+use std::time::Duration;
+use types::EthSpec;
+use validator_store::ValidatorStore;
 #[derive(Clone)]
 pub struct ProductionValidatorClient<T: EthSpec> {
     context: RuntimeContext<T>,
-    service: Arc<Service<ValidatorServiceClient, Keypair, T>>,
+    duties_service: DutiesService<SystemTimeSlotClock, T>,
+    fork_service: ForkService<SystemTimeSlotClock, T>,
+    block_service: BlockService<SystemTimeSlotClock, T>,
     exit_signals: Arc<RwLock<Vec<Signal>>>,
 }
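
The struct above replaces the single gRPC-backed `Service` with three per-concern services (duties, fork, block) sharing a `SystemTimeSlotClock`, and the new `futures::{Future, IntoFuture}` import signals the style adopted in the next hunk: synchronous, fallible setup steps are lifted into a future chain so they compose with the asynchronous HTTP calls to the beacon node. A minimal, self-contained sketch of that `IntoFuture` pattern, assuming the futures 0.1 crate this commit targets (`parse_port` is a hypothetical stand-in for a fallible setup step such as parsing the server address):

use futures::{Future, IntoFuture};

// Hypothetical fallible setup step, standing in for e.g. parsing
// `client_config.server_http_port` in the hunk below.
fn parse_port(s: &str) -> Result<u16, String> {
    s.parse().map_err(|e| format!("invalid port: {:?}", e))
}

fn main() {
    // In futures 0.1, `Result<T, E>` implements `IntoFuture`, so a
    // synchronous result joins the same `and_then` chain as genuinely
    // asynchronous steps (like the beacon node HTTP calls below).
    let fut = parse_port("5052")
        .into_future()
        .and_then(|port| Ok(u32::from(port) + 1));

    // `wait()` drives a futures 0.1 future to completion on this thread.
    assert_eq!(fut.wait(), Ok(5053));
}

This is what lets `get_configs(..)` and the address parsing in `new()` below sit at the head of a chain whose later stages are real network futures.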
@@ -46,97 +48,156 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
     pub fn new_from_cli(
         mut context: RuntimeContext<T>,
         matches: &ArgMatches,
-    ) -> Result<Self, String> {
+    ) -> impl Future<Item = Self, Error = String> {
         let mut log = context.log.clone();
-        let (config, eth2_config) = get_configs(&matches, &mut log)
-            .map_err(|e| format!("Unable to initialize config: {}", e))?;
+        get_configs(&matches, &mut log)
+            .into_future()
+            .map_err(|e| format!("Unable to initialize config: {}", e))
+            .and_then(|(client_config, eth2_config)| {
-        // TODO: the eth2 config in the env is being completely ignored.
-        //
-        // See https://github.com/sigp/lighthouse/issues/602
-        context.eth2_config = eth2_config;
+                // TODO: the eth2 config in the env is being completely ignored.
+                //
+                // See https://github.com/sigp/lighthouse/issues/602
+                context.eth2_config = eth2_config;
-        Self::new(context, config)
+                Self::new(context, client_config)
+            })
     }
     /// Instantiates the validator client, _without_ starting the timers to trigger block
     /// and attestation production.
-    pub fn new(context: RuntimeContext<T>, config: Config) -> Result<Self, String> {
+    pub fn new(
+        mut context: RuntimeContext<T>,
+        client_config: ClientConfig,
+    ) -> impl Future<Item = Self, Error = String> {
         let log = context.log.clone();
         info!(
             log,
             "Starting validator client";
-            "datadir" => config.full_data_dir().expect("Unable to find datadir").to_str(),
+            "datadir" => client_config.full_data_dir().expect("Unable to find datadir").to_str(),
         );
-        let service: Service<ValidatorServiceClient, Keypair, T> =
-            Service::initialize_service(config, context.eth2_config.clone(), log.clone())
-                .map_err(|e| e.to_string())?;
+        format!(
+            "{}:{}",
+            client_config.server, client_config.server_http_port
+        )
+        .parse()
+        .map_err(|e| format!("Unable to parse server address: {:?}", e))
+        .into_future()
+        .and_then(|http_server_addr| {
+            RemoteBeaconNode::new(http_server_addr)
+                .map_err(|e| format!("Unable to init beacon node http client: {}", e))
+        })
+        .and_then(|beacon_node| {
+            // TODO: add loop function to retry if node not online.
+            beacon_node
+                .http
+                .spec()
+                .get_eth2_config()
+                .map(|eth2_config| (beacon_node, eth2_config))
+                .map_err(|e| format!("Unable to read eth2 config from beacon node: {:?}", e))
+        })
+        .and_then(|(beacon_node, eth2_config)| {
+            beacon_node
+                .http
+                .beacon()
+                .get_genesis_time()
+                .map(|genesis_time| (beacon_node, eth2_config, genesis_time))
+                .map_err(|e| format!("Unable to read genesis time from beacon node: {:?}", e))
+        })
+        .and_then(move |(beacon_node, remote_eth2_config, genesis_time)| {
+            // Do not permit a connection to a beacon node using different spec constants.
+            if context.eth2_config.spec_constants != remote_eth2_config.spec_constants {
+                return Err(format!(
+                    "Beacon node is using an incompatible spec. Got {}, expected {}",
+                    remote_eth2_config.spec_constants, context.eth2_config.spec_constants
+                ));
+            }
-        Ok(Self {
-            context,
-            service: Arc::new(service),
-            exit_signals: Arc::new(RwLock::new(vec![])),
+            // Note: here we just assume the spec variables of the remote node. This is very useful
+            // for testnets, but perhaps a security issue when it comes to mainnet.
+            //
+            // A damaging attack would be for a beacon node to convince the validator client of a
+            // different `SLOTS_PER_EPOCH` variable. This could result in slashable messages being
+            // produced. We are safe from this because `SLOTS_PER_EPOCH` is a type-level constant
+            // for Lighthouse.
+            context.eth2_config = remote_eth2_config;
+            let slot_clock = SystemTimeSlotClock::new(
+                context.eth2_config.spec.genesis_slot,
+                Duration::from_secs(genesis_time),
+                Duration::from_millis(context.eth2_config.spec.milliseconds_per_slot),
+            );
+            dbg!(context.eth2_config.spec.milliseconds_per_slot);
+            // TODO: fix expect.
+            let validator_store = ValidatorStore::load_from_disk(
+                client_config.full_data_dir().expect("Get rid of this."),
+                context.eth2_config.spec.clone(),
+                log.clone(),
+            )?;
+            info!(
+                log,
+                "Loaded validator keypair store";
+                "voting_validators" => validator_store.num_voting_validators()
+            );
+            let duties_service = DutiesServiceBuilder::new()
+                .slot_clock(slot_clock.clone())
+                .validator_store(validator_store.clone())
+                .beacon_node(beacon_node.clone())
+                .runtime_context(context.service_context("duties"))
+                .build()?;
+            let fork_service = ForkServiceBuilder::new()
+                .slot_clock(slot_clock.clone())
+                .beacon_node(beacon_node.clone())
+                .runtime_context(context.service_context("fork"))
+                .build()?;
+            let block_service = BlockServiceBuilder::new()
+                .duties_service(duties_service.clone())
+                .fork_service(fork_service.clone())
+                .slot_clock(slot_clock)
+                .validator_store(validator_store)
+                .beacon_node(beacon_node)
+                .runtime_context(context.service_context("block"))
+                .build()?;
+            Ok(Self {
+                context,
+                duties_service,
+                fork_service,
+                block_service,
+                exit_signals: Arc::new(RwLock::new(vec![])),
+            })
         })
     }
     /// Starts the timers to trigger block and attestation production.
     pub fn start_service(&self) -> Result<(), String> {
-        let service = self.clone().service;
-        let log = self.context.log.clone();
+        let duties_exit = self
+            .duties_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start duties service: {}", e))?;
-        let duration_to_next_slot = service
-            .slot_clock
-            .duration_to_next_slot()
-            .ok_or_else(|| "Unable to determine duration to next slot. Exiting.".to_string())?;
+        self.exit_signals.write().push(duties_exit);
-        // set up the validator work interval - start at next slot and proceed every slot
-        let interval = {
-            // Set the interval to start at the next slot, and every slot after
-            let slot_duration = Duration::from_millis(service.spec.milliseconds_per_slot);
-            //TODO: Handle checked add correctly
-            Interval::new(Instant::now() + duration_to_next_slot, slot_duration)
-        };
+        let fork_exit = self
+            .fork_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start fork service: {}", e))?;
-        if service.slot_clock.now().is_none() {
-            warn!(
-                log,
-                "Starting node prior to genesis";
-            );
-        }
+        self.exit_signals.write().push(fork_exit);
-        info!(
-            log,
-            "Waiting for next slot";
-            "seconds_to_wait" => duration_to_next_slot.as_secs()
-        );
+        let block_exit = self
+            .block_service
+            .start_update_service(&self.context.eth2_config.spec)
+            .map_err(|e| format!("Unable to start block service: {}", e))?;
-        let (exit_signal, exit_fut) = exit_future::signal();
-        self.exit_signals.write().push(exit_signal);
-        /* kick off the core service */
-        self.context.executor.spawn(
-            interval
-                .map_err(move |e| {
-                    error! {
-                        log,
-                        "Timer thread failed";
-                        "error" => format!("{}", e)
-                    }
-                })
-                .and_then(move |_| if exit_fut.is_live() { Ok(()) } else { Err(()) })
-                .for_each(move |_| {
-                    // wait for node to process
-                    std::thread::sleep(TIME_DELAY_FROM_SLOT);
-                    // if a non-fatal error occurs, proceed to the next slot.
-                    let _ignore_error = service.per_slot_execution();
-                    // completed a slot process
-                    Ok(())
-                }),
-        );
+        self.exit_signals.write().push(block_exit);
         Ok(())
     }
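
Each `start_update_service` call above returns an `exit_future::Signal` that is pushed into `exit_signals`, replacing the old single interval loop with one shutdown handle per service. A minimal sketch of that shutdown pattern, assuming the exit_future 0.1 API already imported in this file (the worker thread is illustrative; the real services poll their exit future inside the tokio executor):

use exit_future::Signal;
use futures::Future;

fn main() {
    // One shutdown handle per service, as `start_service` collects above.
    let (signal, exit) = exit_future::signal();

    let worker = std::thread::spawn(move || {
        // A real service would select over `exit` in its event loop; here we
        // simply block until the signal fires, then wind down.
        let _ = exit.wait();
        println!("worker stopped");
    });

    let mut exit_signals: Vec<Signal> = vec![signal];

    // Firing a stored signal resolves the matching `exit` future, telling
    // that service to stop.
    if let Some(signal) = exit_signals.pop() {
        let _ = signal.fire();
    }

    worker.join().expect("worker thread panicked");
}

Storing signals rather than task handles keeps shutdown decoupled from the executor: when `ProductionValidatorClient` is torn down, every stored `Signal` goes with it, and each service's future resolves on its next poll.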