Use OS file locks in validator client (#1958)

## Issue Addressed

Closes #1823

## Proposed Changes

* Use OS-level file locking for validator keystores, eliminating problems with lockfiles lingering after ungraceful shutdowns (`SIGKILL`, power outage). I'm using the `fs2` crate because it's cross-platform (unlike `file-lock`), and it seems to have the most downloads on crates.io.
* Deprecate and disable the `--delete-lockfiles` CLI param, as it's no longer necessary.
* Delete the `validator_dir::Manager`, as it was mostly dead code and was only used in the `validator list` command, which has been rewritten to read the validator definitions YAML instead.

## Additional Info

Tested on:

- [x] Linux
- [x] macOS
- [x] Docker Linux
- [x] Docker macOS
- [ ] Windows
2026-04-17 12:58:31 +00:00 · 2020-11-26 11:25:46 +00:00
parent fc07cc3fdf
commit 3486d6a809
21 changed files with 282 additions and 411 deletions
--- a/common/eth2_wallet_manager/Cargo.toml
+++ b/common/eth2_wallet_manager/Cargo.toml
@@ -9,6 +9,7 @@ edition = "2018"
 [dependencies]
 eth2_keystore = { path = "../../crypto/eth2_keystore" }
 eth2_wallet = { path = "../../crypto/eth2_wallet" }
+lockfile = { path = "../lockfile" }

 [dev-dependencies]
 tempfile = "3.1.0"
--- a/common/eth2_wallet_manager/src/locked_wallet.rs
+++ b/common/eth2_wallet_manager/src/locked_wallet.rs
@@ -3,7 +3,7 @@ use crate::{
    Error,
 };
 use eth2_wallet::{Uuid, ValidatorKeystores, Wallet};
-use std::fs::{remove_file, OpenOptions};
+use lockfile::Lockfile;
 use std::path::{Path, PathBuf};

 pub const LOCK_FILE: &str = ".lock";
@@ -26,6 +26,7 @@ pub const LOCK_FILE: &str = ".lock";
 pub struct LockedWallet {
    wallet_dir: PathBuf,
    wallet: Wallet,
+    _lockfile: Lockfile,
 }

 impl LockedWallet {
@@ -49,20 +50,12 @@ impl LockedWallet {
            return Err(Error::MissingWalletDir(wallet_dir));
        }

-        let lockfile = wallet_dir.join(LOCK_FILE);
-        if lockfile.exists() {
-            return Err(Error::WalletIsLocked(wallet_dir));
-        } else {
-            OpenOptions::new()
-                .write(true)
-                .create_new(true)
-                .open(lockfile)
-                .map_err(Error::UnableToCreateLockfile)?;
-        }
+        let _lockfile = Lockfile::new(wallet_dir.join(LOCK_FILE))?;

        Ok(Self {
            wallet: read(&wallet_dir, uuid)?,
            wallet_dir,
+            _lockfile,
        })
    }

@@ -99,13 +92,3 @@ impl LockedWallet {
        Ok(keystores)
    }
 }
-
-impl Drop for LockedWallet {
-    /// Clean-up the lockfile.
-    fn drop(&mut self) {
-        let lockfile = self.wallet_dir.clone().join(LOCK_FILE);
-        if let Err(e) = remove_file(&lockfile) {
-            eprintln!("Unable to remove {:?}: {:?}", lockfile, e);
-        }
-    }
-}
--- a/common/eth2_wallet_manager/src/wallet_manager.rs
+++ b/common/eth2_wallet_manager/src/wallet_manager.rs
@@ -3,6 +3,7 @@ use crate::{
    LockedWallet,
 };
 use eth2_wallet::{bip39::Mnemonic, Error as WalletError, Uuid, Wallet, WalletBuilder};
+use lockfile::LockfileError;
 use std::collections::HashMap;
 use std::ffi::OsString;
 use std::fs::{create_dir_all, read_dir, OpenOptions};
@@ -21,10 +22,9 @@ pub enum Error {
    WalletNameUnknown(String),
    WalletDirExists(PathBuf),
    IoError(io::Error),
-    WalletIsLocked(PathBuf),
    MissingWalletDir(PathBuf),
-    UnableToCreateLockfile(io::Error),
    UuidMismatch((Uuid, Uuid)),
+    LockfileError(LockfileError),
 }

 impl From<io::Error> for Error {
@@ -45,6 +45,12 @@ impl From<FilesystemError> for Error {
    }
 }

+impl From<LockfileError> for Error {
+    fn from(e: LockfileError) -> Error {
+        Error::LockfileError(e)
+    }
+}
+
 /// Defines the type of an EIP-2386 wallet.
 ///
 /// Presently only `Hd` wallets are supported.
@@ -358,7 +364,7 @@ mod tests {
        );

        match LockedWallet::open(&base_dir, &uuid_a) {
-            Err(Error::WalletIsLocked(_)) => {}
+            Err(Error::LockfileError(_)) => {}
            _ => panic!("did not get locked error"),
        };

@@ -368,7 +374,7 @@ mod tests {
            .expect("should open wallet a after previous instance is dropped");

        match LockedWallet::open(&base_dir, &uuid_b) {
-            Err(Error::WalletIsLocked(_)) => {}
+            Err(Error::LockfileError(_)) => {}
            _ => panic!("did not get locked error"),
        };

--- a/common/lockfile/Cargo.toml
+++ b/common/lockfile/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "lockfile"
+version = "0.1.0"
+authors = ["Michael Sproul <michael@sigmaprime.io>"]
+edition = "2018"
+
+[dependencies]
+fs2 = "0.4.3"
+
+[dev-dependencies]
+tempdir = "0.3.7"
--- a/common/lockfile/src/lib.rs
+++ b/common/lockfile/src/lib.rs
@@ -0,0 +1,133 @@
+use fs2::FileExt;
+use std::fs::{self, File, OpenOptions};
+use std::io::{self, ErrorKind};
+use std::path::{Path, PathBuf};
+
+/// Cross-platform file lock that auto-deletes on drop.
+///
+/// This lockfile uses OS locking primitives (`flock` on Unix, `LockFileEx` on Windows), and will
+/// only fail if locked by another process. I.e. if the file being locked already exists but isn't
+/// locked, then it can still be locked. This is relevant if an ungraceful shutdown (SIGKILL, power
+/// outage) caused the lockfile not to be deleted.
+#[derive(Debug)]
+pub struct Lockfile {
+    file: File,
+    path: PathBuf,
+    file_existed: bool,
+}
+
+#[derive(Debug)]
+pub enum LockfileError {
+    FileLocked(PathBuf, io::Error),
+    IoError(PathBuf, io::Error),
+    UnableToOpenFile(PathBuf, io::Error),
+}
+
+impl Lockfile {
+    /// Obtain an exclusive lock on the file at `path`, creating it if it doesn't exist.
+    pub fn new(path: PathBuf) -> Result<Self, LockfileError> {
+        let file_existed = path.exists();
+        let file = if file_existed {
+            File::open(&path)
+        } else {
+            OpenOptions::new()
+                .read(true)
+                .write(true)
+                .create_new(true)
+                .open(&path)
+        }
+        .map_err(|e| LockfileError::UnableToOpenFile(path.clone(), e))?;
+
+        file.try_lock_exclusive().map_err(|e| match e.kind() {
+            ErrorKind::WouldBlock => LockfileError::FileLocked(path.clone(), e),
+            _ => LockfileError::IoError(path.clone(), e),
+        })?;
+        Ok(Self {
+            file,
+            path,
+            file_existed,
+        })
+    }
+
+    /// Return `true` if the lockfile existed when the lock was created.
+    ///
+    /// This could indicate another process that isn't aware of the OS lock using the file,
+    /// or an ungraceful shutdown that caused the file not to be deleted.
+    pub fn file_existed(&self) -> bool {
+        self.file_existed
+    }
+
+    /// The path of the lockfile.
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+}
+
+impl Drop for Lockfile {
+    fn drop(&mut self) {
+        let _ = fs::remove_file(&self.path);
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use tempdir::TempDir;
+
+    #[cfg(unix)]
+    use std::{fs::Permissions, os::unix::fs::PermissionsExt};
+
+    #[test]
+    fn new_lock() {
+        let temp = TempDir::new("lock_test").unwrap();
+        let path = temp.path().join("lockfile");
+
+        let _lock = Lockfile::new(path.clone()).unwrap();
+        assert!(matches!(
+            Lockfile::new(path).unwrap_err(),
+            LockfileError::FileLocked(..)
+        ));
+    }
+
+    #[test]
+    fn relock_after_drop() {
+        let temp = TempDir::new("lock_test").unwrap();
+        let path = temp.path().join("lockfile");
+
+        let lock1 = Lockfile::new(path.clone()).unwrap();
+        drop(lock1);
+        let lock2 = Lockfile::new(path.clone()).unwrap();
+        assert!(!lock2.file_existed());
+        drop(lock2);
+
+        assert!(!path.exists());
+    }
+
+    #[test]
+    fn lockfile_exists() {
+        let temp = TempDir::new("lock_test").unwrap();
+        let path = temp.path().join("lockfile");
+
+        let _lockfile = File::create(&path).unwrap();
+
+        let lock = Lockfile::new(path.clone()).unwrap();
+        assert!(lock.file_existed());
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn permission_denied_create() {
+        let temp = TempDir::new("lock_test").unwrap();
+        let path = temp.path().join("lockfile");
+
+        let lockfile = File::create(&path).unwrap();
+        lockfile
+            .set_permissions(Permissions::from_mode(0o000))
+            .unwrap();
+
+        assert!(matches!(
+            Lockfile::new(path).unwrap_err(),
+            LockfileError::UnableToOpenFile(..)
+        ));
+    }
+}
--- a/common/validator_dir/Cargo.toml
+++ b/common/validator_dir/Cargo.toml
@@ -19,6 +19,8 @@ rayon = "1.4.1"
 tree_hash = "0.1.1"
 slog = { version = "2.5.2", features = ["max_level_trace", "release_max_level_trace"] }
 hex = "0.4.2"
+derivative = "2.1.1"
+lockfile = { path = "../lockfile" }

 [dev-dependencies]
 tempfile = "3.1.0"
--- a/common/validator_dir/src/lib.rs
+++ b/common/validator_dir/src/lib.rs
@@ -2,14 +2,12 @@
 //!
 //! - `ValidatorDir`: manages a directory containing validator keypairs, deposit info and other
 //! things.
-//! - `Manager`: manages a directory that contains multiple `ValidatorDir`.
 //!
 //! This crate is intended to be used by the account manager to create validators and the validator
 //! client to load those validators.

 mod builder;
 pub mod insecure_keys;
-mod manager;
 mod validator_dir;

 pub use crate::validator_dir::{
@@ -20,4 +18,3 @@ pub use builder::{
    Builder, Error as BuilderError, ETH1_DEPOSIT_DATA_FILE, VOTING_KEYSTORE_FILE,
    WITHDRAWAL_KEYSTORE_FILE,
 };
-pub use manager::{Error as ManagerError, Manager};
--- a/common/validator_dir/src/manager.rs
+++ b/common/validator_dir/src/manager.rs
@@ -1,175 +0,0 @@
-use crate::{Error as ValidatorDirError, ValidatorDir};
-use bls::Keypair;
-use rayon::prelude::*;
-use slog::{info, warn, Logger};
-use std::collections::BTreeMap;
-use std::fs::read_dir;
-use std::io;
-use std::iter::FromIterator;
-use std::path::{Path, PathBuf};
-
-#[derive(Debug)]
-pub enum Error {
-    DirectoryDoesNotExist(PathBuf),
-    UnableToReadBaseDir(io::Error),
-    UnableToReadFile(io::Error),
-    ValidatorDirError(ValidatorDirError),
-}
-
-/// Manages a directory containing multiple `ValidatorDir` directories.
-///
-/// ## Example
-///
-/// ```ignore
-/// validators
-/// └── 0x91494d3ac4c078049f37aa46934ba8cdf5a9cca6e1b9a9e12403d69d8a2c43a25a7f576df2a5a3d7cb3f45e6aa5e2812
-///     ├── eth1_deposit_data.rlp
-///     ├── deposit-tx-hash.txt
-///     ├── voting-keystore.json
-///     └── withdrawal-keystore.json
-/// ```
-pub struct Manager {
-    dir: PathBuf,
-}
-
-impl Manager {
-    /// Open a directory containing multiple validators.
-    ///
-    /// Pass the `validators` director as `dir` (see struct-level example).
-    pub fn open<P: AsRef<Path>>(dir: P) -> Result<Self, Error> {
-        let dir: PathBuf = dir.as_ref().into();
-
-        if dir.exists() {
-            Ok(Self { dir })
-        } else {
-            Err(Error::DirectoryDoesNotExist(dir))
-        }
-    }
-
-    /// Iterate the nodes in `self.dir`, filtering out things that are unlikely to be a validator
-    /// directory.
-    fn iter_dir(&self) -> Result<Vec<PathBuf>, Error> {
-        read_dir(&self.dir)
-            .map_err(Error::UnableToReadBaseDir)?
-            .map(|file_res| file_res.map(|f| f.path()))
-            // We use `map_or` with `true` here to ensure that we always fail if there is any
-            // error.
-            .filter(|path_res| path_res.as_ref().map_or(true, |p| p.is_dir()))
-            .map(|res| res.map_err(Error::UnableToReadFile))
-            .collect()
-    }
-
-    /// Open a `ValidatorDir` at the given `path`.
-    ///
-    /// ## Note
-    ///
-    /// It is not enforced that `path` is contained in `self.dir`.
-    pub fn open_validator<P: AsRef<Path>>(&self, path: P) -> Result<ValidatorDir, Error> {
-        ValidatorDir::open(path).map_err(Error::ValidatorDirError)
-    }
-
-    /// Opens all the validator directories in `self`.
-    ///
-    /// ## Errors
-    ///
-    /// Returns an error if any of the directories is unable to be opened, perhaps due to a
-    /// file-system error or directory with an active lockfile.
-    pub fn open_all_validators(&self) -> Result<Vec<ValidatorDir>, Error> {
-        self.iter_dir()?
-            .into_iter()
-            .map(|path| ValidatorDir::open(path).map_err(Error::ValidatorDirError))
-            .collect()
-    }
-
-    /// Opens all the validator directories in `self` and decrypts the validator keypairs,
-    /// regardless if a lockfile exists or not.
-    ///
-    /// If `log.is_some()`, an `info` log will be generated for each decrypted validator.
-    /// Additionally, a warning log will be created if a lockfile existed already.
-    ///
-    /// ## Errors
-    ///
-    /// Returns an error if any of the directories is unable to be opened.
-    pub fn force_decrypt_all_validators(
-        &self,
-        secrets_dir: PathBuf,
-        log_opt: Option<&Logger>,
-    ) -> Result<Vec<(Keypair, ValidatorDir)>, Error> {
-        self.iter_dir()?
-            .into_par_iter()
-            .map(|path| {
-                ValidatorDir::force_open(path)
-                    .and_then(|(v, existed)| {
-                        v.voting_keypair(&secrets_dir).map(|kp| (kp, v, existed))
-                    })
-                    .map(|(kp, v, lockfile_existed)| {
-                        if let Some(log) = log_opt {
-                            info!(
-                                log,
-                                "Decrypted validator keystore";
-                                "voting_pubkey" => kp.pk.to_hex_string()
-                            );
-                            if lockfile_existed {
-                                warn!(
-                                    log,
-                                    "Lockfile already existed";
-                                    "msg" => "ensure no other validator client is running on this host",
-                                    "voting_pubkey" => kp.pk.to_hex_string()
-                                );
-                            }
-                        }
-                        (kp, v)
-                    })
-                    .map_err(Error::ValidatorDirError)
-            })
-            .collect()
-    }
-
-    /// Opens all the validator directories in `self` and decrypts the validator keypairs.
-    ///
-    /// If `log.is_some()`, an `info` log will be generated for each decrypted validator.
-    ///
-    /// ## Errors
-    ///
-    /// Returns an error if any of the directories is unable to be opened.
-    pub fn decrypt_all_validators(
-        &self,
-        secrets_dir: PathBuf,
-        log_opt: Option<&Logger>,
-    ) -> Result<Vec<(Keypair, ValidatorDir)>, Error> {
-        self.iter_dir()?
-            .into_par_iter()
-            .map(|path| {
-                ValidatorDir::open(path)
-                    .and_then(|v| v.voting_keypair(&secrets_dir).map(|kp| (kp, v)))
-                    .map(|(kp, v)| {
-                        if let Some(log) = log_opt {
-                            info!(
-                                log,
-                                "Decrypted validator keystore";
-                                "voting_pubkey" => kp.pk.to_hex_string()
-                            )
-                        }
-                        (kp, v)
-                    })
-                    .map_err(Error::ValidatorDirError)
-            })
-            .collect()
-    }
-
-    /// Returns a map of directory name to full directory path. E.g., `myval -> /home/vals/myval`.
-    /// Filters out nodes in `self.dir` that are unlikely to be a validator directory.
-    ///
-    /// ## Errors
-    ///
-    /// Returns an error if a directory is unable to be read.
-    pub fn directory_names(&self) -> Result<BTreeMap<String, PathBuf>, Error> {
-        Ok(BTreeMap::from_iter(
-            self.iter_dir()?.into_iter().filter_map(|path| {
-                path.file_name()
-                    .and_then(|os_string| os_string.to_str().map(|s| s.to_string()))
-                    .map(|filename| (filename, path))
-            }),
-        ))
-    }
-}
--- a/common/validator_dir/src/validator_dir.rs
+++ b/common/validator_dir/src/validator_dir.rs
@@ -3,24 +3,22 @@ use crate::builder::{
    WITHDRAWAL_KEYSTORE_FILE,
 };
 use deposit_contract::decode_eth1_tx_data;
+use derivative::Derivative;
 use eth2_keystore::{Error as KeystoreError, Keystore, PlainText};
-use std::fs::{read, remove_file, write, OpenOptions};
+use lockfile::{Lockfile, LockfileError};
+use std::fs::{read, write, OpenOptions};
 use std::io;
 use std::path::{Path, PathBuf};
 use tree_hash::TreeHash;
 use types::{DepositData, Hash256, Keypair};

-/// The file used for indicating if a directory is in-use by another process.
-const LOCK_FILE: &str = ".lock";
-
 /// The file used to save the Eth1 transaction hash from a deposit.
 pub const ETH1_DEPOSIT_TX_HASH_FILE: &str = "eth1-deposit-tx-hash.txt";

 #[derive(Debug)]
 pub enum Error {
    DirectoryDoesNotExist(PathBuf),
-    DirectoryLocked(PathBuf),
-    UnableToCreateLockfile(io::Error),
+    LockfileError(LockfileError),
    UnableToOpenKeystore(io::Error),
    UnableToReadKeystore(KeystoreError),
    UnableToOpenPassword(io::Error),
@@ -58,19 +56,22 @@ pub struct Eth1DepositData {

 /// Provides a wrapper around a directory containing validator information.
 ///
-/// Creates/deletes a lockfile in `self.dir` to attempt to prevent concurrent access from multiple
+/// Holds a lockfile in `self.dir` to attempt to prevent concurrent access from multiple
 /// processes.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, Derivative)]
+#[derivative(PartialEq)]
 pub struct ValidatorDir {
    dir: PathBuf,
+    #[derivative(PartialEq = "ignore")]
+    lockfile: Lockfile,
 }

 impl ValidatorDir {
-    /// Open `dir`, creating a lockfile to prevent concurrent access.
+    /// Open `dir`, obtaining a lockfile to prevent concurrent access.
    ///
    /// ## Errors
    ///
-    /// If there is a filesystem error or if a lockfile already exists.
+    /// If there is a filesystem error or if the lockfile is locked by another process.
    pub fn open<P: AsRef<Path>>(dir: P) -> Result<Self, Error> {
        let dir: &Path = dir.as_ref();
        let dir: PathBuf = dir.into();
@@ -79,49 +80,12 @@ impl ValidatorDir {
            return Err(Error::DirectoryDoesNotExist(dir));
        }

-        let lockfile = dir.join(LOCK_FILE);
-        if lockfile.exists() {
-            return Err(Error::DirectoryLocked(dir));
-        } else {
-            OpenOptions::new()
-                .write(true)
-                .create_new(true)
-                .open(lockfile)
-                .map_err(Error::UnableToCreateLockfile)?;
-        }
+        // Lock the keystore file that *might* be in this directory.
+        // This is not ideal, see: https://github.com/sigp/lighthouse/issues/1978
+        let lockfile_path = dir.join(format!("{}.lock", VOTING_KEYSTORE_FILE));
+        let lockfile = Lockfile::new(lockfile_path).map_err(Error::LockfileError)?;

-        Ok(Self { dir })
-    }
-
-    /// Open `dir`, regardless or not if a lockfile exists.
-    ///
-    /// Returns `(validator_dir, lockfile_existed)`, where `lockfile_existed == true` if a lockfile
-    /// was already present before opening. Creates a lockfile if one did not already exist.
-    ///
-    /// ## Errors
-    ///
-    /// If there is a filesystem error.
-    pub fn force_open<P: AsRef<Path>>(dir: P) -> Result<(Self, bool), Error> {
-        let dir: &Path = dir.as_ref();
-        let dir: PathBuf = dir.into();
-
-        if !dir.exists() {
-            return Err(Error::DirectoryDoesNotExist(dir));
-        }
-
-        let lockfile = dir.join(LOCK_FILE);
-
-        let lockfile_exists = lockfile.exists();
-
-        if !lockfile_exists {
-            OpenOptions::new()
-                .write(true)
-                .create_new(true)
-                .open(lockfile)
-                .map_err(Error::UnableToCreateLockfile)?;
-        }
-
-        Ok((Self { dir }, lockfile_exists))
+        Ok(Self { dir, lockfile })
    }

    /// Returns the `dir` provided to `Self::open`.
@@ -238,18 +202,6 @@ impl ValidatorDir {
    }
 }

-impl Drop for ValidatorDir {
-    fn drop(&mut self) {
-        let lockfile = self.dir.clone().join(LOCK_FILE);
-        if let Err(e) = remove_file(&lockfile) {
-            eprintln!(
-                "Unable to remove validator lockfile {:?}: {:?}",
-                lockfile, e
-            );
-        }
-    }
-}
-
 /// Attempts to load and decrypt a Keypair given path to the keystore.
 pub fn unlock_keypair<P: AsRef<Path>>(
    keystore_path: &PathBuf,