Files
lighthouse/common/promise_cache/src/lib.rs
2024-01-11 17:13:43 +11:00

228 lines
7.4 KiB
Rust

use derivative::Derivative;
use itertools::Itertools;
use oneshot_broadcast::{oneshot, Receiver, Sender};
use slog::Logger;
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::Arc;
/// A keyed cache whose entries are either completed values or promises of values that are still
/// being computed.
///
/// Before an insert, unprotected entries are pruned to make room, aiming to keep the number of
/// entries within `capacity`. The `protector` decides which keys may be evicted and in which
/// order.
#[derive(Debug)]
pub struct PromiseCache<K, V, P>
where
K: Hash + Eq + Clone,
P: Protect<K>,
{
/// Completed values and promises, indexed by key.
cache: HashMap<K, CacheItem<V>>,
/// Size limit that pruning works towards.
capacity: usize,
/// Decides which keys are shielded from eviction and the order in which the rest are evicted.
protector: P,
/// Upper bound on the number of entries in the `Promise` state; `create_promise` fails once
/// this many are present.
max_concurrent_promises: usize,
/// Logger passed to `Protect::notify_eviction` when an entry is evicted.
logger: Logger,
}
/// A value implementing `Protect` is capable of preventing keys of type `K` from being evicted.
///
/// It also dictates an ordering on keys which is used to prioritise evictions.
pub trait Protect<K> {
/// The type keys are ordered by when choosing what to evict.
type SortKey: Ord;
/// Returns the eviction priority of `k`. When the cache prunes, candidates are sorted by this
/// key and taken from the front, so keys with the smallest sort key are evicted first.
fn sort_key(&self, k: &K) -> Self::SortKey;
/// Returns `true` if `k` must not be evicted. Such keys are skipped when the cache prunes.
fn protect_from_eviction(&self, k: &K) -> bool;
/// Hook called by the cache for each key it evicts while pruning, just before the entry is
/// removed. The default implementation does nothing.
fn notify_eviction(&self, _k: &K, _log: &Logger) {}
}
/// An entry of the cache: either a finished value or a promise that the value will be delivered.
///
/// `Clone` is derived through `derivative` with an empty bound, so cloning an item does not
/// require `T: Clone`.
#[derive(Derivative)]
#[derivative(Clone(bound = ""))]
pub enum CacheItem<T> {
/// The value is available.
Complete(Arc<T>),
/// The value has been promised; the receiver is used to obtain it from the matching sender.
Promise(Receiver<Arc<T>>),
}
impl<T: std::fmt::Debug> std::fmt::Debug for CacheItem<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
match self {
CacheItem::Complete(value) => value.fmt(f),
CacheItem::Promise(_) => "Promise(..)".fmt(f),
}
}
}
/// Errors produced when creating or waiting on promises.
#[derive(Debug)]
pub enum PromiseCacheError {
/// Waiting on a promise failed; wraps the error reported by the receiver.
Failed(oneshot_broadcast::Error),
/// A new promise was refused because the limit on concurrent promises was reached. Carries the
/// number of promises that were in the cache at the time.
MaxConcurrentPromises(usize),
}
/// Conversion from a borrowed value into an `Arc<T>`.
///
/// Lets the cache's insertion methods accept any type that can produce an `Arc` of the cached
/// value type.
pub trait ToArc<T> {
/// Returns an `Arc<T>` obtained from `self`.
fn to_arc(&self) -> Arc<T>;
}
impl<T> CacheItem<T> {
    /// Returns `true` when this item is a promise rather than a completed value.
    pub fn is_promise(&self) -> bool {
        match self {
            Self::Promise(_) => true,
            Self::Complete(_) => false,
        }
    }

    /// Consumes the item and yields its value.
    ///
    /// A completed item returns its value straight away. A promise is received from via
    /// `Receiver::recv`, and any error that reports is wrapped in `PromiseCacheError::Failed`.
    pub fn wait(self) -> Result<Arc<T>, PromiseCacheError> {
        let pending = match self {
            Self::Complete(ready) => return Ok(ready),
            Self::Promise(pending) => pending,
        };
        pending.recv().map_err(PromiseCacheError::Failed)
    }
}
impl<T> ToArc<T> for Arc<T> {
    /// Hands out another handle to the same allocation; the inner value is not copied.
    fn to_arc(&self) -> Arc<T> {
        Arc::clone(self)
    }
}
impl<T: Clone> ToArc<T> for T {
    /// Clones the value and moves the copy into a freshly allocated `Arc`.
    fn to_arc(&self) -> Arc<T> {
        let owned = T::clone(self);
        Arc::new(owned)
    }
}
impl<K, V, P> PromiseCache<K, V, P>
where
    K: Hash + Eq + Clone,
    P: Protect<K>,
{
    /// Creates an empty cache.
    ///
    /// `capacity` is the size pruning works towards, `protector` controls which keys may be
    /// evicted and in which order, and `logger` is handed to the protector on every eviction.
    /// The limit on concurrent promises is derived as `max(2, capacity / 8)`.
    pub fn new(capacity: usize, protector: P, logger: Logger) -> Self {
        // Making the concurrent promises directly configurable is considered overkill for now,
        // so we just derive a vaguely sensible value from the cache size.
        let max_concurrent_promises = std::cmp::max(2, capacity / 8);
        Self {
            cache: HashMap::new(),
            capacity,
            protector,
            max_concurrent_promises,
            logger,
        }
    }

    /// Looks up `key` without blocking.
    ///
    /// Returns:
    /// - `Some(CacheItem::Complete(_))` if the value is cached, or if a promise for it has
    ///   already been resolved (the entry is upgraded to `Complete` as a side effect).
    /// - `Some(CacheItem::Promise(_))` if the value is promised but not yet available; the
    ///   caller can `wait` on the returned item.
    /// - `None` if the key is absent, or if its promise failed (the entry is removed as a side
    ///   effect so that the caller can recompute the value).
    pub fn get(&mut self, key: &K) -> Option<CacheItem<V>> {
        match self.cache.get(key) {
            // The cache contained the value, return it.
            item @ Some(CacheItem::Complete(_)) => item.cloned(),
            // The cache contains a promise for the value. Check to see if the promise has already
            // been resolved, without waiting for it.
            item @ Some(CacheItem::Promise(receiver)) => match receiver.try_recv() {
                // The promise has already been resolved. Replace the entry in the cache with a
                // `Complete` entry and then return the value.
                Ok(Some(value)) => {
                    let ready = CacheItem::Complete(value);
                    self.insert_cache_item(key.clone(), ready.clone());
                    Some(ready)
                }
                // The promise has not yet been resolved. Return the promise so the caller can
                // await it.
                Ok(None) => item.cloned(),
                // The sender has been dropped without sending a value. There was most likely an
                // error computing the value. Drop the key from the cache and return
                // `None` so the caller can recompute the value.
                //
                // It's worth noting that this is the only place where we detect and remove
                // failed promises (pruning may still evict a promise like any other entry). This
                // means a failed promise will only be cleaned up if we try to access it again.
                // This is OK, since the promises don't consume much memory. We expect that *all*
                // promises should be resolved, unless there is a programming or database error.
                Err(oneshot_broadcast::Error::SenderDropped) => {
                    self.cache.remove(key);
                    None
                }
            },
            // The cache does not have this value and it's not already promised to be computed.
            None => None,
        }
    }

    /// Returns `true` if the cache holds an entry for `key`, whether completed or promised.
    pub fn contains(&self, key: &K) -> bool {
        self.cache.contains_key(key)
    }

    /// Stores `value` under `key` unless a completed value is already cached for that key.
    ///
    /// An existing promise for `key` is overwritten by the completed value.
    pub fn insert_value<C: ToArc<V>>(&mut self, key: K, value: &C) {
        if self
            .cache
            .get(&key)
            // Replace the value if it's not present or if it's a promise. A bird in the hand is
            // worth two in the promise-bush!
            .map_or(true, CacheItem::is_promise)
        {
            self.insert_cache_item(key, CacheItem::Complete(value.to_arc()));
        }
    }

    /// Take care of resolving a promise by ensuring the value is made available:
    ///
    /// 1. To all waiting threads that are holding a `Receiver`.
    /// 2. In the cache itself for future callers.
    pub fn resolve_promise<C: ToArc<V>>(&mut self, sender: Sender<Arc<V>>, key: K, value: &C) {
        // Use the sender to notify all actively waiting receivers.
        let arc_value = value.to_arc();
        sender.send(Arc::clone(&arc_value));
        // Re-insert the value into the cache. The promise may have been evicted in the meantime,
        // but we probably want to keep this value (which resolved recently) over other older cache
        // entries.
        self.insert_value(key, &arc_value);
    }

    /// Inserts `cache_item` under `key`, pruning first when the insert would grow the cache.
    ///
    /// Overwriting an existing key (for example upgrading a promise to a completed value) leaves
    /// the number of entries unchanged, so no other entry is evicted to make room for it.
    fn insert_cache_item(&mut self, key: K, cache_item: CacheItem<V>) {
        if !self.cache.contains_key(&key) {
            self.prune_cache();
        }
        self.cache.insert(key, cache_item);
    }

    /// Registers a promise for `key` and returns the sender used to fulfil it.
    ///
    /// Any entry already stored under `key` is replaced by the new promise.
    ///
    /// # Errors
    ///
    /// Returns `PromiseCacheError::MaxConcurrentPromises` (carrying the current promise count)
    /// if the cache already holds `max_concurrent_promises` or more promise entries.
    pub fn create_promise(&mut self, key: K) -> Result<Sender<Arc<V>>, PromiseCacheError> {
        let num_active_promises = self.cache.values().filter(|item| item.is_promise()).count();
        if num_active_promises >= self.max_concurrent_promises {
            return Err(PromiseCacheError::MaxConcurrentPromises(
                num_active_promises,
            ));
        }
        let (sender, receiver) = oneshot();
        self.insert_cache_item(key, CacheItem::Promise(receiver));
        Ok(sender)
    }

    /// Evicts unprotected entries until there is room for one more within `capacity`.
    ///
    /// Candidates are ordered by the protector's sort key, smallest first. Protected keys are
    /// never evicted, so the cache may remain above the target size if too many are protected.
    fn prune_cache(&mut self) {
        // Leave space for the single item that is about to be inserted.
        let target_cache_size = self.capacity.saturating_sub(1);
        let prune_count = self.cache.len().saturating_sub(target_cache_size);
        if prune_count == 0 {
            // Nothing to evict: skip filtering and sorting every key for no result.
            return;
        }
        let keys_to_prune = self
            .cache
            .keys()
            .filter(|k| !self.protector.protect_from_eviction(*k))
            .sorted_by_key(|k| self.protector.sort_key(k))
            .take(prune_count)
            .cloned()
            .collect::<Vec<_>>();
        for key in &keys_to_prune {
            self.protector.notify_eviction(key, &self.logger);
            self.cache.remove(key);
        }
    }

    /// Replaces the protector used for subsequent eviction decisions.
    pub fn update_protector(&mut self, protector: P) {
        self.protector = protector;
    }

    /// Returns the number of entries in the cache, counting both values and promises.
    pub fn len(&self) -> usize {
        self.cache.len()
    }

    /// Returns `true` if the cache holds no entries.
    pub fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }

    /// Returns the maximum number of promises allowed in the cache at once.
    pub fn max_concurrent_promises(&self) -> usize {
        self.max_concurrent_promises
    }
}