use derivative::Derivative; use itertools::Itertools; use oneshot_broadcast::{oneshot, Receiver, Sender}; use slog::Logger; use std::collections::HashMap; use std::hash::Hash; use std::sync::Arc; #[derive(Debug)] pub struct PromiseCache where K: Hash + Eq + Clone, P: Protect, { cache: HashMap>, capacity: usize, protector: P, max_concurrent_promises: usize, logger: Logger, } /// A value implementing `Protect` is capable of preventing keys of type `K` from being evicted. /// /// It also dictates an ordering on keys which is used to prioritise evictions. pub trait Protect { type SortKey: Ord; fn sort_key(&self, k: &K) -> Self::SortKey; fn protect_from_eviction(&self, k: &K) -> bool; fn notify_eviction(&self, _k: &K, _log: &Logger) {} } #[derive(Derivative)] #[derivative(Clone(bound = ""))] pub enum CacheItem { Complete(Arc), Promise(Receiver>), } impl std::fmt::Debug for CacheItem { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { match self { CacheItem::Complete(value) => value.fmt(f), CacheItem::Promise(_) => "Promise(..)".fmt(f), } } } #[derive(Debug)] pub enum PromiseCacheError { Failed(oneshot_broadcast::Error), MaxConcurrentPromises(usize), } pub trait ToArc { fn to_arc(&self) -> Arc; } impl CacheItem { pub fn is_promise(&self) -> bool { matches!(self, CacheItem::Promise(_)) } pub fn wait(self) -> Result, PromiseCacheError> { match self { CacheItem::Complete(value) => Ok(value), CacheItem::Promise(receiver) => receiver.recv().map_err(PromiseCacheError::Failed), } } } impl ToArc for Arc { fn to_arc(&self) -> Arc { self.clone() } } impl ToArc for T where T: Clone, { fn to_arc(&self) -> Arc { Arc::new(self.clone()) } } impl PromiseCache where K: Hash + Eq + Clone, P: Protect, { pub fn new(capacity: usize, protector: P, logger: Logger) -> Self { // Making the concurrent promises directly configurable is considered overkill for now, // so we just derive a vaguely sensible value from the cache size. let max_concurrent_promises = std::cmp::max(2, capacity / 8); Self { cache: HashMap::new(), capacity, protector, max_concurrent_promises, logger, } } pub fn get(&mut self, key: &K) -> Option> { match self.cache.get(key) { // The cache contained the value, return it. item @ Some(CacheItem::Complete(_)) => item.cloned(), // The cache contains a promise for the value. Check to see if the promise has already // been resolved, without waiting for it. item @ Some(CacheItem::Promise(receiver)) => match receiver.try_recv() { // The promise has already been resolved. Replace the entry in the cache with a // `Complete` entry and then return the value. Ok(Some(value)) => { let ready = CacheItem::Complete(value); self.insert_cache_item(key.clone(), ready.clone()); Some(ready) } // The promise has not yet been resolved. Return the promise so the caller can await // it. Ok(None) => item.cloned(), // The sender has been dropped without sending a value. There was most likely an // error computing the value. Drop the key from the cache and return // `None` so the caller can recompute the value. // // It's worth noting that this is the only place where we removed unresolved // promises from the cache. This means unresolved promises will only be removed if // we try to access them again. This is OK, since the promises don't consume much // memory. We expect that *all* promises should be resolved, unless there is a // programming or database error. Err(oneshot_broadcast::Error::SenderDropped) => { self.cache.remove(key); None } }, // The cache does not have this value and it's not already promised to be computed. None => None, } } pub fn contains(&self, key: &K) -> bool { self.cache.contains_key(key) } pub fn insert_value>(&mut self, key: K, value: &C) { if self .cache .get(&key) // Replace the value if it's not present or if it's a promise. A bird in the hand is // worth two in the promise-bush! .map_or(true, CacheItem::is_promise) { self.insert_cache_item(key, CacheItem::Complete(value.to_arc())); } } /// Take care of resolving a promise by ensuring the value is made available: /// /// 1. To all waiting thread that are holding a `Receiver`. /// 2. In the cache itself for future callers. pub fn resolve_promise>(&mut self, sender: Sender>, key: K, value: &C) { // Use the sender to notify all actively waiting receivers. let arc_value = value.to_arc(); sender.send(arc_value.clone()); // Re-insert the value into the cache. The promise may have been evicted in the meantime, // but we probably want to keep this value (which resolved recently) over other older cache // entries. self.insert_value(key, &arc_value); } /// Prunes the cache first before inserting a new item. fn insert_cache_item(&mut self, key: K, cache_item: CacheItem) { self.prune_cache(); self.cache.insert(key, cache_item); } pub fn create_promise(&mut self, key: K) -> Result>, PromiseCacheError> { let num_active_promises = self.cache.values().filter(|item| item.is_promise()).count(); if num_active_promises >= self.max_concurrent_promises { return Err(PromiseCacheError::MaxConcurrentPromises( num_active_promises, )); } let (sender, receiver) = oneshot(); self.insert_cache_item(key, CacheItem::Promise(receiver)); Ok(sender) } fn prune_cache(&mut self) { let target_cache_size = self.capacity.saturating_sub(1); if let Some(prune_count) = self.cache.len().checked_sub(target_cache_size) { let keys_to_prune = self .cache .keys() .filter(|k| !self.protector.protect_from_eviction(*k)) .sorted_by_key(|k| self.protector.sort_key(k)) .take(prune_count) .cloned() .collect::>(); for key in &keys_to_prune { self.protector.notify_eviction(key, &self.logger); self.cache.remove(key); } } } pub fn update_protector(&mut self, protector: P) { self.protector = protector; } pub fn len(&self) -> usize { self.cache.len() } pub fn is_empty(&self) -> bool { self.cache.is_empty() } pub fn max_concurrent_promises(&self) -> usize { self.max_concurrent_promises } }