From 0834346663302ca2eabea216b6aff0d486ce5692 Mon Sep 17 00:00:00 2001
From: John Hopper
Date: Wed, 4 Feb 2026 08:15:17 -0800
Subject: [PATCH] feat: add additional cache variants; polish and update
 reachability cache; commutative bitmap operations

---
 algo/reach.go              | 274 ++++++++++++++++++++-----------------
 algo/scc.go                |   5 +
 cache/cache.go             |  79 +++++++++++
 cache/nemap.go             |  62 +++++++++
 cache/nemap_test.go        |  38 +++++
 cache/sieve.go             | 206 ++++++++++++++++++++++++++++
 cache/sieve_test.go        | 100 ++++++++++++++
 cardinality/commutative.go |  75 +++++-----
 go.sum                     |   2 -
 9 files changed, 679 insertions(+), 162 deletions(-)
 create mode 100644 cache/cache.go
 create mode 100644 cache/nemap.go
 create mode 100644 cache/nemap_test.go
 create mode 100644 cache/sieve.go
 create mode 100644 cache/sieve_test.go

diff --git a/algo/reach.go b/algo/reach.go
index 9428dc3..691f42f 100644
--- a/algo/reach.go
+++ b/algo/reach.go
@@ -2,16 +2,21 @@ package algo
 
 import (
 	"context"
-	"sync"
-	"sync/atomic"
 
 	"github.com/gammazero/deque"
+	"github.com/specterops/dawgs/cache"
 	"github.com/specterops/dawgs/cardinality"
 	"github.com/specterops/dawgs/container"
 	"github.com/specterops/dawgs/graph"
 	"github.com/specterops/dawgs/query"
 )
 
+// reachCursor tracks the DFS state for a single component while exploring the
+// component graph. It stores the component identifier, the list of adjacent
+// components to be visited, an index into that adjacency slice, the bitmap of
+// components reachable from this component (including itself), and a pointer to
+// the cursor of its parent component (used to roll up reachability when the
+// DFS backtracks).
 type reachCursor struct {
 	component uint64
 	adjacent  []uint64
@@ -20,12 +25,18 @@ type reachCursor struct {
 	ancestor *reachCursor
 }
 
+// Complete merges the reach bitmap of this cursor into its ancestor's bitmap.
+// It is called when the DFS finishes processing a component, allowing the
+// parent component to inherit the reachability of its child.
 func (s *reachCursor) Complete() {
 	if s.ancestor != nil {
 		s.ancestor.reach.Or(s.reach)
 	}
 }
 
+// NextAdjacent returns the next adjacent component identifier to be explored.
+// The boolean indicates whether a component was returned (true) or the slice
+// has been exhausted (false). The internal index is advanced on each call.
 func (s *reachCursor) NextAdjacent() (uint64, bool) {
 	if s.adjacentIdx < len(s.adjacent) {
 		next := s.adjacent[s.adjacentIdx]
@@ -37,45 +48,42 @@ func (s *reachCursor) NextAdjacent() (uint64, bool) {
 	return 0, false
 }
 
+// ReachabilityCacheStats summarizes reachability cache usage as a count of
+// cached entries and a count of cache hits.
 type ReachabilityCacheStats struct {
 	Cached uint64
 	Hits   uint64
 }
 
-type reachabilityCacheLookup struct {
-	ComponentReach cardinality.Duplex[uint64]
-	MemberReach    cardinality.Duplex[uint64]
-}
-
+// ReachabilityCache holds a component graph derived from a directed graph and
+// two caches (inbound and outbound) that store the reachability set for each
+// component. The caches are sized by the caller and automatically evict
+// entries using the SIEVE second-chance policy when full.
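+//
+// A minimal usage sketch (digraph is assumed to come from
+// container.FetchDirectedGraph or an equivalent source):
+//
+//	rc := NewReachabilityCache(ctx, digraph, 10_000)
+//	if rc.CanReach(startID, endID, graph.DirectionOutbound) {
+//		// a directed path exists from startID to endID
+//	}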
 type ReachabilityCache struct {
-	Components             ComponentGraph
-	inboundComponentReach  map[uint64]cardinality.Duplex[uint64]
-	outboundComponentReach map[uint64]cardinality.Duplex[uint64]
-	inboundMemberReach     map[uint64]cardinality.Duplex[uint64]
-	outboundMemberReach    map[uint64]cardinality.Duplex[uint64]
-	cacheHits              *atomic.Uint64
-	maxCacheSize           int
-	resolved               cardinality.Duplex[uint64]
-	cacheLock              *sync.RWMutex
+	components             ComponentGraph
+	inboundComponentReach  cache.Cache[uint64, cardinality.Duplex[uint64]]
+	outboundComponentReach cache.Cache[uint64, cardinality.Duplex[uint64]]
 }
 
+// NewReachabilityCache creates a ReachabilityCache for the supplied directed
+// graph. The component graph is built first, then two bounded caches are
+// allocated with the given maxCacheSize (the maximum number of component
+// reachability entries to retain).
 func NewReachabilityCache(ctx context.Context, digraph container.DirectedGraph, maxCacheSize int) *ReachabilityCache {
 	return &ReachabilityCache{
-		Components:             NewComponentGraph(ctx, digraph),
-		inboundComponentReach:  make(map[uint64]cardinality.Duplex[uint64]),
-		inboundMemberReach:     make(map[uint64]cardinality.Duplex[uint64]),
-		outboundComponentReach: make(map[uint64]cardinality.Duplex[uint64]),
-		outboundMemberReach:    make(map[uint64]cardinality.Duplex[uint64]),
-		cacheHits:              &atomic.Uint64{},
-		maxCacheSize:           maxCacheSize,
-		resolved:               cardinality.NewBitmap64(),
-		cacheLock:              &sync.RWMutex{},
+		components:             NewComponentGraph(ctx, digraph),
+		inboundComponentReach:  cache.NewSieve[uint64, cardinality.Duplex[uint64]](maxCacheSize),
+		outboundComponentReach: cache.NewSieve[uint64, cardinality.Duplex[uint64]](maxCacheSize),
 	}
 }
 
+// newReachCursor creates a reachCursor for a non-root component during DFS.
+// It pre-populates the cursor's reach bitmap with the component itself and all
+// of its adjacent components (according to the supplied direction). The
+// previous cursor becomes the ancestor so that reachability can be rolled up.
 func (s *ReachabilityCache) newReachCursor(component uint64, direction graph.Direction, previous *reachCursor) *reachCursor {
 	var (
-		adjacentComponents = container.AdjacentNodes(s.Components.Digraph(), component, direction)
+		adjacentComponents = container.AdjacentNodes(s.components.Digraph(), component, direction)
 		componentReach     = cardinality.NewBitmap64With(append(adjacentComponents, component)...)
 	)
@@ -88,10 +96,14 @@ func (s *ReachabilityCache) newReachCursor(component uint64, direction graph.Dir
 	}
 }
 
+// newRootReachCursor creates a reachCursor for the root component of a DFS.
+// Unlike newReachCursor, the initial reach bitmap contains only the root
+// component itself (its adjacent components are stored separately for later
+// traversal).
 func (s *ReachabilityCache) newRootReachCursor(component uint64, direction graph.Direction) *reachCursor {
 	var (
-		adjacentComponents = container.AdjacentNodes(s.Components.Digraph(), component, direction)
-		componentReach     = cardinality.NewBitmap64With(append(adjacentComponents, component)...)
+		adjacentComponents = container.AdjacentNodes(s.components.Digraph(), component, direction)
+		componentReach     = cardinality.NewBitmap64With(component)
 	)
 
 	return &reachCursor{
@@ -102,158 +114,168 @@ func (s *ReachabilityCache) newRootReachCursor(component uint64, direction graph
 	}
 }
 
-func (s *ReachabilityCache) Stats() ReachabilityCacheStats {
-	s.cacheLock.RLock()
-	defer s.cacheLock.RUnlock()
-
-	return ReachabilityCacheStats{
-		Cached: s.resolved.Cardinality(),
-		Hits:   s.cacheHits.Load(),
-	}
+// Stats returns the combined cache statistics for both inbound and outbound
+// component-reach caches.
+func (s *ReachabilityCache) Stats() cache.Stats {
+	return s.inboundComponentReach.Stats().Combined(s.outboundComponentReach.Stats())
 }
 
+// CanReach determines whether a directed path exists from startID to endID in
+// the original graph, following the supplied direction (inbound or outbound).
+// The method works on the component graph: if both IDs belong to known
+// components, it asks the component graph whether the start component can
+// reach the end component.
 func (s *ReachabilityCache) CanReach(startID, endID uint64, direction graph.Direction) bool {
 	var (
-		startComponent, hasStart = s.Components.ContainingComponent(startID)
-		endComponent, hasEnd     = s.Components.ContainingComponent(endID)
+		startComponent, hasStart = s.components.ContainingComponent(startID)
+		endComponent, hasEnd     = s.components.ContainingComponent(endID)
 	)
 
 	if hasStart && hasEnd {
-		return s.Components.ComponentReachable(startComponent, endComponent, direction)
+		return s.components.ComponentReachable(startComponent, endComponent, direction)
 	}
 
 	return false
 }
 
-func (s *ReachabilityCache) canCacheComponent(component uint64) bool {
-	s.cacheLock.RLock()
-	defer s.cacheLock.RUnlock()
+// componentReachToMemberReachBitmap converts a bitmap of reachable component
+// identifiers into a bitmap of reachable member identifiers. For each
+// component in componentReach the method adds all of its member nodes to the
+// returned bitmap.
+func (s *ReachabilityCache) componentReachToMemberReachBitmap(componentReach cardinality.Duplex[uint64]) cardinality.Duplex[uint64] {
+	componentMembers := cardinality.NewBitmap64()
 
-	return !s.resolved.Contains(component) && len(s.inboundComponentReach)+len(s.outboundComponentReach) < s.maxCacheSize
-}
+	componentReach.Each(func(reachableComponent uint64) bool {
+		s.components.CollectComponentMembers(reachableComponent, componentMembers)
+		return true
+	})
 
-func (s *ReachabilityCache) cacheComponentReach(component uint64, direction graph.Direction, reach cardinality.Duplex[uint64]) {
-	if !s.canCacheComponent(component) {
-		return
-	}
+	return componentMembers
+}
 
-	// Collect the component members outside of the lock
-	componentMembers := cardinality.NewBitmap64()
+// componentReachToMemberReachSlice converts a bitmap of reachable component
+// identifiers into a slice where each element is a bitmap of the members of a
+// single reachable component. This representation avoids the cost of
+// materialising a single huge bitmap when the graph is very large.
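+//
+// A sketch of the intended consumption pattern (memberReachSlice and memberID
+// are illustrative):
+//
+//	for _, members := range memberReachSlice {
+//		if members.Contains(memberID) {
+//			// reachable; no merged bitmap was ever built
+//			break
+//		}
+//	}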
+func (s *ReachabilityCache) componentReachToMemberReachSlice(componentReach cardinality.Duplex[uint64]) []cardinality.Duplex[uint64] {
+	componentMembers := make([]cardinality.Duplex[uint64], 0, componentReach.Cardinality())
 
-	reach.Each(func(reachableComponent uint64) bool {
-		s.Components.CollectComponentMembers(reachableComponent, componentMembers)
+	componentReach.Each(func(reachableComponent uint64) bool {
+		componentMembers = append(componentMembers, s.components.ComponentMembers(reachableComponent))
 		return true
 	})
 
-	// Lock the cache to save the updated component reach and component member reach
-	s.cacheLock.Lock()
-	defer s.cacheLock.Unlock()
+	return componentMembers
+}
 
-	if s.resolved.CheckedAdd(component) {
-		switch direction {
-		case graph.DirectionInbound:
-			s.inboundComponentReach[component] = reach
-			s.inboundMemberReach[component] = componentMembers
+// cacheComponentReach stores the reach bitmap for a component in the appropriate
+// cache (inbound or outbound) based on the supplied direction.
+func (s *ReachabilityCache) cacheComponentReach(cursor *reachCursor, direction graph.Direction) {
+	switch direction {
+	case graph.DirectionInbound:
+		s.inboundComponentReach.Put(cursor.component, cursor.reach)
 
-		case graph.DirectionOutbound:
-			s.outboundComponentReach[component] = reach
-			s.outboundMemberReach[component] = componentMembers
-		}
+	case graph.DirectionOutbound:
+		s.outboundComponentReach.Put(cursor.component, cursor.reach)
 	}
 }
 
-func (s *ReachabilityCache) cachedComponentReach(component uint64, direction graph.Direction) (reachabilityCacheLookup, bool) {
-	// Take the read lock to do a contains check for this component's cached reach
-	s.cacheLock.RLock()
-	defer s.cacheLock.RUnlock()
-
-	if s.resolved.Contains(component) {
-		var (
-			cachedComponentReach cardinality.Duplex[uint64]
-			cachedMemberReach    cardinality.Duplex[uint64]
-			componentReachCached bool
-		)
-
-		switch direction {
-		case graph.DirectionInbound:
-			cachedComponentReach, componentReachCached = s.inboundComponentReach[component]
-			cachedMemberReach = s.inboundMemberReach[component]
-
-		case graph.DirectionOutbound:
-			cachedComponentReach, componentReachCached = s.outboundComponentReach[component]
-			cachedMemberReach = s.outboundMemberReach[component]
-		}
+// cachedComponentReach attempts to retrieve a component's reach bitmap from the
+// cache. The boolean indicates whether the entry was present.
+func (s *ReachabilityCache) cachedComponentReach(component uint64, direction graph.Direction) (cardinality.Duplex[uint64], bool) {
+	var (
+		entry cardinality.Duplex[uint64]
+		found = false
+	)
 
-		if componentReachCached {
-			s.cacheHits.Add(1)
+	switch direction {
+	case graph.DirectionInbound:
+		entry, found = s.inboundComponentReach.Get(component)
 
-			return reachabilityCacheLookup{
-				ComponentReach: cachedComponentReach,
-				MemberReach:    cachedMemberReach,
-			}, true
-		}
+	case graph.DirectionOutbound:
+		entry, found = s.outboundComponentReach.Get(component)
 	}
 
-	return reachabilityCacheLookup{}, false
+	return entry, found
 }
 
-func (s *ReachabilityCache) componentMemberReachDFS(component uint64, direction graph.Direction) {
+// componentReachDFS performs a depth-first search over the component graph to
+// compute the set of components reachable from the given start component in
+// the specified direction. Results are cached so that subsequent calls for
+// the same component/direction can be answered in O(1) time.
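+//
+// Illustrative call pattern (component ID 7 is hypothetical):
+//
+//	reach := s.componentReachDFS(7, graph.DirectionOutbound)  // first call walks the graph
+//	reach = s.componentReachDFS(7, graph.DirectionOutbound)   // second call is a cache hit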
+func (s *ReachabilityCache) componentReachDFS(component uint64, direction graph.Direction) cardinality.Duplex[uint64] {
+	if cachedReach, cached := s.cachedComponentReach(component, direction); cached {
+		return cachedReach
+	}
+
 	var (
-		stack             deque.Deque[*reachCursor]
-		visitedComponents = cardinality.NewBitmap64()
+		stack      deque.Deque[*reachCursor]
+		rootCursor = s.newRootReachCursor(component, direction)
 	)
 
-	// Mark the root component as visited and add it to the stack
-	visitedComponents.Add(component)
-	stack.PushBack(s.newRootReachCursor(component, direction))
+	// Push the root cursor onto the stack; its reach bitmap doubles as the
+	// visited set for the traversal.
+	stack.PushBack(rootCursor)
 
 	for stack.Len() > 0 {
-		reachCursor := stack.Back()
+		nextCursor := stack.Back()
 
-		if nextAdjacent, hasNext := reachCursor.NextAdjacent(); !hasNext {
+		if nextAdjacentComponent, hasNext := nextCursor.NextAdjacent(); !hasNext {
 			stack.PopBack()
 
 			// Complete the cursor to roll up reach cardinalities
-			reachCursor.Complete()
+			nextCursor.Complete()
 
 			// Update the cache with this component's reach
-			s.cacheComponentReach(reachCursor.component, direction, reachCursor.reach)
-		} else if visitedComponents.CheckedAdd(nextAdjacent) {
-			if cachedReach, cached := s.cachedComponentReach(nextAdjacent, direction); cached {
-				visitedComponents.Or(cachedReach.ComponentReach)
-
-				if reachCursor.reach != visitedComponents {
-					reachCursor.reach.Or(cachedReach.ComponentReach)
-				}
+			s.cacheComponentReach(nextCursor, direction)
+		} else if rootCursor.reach.CheckedAdd(nextAdjacentComponent) {
+			// This component has not been visited yet. If its reach is already
+			// cached, Or(...) the cached bitmap in; otherwise traverse into it.
+			if cachedReach, cached := s.cachedComponentReach(nextAdjacentComponent, direction); cached {
+				nextCursor.reach.Or(cachedReach)
 			} else {
-				stack.PushBack(s.newReachCursor(nextAdjacent, direction, reachCursor))
+				stack.PushBack(s.newReachCursor(nextAdjacentComponent, direction, nextCursor))
 			}
 		}
 	}
+
+	return rootCursor.reach
 }
 
+// ReachSliceOfComponentContainingMember returns the reach of the component
+// containing the given member, in the given direction, as a slice of
+// membership bitmaps. The bitmaps are deliberately left uncombined to preserve
+// performance on large-scale digraphs: producing a single merged bitmap costs
+// far more than iteratively scanning the returned slice. The slice can also be
+// fed directly into commutative bitmap operations.
+func (s *ReachabilityCache) ReachSliceOfComponentContainingMember(member uint64, direction graph.Direction) []cardinality.Duplex[uint64] {
+	if rootComponent, rootInComponent := s.components.ContainingComponent(member); rootInComponent {
+		return s.componentReachToMemberReachSlice(s.componentReachDFS(rootComponent, direction))
+	}
+
+	return nil
+}
+
+// ReachOfComponentContainingMember returns the reach of the component
+// containing the given member, in the given direction, as a single bitwise
+// OR-ed bitmap. On large-scale digraphs this can be expensive; if it is called
+// in a tight loop, consider ReachSliceOfComponentContainingMember together
+// with commutative bitmap operations instead.
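+//
+// A hedged sketch contrasting the two forms (rc is a *ReachabilityCache and
+// the IDs are illustrative):
+//
+//	merged := rc.ReachOfComponentContainingMember(42, graph.DirectionOutbound)
+//	_ = merged.Contains(99) // one up-front merge
+//
+//	slices := rc.ReachSliceOfComponentContainingMember(42, graph.DirectionOutbound)
+//	_ = cardinality.CommutativeOr(slices...).Contains(99) // lazy per-component scan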
 func (s *ReachabilityCache) ReachOfComponentContainingMember(member uint64, direction graph.Direction) cardinality.Duplex[uint64] {
-	if rootComponent, rootInComponent := s.Components.ContainingComponent(member); rootInComponent {
-		if cachedReach, cached := s.cachedComponentReach(rootComponent, direction); cached {
-			return cachedReach.MemberReach
-		} else {
-			s.componentMemberReachDFS(rootComponent, direction)
-
-			if cachedReach, cached := s.cachedComponentReach(rootComponent, direction); cached {
-				return cachedReach.MemberReach
-			}
-		}
+	if rootComponent, rootInComponent := s.components.ContainingComponent(member); rootInComponent {
+		return s.componentReachToMemberReachBitmap(s.componentReachDFS(rootComponent, direction))
 	}
 
 	return cardinality.NewBitmap64()
 }
 
+// OrReach OR-s the reachability set of the given node (in the supplied
+// direction) into the provided duplex bitmap. The node itself is removed
+// from the result so that only other reachable members remain.
 func (s *ReachabilityCache) OrReach(node uint64, direction graph.Direction, duplex cardinality.Duplex[uint64]) {
 	// Reach bitmap will contain the member due to resolution of component reach
 	duplex.Or(s.ReachOfComponentContainingMember(node, direction))
 	duplex.Remove(node)
 }
 
+// XorReach XOR-s the reachability set of the given node (in the supplied
+// direction) into the provided duplex bitmap. The node itself is removed
+// from the result before the XOR operation.
 func (s *ReachabilityCache) XorReach(node uint64, direction graph.Direction, duplex cardinality.Duplex[uint64]) {
 	// Reach bitmap will contain the member due to resolution of component reach
 	reachBitmap := s.ReachOfComponentContainingMember(node, direction).Clone()
@@ -262,19 +284,27 @@ func (s *ReachabilityCache) XorReach(node uint64, direction graph.Direction, dup
 	duplex.Xor(reachBitmap)
 }
 
+// edgesFilteredByKinds returns a graph.Criteria that selects only edges whose
+// kind matches one of the supplied kinds.
 func edgesFilteredByKinds(kinds ...graph.Kind) graph.Criteria {
 	return query.KindIn(query.Relationship(), kinds...)
 }
 
+// FetchReachabilityCache builds a ReachabilityCache for the entire directed
+// graph represented by the supplied database and criteria. The cache size is
+// set to roughly 15% of the number of nodes in the graph (rounded down).
 func FetchReachabilityCache(ctx context.Context, db graph.Database, criteria graph.Criteria) (*ReachabilityCache, error) {
 	if digraph, err := container.FetchDirectedGraph(ctx, db, criteria); err != nil {
 		return nil, err
 	} else {
-		// TODO: Present a more sane config here
-		return NewReachabilityCache(ctx, digraph, 1_700_000), nil
+		maxCacheCap := int(float64(digraph.NumNodes()) * .15)
+		return NewReachabilityCache(ctx, digraph, maxCacheCap), nil
 	}
 }
 
+// FetchFilteredReachabilityCache builds a ReachabilityCache for a graph that
+// contains only edges of the supplied kinds. It is a convenience wrapper
+// around FetchReachabilityCache that constructs the appropriate criteria.
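+//
+// Example (kindA and kindB stand in for real graph.Kind values):
+//
+//	rc, err := FetchFilteredReachabilityCache(ctx, db, kindA, kindB)
+//	if err != nil {
+//		return err
+//	}
+//
+//	stats := rc.Stats() // combined inbound/outbound cache statistics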
 func FetchFilteredReachabilityCache(ctx context.Context, db graph.Database, traversalKinds ...graph.Kind) (*ReachabilityCache, error) {
 	return FetchReachabilityCache(ctx, db, edgesFilteredByKinds(traversalKinds...))
 }
diff --git a/algo/scc.go b/algo/scc.go
index 29ab550..2dff0f2 100644
--- a/algo/scc.go
+++ b/algo/scc.go
@@ -136,6 +136,7 @@ func (s ComponentGraph) HasMember(memberID uint64) bool {
 	_, hasMember := s.memberComponentLookup[memberID]
 	return hasMember
 }
+
 func (s ComponentGraph) KnownMembers() cardinality.Duplex[uint64] {
 	members := cardinality.NewBitmap64()
 
@@ -155,6 +156,10 @@ func (s ComponentGraph) ContainingComponent(memberID uint64) (uint64, bool) {
 	return component, inComponentDigraph
 }
 
+func (s ComponentGraph) ComponentMembers(componentID uint64) cardinality.Duplex[uint64] {
+	return s.componentMembers[componentID]
+}
+
 func (s ComponentGraph) CollectComponentMembers(componentID uint64, members cardinality.Duplex[uint64]) {
 	members.Or(s.componentMembers[componentID])
 }
diff --git a/cache/cache.go b/cache/cache.go
new file mode 100644
index 0000000..2e45ec0
--- /dev/null
+++ b/cache/cache.go
@@ -0,0 +1,79 @@
+package cache
+
+import "sync/atomic"
+
+// Stats tracks hit, miss, and size counters for a cache alongside its
+// configured capacity. The counters are pointers, so copies of a Stats
+// value share and update the same underlying counts.
+type Stats struct {
+	hits     *atomic.Int64
+	misses   *atomic.Int64
+	size     *atomic.Int64
+	Capacity int
+}
+
+// Combined returns a new Stats whose counters and capacity are the sums of
+// this value's and other's.
+func (s Stats) Combined(other Stats) Stats {
+	var (
+		hits     = &atomic.Int64{}
+		misses   = &atomic.Int64{}
+		size     = &atomic.Int64{}
+		capacity = s.Capacity + other.Capacity
+	)
+
+	hits.Add(s.Hits())
+	hits.Add(other.Hits())
+
+	misses.Add(s.Misses())
+	misses.Add(other.Misses())
+
+	size.Add(s.Size())
+	size.Add(other.Size())
+
+	return Stats{
+		hits:     hits,
+		misses:   misses,
+		size:     size,
+		Capacity: capacity,
+	}
+}
+
+func (s Stats) Miss() {
+	s.misses.Add(1)
+}
+
+func (s Stats) Misses() int64 {
+	return s.misses.Load()
+}
+
+func (s Stats) Hit() {
+	s.hits.Add(1)
+}
+
+func (s Stats) Hits() int64 {
+	return s.hits.Load()
+}
+
+func (s Stats) Size() int64 {
+	return s.size.Load()
+}
+
+func (s Stats) Put() {
+	s.size.Add(1)
+}
+
+func (s Stats) Delete() {
+	s.size.Add(-1)
+}
+
+// NewStats creates a zeroed Stats with the given capacity.
+func NewStats(capacity int) Stats {
+	return Stats{
+		hits:     &atomic.Int64{},
+		misses:   &atomic.Int64{},
+		size:     &atomic.Int64{},
+		Capacity: capacity,
+	}
+}
+
+// Cache is the common interface implemented by the cache variants in this
+// package.
+type Cache[K comparable, V any] interface {
+	Put(key K, value V)
+	Get(key K) (V, bool)
+	Delete(key K)
+	Stats() Stats
+}
diff --git a/cache/nemap.go b/cache/nemap.go
new file mode 100644
index 0000000..a8d3704
--- /dev/null
+++ b/cache/nemap.go
@@ -0,0 +1,62 @@
+package cache
+
+import (
+	"sync"
+)
+
+// NonExpiringMapCache is a fixed-capacity, map-backed cache that never
+// evicts: once the cache is full, writes for new keys are silently dropped
+// while updates to existing keys still succeed.
+type NonExpiringMapCache[K comparable, V any] struct {
+	store  map[K]V
+	stats  Stats
+	rwLock sync.RWMutex
+}
+
+func NewNonExpiringMapCache[K comparable, V any](capacity int) Cache[K, V] {
+	return &NonExpiringMapCache[K, V]{
+		store: make(map[K]V, capacity),
+		stats: NewStats(capacity),
+	}
+}
+
+func (s *NonExpiringMapCache[K, V]) Put(key K, value V) {
+	s.rwLock.Lock()
+	defer s.rwLock.Unlock()
+
+	if _, exists := s.store[key]; exists {
+		s.store[key] = value
+	} else if int(s.stats.Size()) < s.stats.Capacity {
+		s.store[key] = value
+		s.stats.Put()
+	}
+}
+
+func (s *NonExpiringMapCache[K, V]) Get(key K) (V, bool) {
+	s.rwLock.RLock()
+	defer s.rwLock.RUnlock()
+
+	value, hasValue := s.store[key]
+
+	if hasValue {
+		s.stats.Hit()
+	} else {
+		s.stats.Miss()
+	}
+
+	return value, hasValue
+}
+
+func (s *NonExpiringMapCache[K, V]) Delete(key K) {
+	s.rwLock.Lock()
+	defer s.rwLock.Unlock()
+
+	_, exists := s.store[key]
+
+	if exists {
+		delete(s.store, key)
+
+		// Keep the size counter in sync with the map contents.
+		s.stats.Delete()
+	}
+}
+
+func (s *NonExpiringMapCache[K, V]) Stats() Stats {
+	return s.stats
+}
diff --git a/cache/nemap_test.go b/cache/nemap_test.go
new file mode 100644
index 0000000..60a957f
--- /dev/null
+++ b/cache/nemap_test.go
@@ -0,0 +1,38 @@
+package cache_test
+
+import (
+	"testing"
+
+	"github.com/specterops/dawgs/cache"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNonExpiringMapCache_PutGet(t *testing.T) {
+	type testValue struct {
+		id   int
+		data string
+	}
+
+	var (
+		nemap    = cache.NewNonExpiringMapCache[int, testValue](10)
+		expected = testValue{id: 1, data: "one"}
+	)
+
+	nemap.Put(1, expected)
+	fetched, exists := nemap.Get(1)
+
+	require.True(t, exists)
+	require.Equal(t, expected, fetched)
+}
+
+func TestNonExpiringMapCache_UpdateExistingKey(t *testing.T) {
+	nemap := cache.NewNonExpiringMapCache[string, int](5)
+
+	nemap.Put("k", 10)
+	nemap.Put("k", 20)
+
+	fetched, exists := nemap.Get("k")
+
+	require.True(t, exists)
+	require.Equal(t, 20, fetched)
+}
diff --git a/cache/sieve.go b/cache/sieve.go
new file mode 100644
index 0000000..21bd875
--- /dev/null
+++ b/cache/sieve.go
@@ -0,0 +1,206 @@
+package cache
+
+import (
+	"container/list"
+	"sync"
+	"sync/atomic"
+)
+
+// entry holds the key and value of a cache entry.
+//
+// The generic parameters K and V represent the key and value types
+// respectively. An entry also tracks whether it has been visited
+// during the most recent clock-hand sweep via the atomic `visited`
+// flag and stores a pointer to its position in the eviction queue.
+type entry[K comparable, V any] struct {
+	key     K
+	value   V
+	visited atomic.Bool
+	element *list.Element
+}
+
+// Sieve implements a clock-hand (second-chance) cache with a fixed
+// capacity. It is safe for concurrent use by multiple goroutines.
+//
+// The cache maintains:
+//   - a map (`store`) from keys to entries for O(1) lookups,
+//   - a doubly-linked list (`queue`) that orders entries from most
+//     recently inserted (front) to least recently inserted (back),
+//   - a “hand” pointer that walks the list during eviction, giving
+//     each entry a second chance if it has been visited since the
+//     previous sweep.
+//
+// Statistics about cache usage are collected in the embedded `stats`
+// field.
+type Sieve[K comparable, V any] struct {
+	rwLock sync.RWMutex
+	store  map[K]*entry[K, V]
+	queue  *list.List
+	hand   *list.Element
+	stats  Stats
+}
+
+// NewSieve creates a new Sieve cache with the supplied capacity.
+//
+// If `capacity` is less than or equal to zero, a capacity of one is
+// used to prevent function calls from panicking. The returned value
+// implements the `Cache[K,V]` interface.
+func NewSieve[K comparable, V any](capacity int) Cache[K, V] {
+	// Do not panic on an invalid capacity but ensure that the cache remains functional
+	if capacity <= 0 {
+		capacity = 1
+	}
+
+	return &Sieve[K, V]{
+		store: make(map[K]*entry[K, V], capacity),
+		queue: list.New(),
+		stats: NewStats(capacity),
+	}
+}
+
+// Stats returns a copy of the cache’s current statistics.
+//
+// The returned `Stats` value can be inspected without holding the
+// cache lock; it contains counters for hits, misses, puts, and
+// deletions as well as the configured capacity.
+func (s *Sieve[K, V]) Stats() Stats {
+	return s.stats
+}
+
+// putEntry inserts a new key/value pair into the cache without acquiring
+// the lock.
+//
+// It first evicts an entry if the cache is already at capacity, then
+// creates a fresh `entry`, pushes its key onto the front of the
+// eviction queue, stores the entry in the map, and records a put in
+// the statistics.
+//
+// This method is intended to be called only from other methods that
+// already hold `s.rwLock`.
+func (s *Sieve[K, V]) putEntry(key K, value V) {
+	// Evict first if needed
+	if s.queue.Len() >= s.stats.Capacity {
+		s.evict()
+	}
+
+	newEntry := &entry[K, V]{
+		key:     key,
+		value:   value,
+		element: s.queue.PushFront(key),
+	}
+
+	s.store[key] = newEntry
+	s.stats.Put()
+}
+
+// Put adds or updates the value associated with `key`.
+//
+// If the key already exists, its value is replaced and the entry is
+// marked as visited (so it will survive the next eviction sweep). If
+// the key is new, a fresh entry is inserted, possibly triggering an
+// eviction when the cache is full.
+//
+// The operation holds an exclusive lock for the duration of the update.
+func (s *Sieve[K, V]) Put(key K, value V) {
+	s.rwLock.Lock()
+	defer s.rwLock.Unlock()
+
+	if existingEntry, exists := s.store[key]; exists {
+		// Update the entry values
+		existingEntry.value = value
+		existingEntry.visited.Store(true)
+	} else {
+		s.putEntry(key, value)
+	}
+}
+
+// Get retrieves the value associated with `key`.
+//
+// If the key is present, the entry’s visited flag is set, a hit is
+// recorded, and the cached value is returned with `true`. If the key
+// is absent, a miss is recorded and the zero value of V is returned
+// with `false`.
+//
+// The operation holds a read lock while accessing the map.
+func (s *Sieve[K, V]) Get(key K) (V, bool) {
+	s.rwLock.RLock()
+	defer s.rwLock.RUnlock()
+
+	if entry, exists := s.store[key]; exists {
+		s.stats.Hit()
+
+		entry.visited.Store(true)
+		return entry.value, true
+	}
+
+	s.stats.Miss()
+
+	var emptyV V
+	return emptyV, false
+}
+
+// removeEntry deletes `e` from both the eviction queue and the store map,
+// and updates the statistics to reflect a deletion.
+//
+// This helper assumes that the caller already holds the appropriate lock.
+func (s *Sieve[K, V]) removeEntry(e *entry[K, V]) {
+	s.queue.Remove(e.element)
+	delete(s.store, e.key)
+
+	s.stats.Delete()
+}
+
+// Delete removes the entry identified by `key` from the cache.
+//
+// If the entry being removed is currently pointed to by the clock hand,
+// the hand is moved one step toward the front of the queue so that the
+// next eviction sweep can continue correctly. The method records a
+// deletion in the statistics.
+//
+// The operation holds an exclusive lock for the duration of the removal.
+func (s *Sieve[K, V]) Delete(key K) {
+	s.rwLock.Lock()
+	defer s.rwLock.Unlock()
+
+	if entry, exists := s.store[key]; exists {
+		if entry.element == s.hand {
+			s.hand = s.hand.Prev()
+		}
+
+		s.removeEntry(entry)
+	}
+}
+
+// evict removes a single entry from the cache using the clock-hand
+// (second-chance) algorithm.
+//
+// The hand starts at the back of the queue (the least recently inserted
+// entry) and walks toward the front. Each visited entry it passes has
+// its visited flag cleared, giving that entry a “second chance”; the
+// first entry encountered whose visited flag is false is evicted. After
+// eviction the hand is left on the element just ahead of the removed
+// entry so that the next sweep resumes where this one left off.
+//
+// This method assumes the caller already holds the exclusive lock.
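+//
+// Worked example (illustrative): with the queue ordered front-to-back as
+// [C, B, A] and only A's visited flag set, the hand starts at A, clears
+// A's flag, moves to B, and evicts B because B was not visited.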
+func (s *Sieve[K, V]) evict() {
+	hand := s.hand
+
+	// If the hand is nil, start the sweep at the tail element of the list
+	if hand == nil {
+		hand = s.queue.Back()
+	}
+
+	entry := s.store[hand.Value.(K)]
+
+	for entry.visited.Load() {
+		entry.visited.Store(false)
+
+		if hand = hand.Prev(); hand == nil {
+			hand = s.queue.Back()
+		}
+
+		entry = s.store[hand.Value.(K)]
+	}
+
+	s.hand = hand.Prev()
+	s.removeEntry(entry)
+}
diff --git a/cache/sieve_test.go b/cache/sieve_test.go
new file mode 100644
index 0000000..f39f01c
--- /dev/null
+++ b/cache/sieve_test.go
@@ -0,0 +1,100 @@
+package cache_test
+
+import (
+	"testing"
+
+	"github.com/specterops/dawgs/cache"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSieve_PutGet(t *testing.T) {
+	type testValue struct {
+		id   int
+		data string
+	}
+
+	var (
+		sieve    = cache.NewSieve[int, testValue](10)
+		expected = testValue{id: 1, data: "one"}
+	)
+
+	sieve.Put(1, expected)
+	fetched, exists := sieve.Get(1)
+
+	require.True(t, exists)
+	require.Equal(t, expected, fetched)
+}
+
+func TestSieve_UpdateExistingKey(t *testing.T) {
+	sieve := cache.NewSieve[string, int](5)
+
+	sieve.Put("k", 10)
+	sieve.Put("k", 20)
+
+	fetched, exists := sieve.Get("k")
+
+	require.True(t, exists)
+	require.Equal(t, 20, fetched)
+}
+
+func TestSieve_EvictWhenFull(t *testing.T) {
+	var (
+		sieve      = cache.NewSieve[int, string](3)
+		assertions = map[int]string{
+			2: "two",
+			3: "three",
+			4: "four",
+		}
+	)
+
+	sieve.Put(1, "one")
+	sieve.Put(2, "two")
+	sieve.Put(3, "three")
+	sieve.Put(4, "four")
+
+	_, exists := sieve.Get(1)
+	require.Falsef(t, exists, "first element must evict")
+
+	for key, expected := range assertions {
+		fetched, exists := sieve.Get(key)
+
+		require.True(t, exists)
+		require.Equal(t, expected, fetched)
+	}
+}
+
+func TestSieve_EvictRespectsVisitedFlag(t *testing.T) {
+	var (
+		sieve      = cache.NewSieve[int, string](3)
+		assertions = map[int]string{
+			1: "one",
+			3: "three",
+			4: "four",
+		}
+	)
+
+	// 1 will become the oldest (tail)
+	sieve.Put(1, "one")
+	sieve.Put(2, "two")
+
+	// 3 is the newest (head)
+	sieve.Put(3, "three")
+
+	// Access key 1 – this marks it as visited.
+	_, exists := sieve.Get(1)
+	require.True(t, exists)
+
+	// Adding a new entry forces eviction.
+	sieve.Put(4, "four")
+
+	// Key 2 must have been evicted.
+	_, exists = sieve.Get(2)
+	require.False(t, exists, "key two must be evicted")
+
+	for key, expected := range assertions {
+		fetched, exists := sieve.Get(key)
+
+		require.True(t, exists)
+		require.Equal(t, expected, fetched)
+	}
+}
diff --git a/cardinality/commutative.go b/cardinality/commutative.go
index cdae60a..48a5c52 100644
--- a/cardinality/commutative.go
+++ b/cardinality/commutative.go
@@ -1,61 +1,60 @@
 package cardinality
 
-type CommutativeDuplex64 struct {
-	duplexes []Duplex[uint64]
-}
-
-func NewCommutativeDuplex64() *CommutativeDuplex64 {
-	return &CommutativeDuplex64{}
-}
-
-func (s *CommutativeDuplex64) orAll() Duplex[uint64] {
-	tempBitmap := NewBitmap64()
-
-	for _, nextDuplex := range s.duplexes {
-		tempBitmap.Or(nextDuplex)
-	}
+// DuplexCommutation is an ordered collection of duplex bitmaps that is
+// evaluated lazily: membership checks scan the member bitmaps instead of
+// merging them into a single bitmap first.
+type DuplexCommutation[T uint32 | uint64] []Duplex[T]
 
-	return tempBitmap
+// CommutativeOr wraps the given duplex bitmaps in a DuplexCommutation whose
+// members are combined with a logical OR during membership checks.
+func CommutativeOr[T uint32 | uint64](duplexes ...Duplex[T]) DuplexCommutation[T] {
+	return duplexes
 }
 
-func (s *CommutativeDuplex64) Or(duplex ...Duplex[uint64]) {
-	s.duplexes = append(s.duplexes, duplex...)
+func (s DuplexCommutation[T]) Or(duplexes ...Duplex[T]) DuplexCommutation[T] {
+	return append(s, duplexes...)
 }
 
-func (s *CommutativeDuplex64) OrInto(duplex Duplex[uint64]) {
-	for _, internalDuplex := range s.duplexes {
-		duplex.Or(internalDuplex)
-	}
-}
-
-func (s *CommutativeDuplex64) AndInto(duplex Duplex[uint64]) {
-	for _, internalDuplex := range s.duplexes {
-		duplex.And(internalDuplex)
+func (s DuplexCommutation[T]) Contains(value T) bool {
+	for _, duplex := range s {
+		if duplex.Cardinality() > 0 && duplex.Contains(value) {
+			return true
+		}
 	}
-}
 
-func (s *CommutativeDuplex64) OrAll(other *CommutativeDuplex64) {
-	s.duplexes = append(s.duplexes, other.duplexes...)
+	return false
 }
 
-func (s *CommutativeDuplex64) Cardinality() uint64 {
-	return s.orAll().Cardinality()
+// CommutativeDuplexes collects OR and AND commutations and evaluates
+// membership against them lazily.
+type CommutativeDuplexes[T uint32 | uint64] struct {
+	or  []DuplexCommutation[T]
+	and []DuplexCommutation[T]
 }
 
-func (s *CommutativeDuplex64) Slice() []uint64 {
-	return s.orAll().Slice()
+func (s *CommutativeDuplexes[T]) Or(dc DuplexCommutation[T]) {
+	s.or = append(s.or, dc)
 }
 
-func (s *CommutativeDuplex64) Each(delegate func(value uint64) bool) {
-	s.orAll().Each(delegate)
+func (s *CommutativeDuplexes[T]) And(dc DuplexCommutation[T]) {
+	s.and = append(s.and, dc)
 }
 
-func (s *CommutativeDuplex64) Contains(value uint64) bool {
-	for _, nextDuplex := range s.duplexes {
-		if nextDuplex.Contains(value) {
+func (s *CommutativeDuplexes[T]) valueInOrSets(value T) bool {
+	// Search each bitwise OR set. Only one set needs to contain the value.
+	for _, orSet := range s.or {
+		if orSet.Contains(value) {
 			return true
 		}
 	}
 
 	return false
 }
+
+func (s *CommutativeDuplexes[T]) valueInAndSets(value T) bool {
+	// Search each bitwise AND set. Every AND set must contain the value.
+	for _, andSet := range s.and {
+		if !andSet.Contains(value) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// Contains reports whether value is present in at least one OR commutation
+// and in every AND commutation.
+func (s *CommutativeDuplexes[T]) Contains(value T) bool {
+	return s.valueInOrSets(value) && s.valueInAndSets(value)
+}
diff --git a/go.sum b/go.sum
index 9e37a9f..e9c170f 100644
--- a/go.sum
+++ b/go.sum
@@ -14,7 +14,6 @@ github.com/bits-and-blooms/bitset v1.24.4 h1:95H15Og1clikBrKr/DuzMXkQzECs1M6hhoG
 github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I=
 github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
 github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg=
 github.com/cockroachdb/apd/v3 v3.2.1/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc=
@@ -93,7 +92,6 @@ github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshN
 github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
 github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
 github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
-github.com/jackc/puddle v1.3.0 h1:eHK/5clGOatcjX3oWGBO/MpxpbHzSwud5EWTSCI+MX0=
 github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=