diff --git a/src/snmalloc/backend/meta_protected_range.h b/src/snmalloc/backend/meta_protected_range.h
index 857e853d2..d7f8bfe8a 100644
--- a/src/snmalloc/backend/meta_protected_range.h
+++ b/src/snmalloc/backend/meta_protected_range.h
@@ -55,6 +55,7 @@ namespace snmalloc
       LogRange<3>,
       GlobalRange,
       CommitRange<PAL>,
+      DecayRange<PAL, Pagemap>,
       StatsRange>;
 
     // Controls the padding around the meta-data range.
diff --git a/src/snmalloc/backend/standard_range.h b/src/snmalloc/backend/standard_range.h
index 78609ed2d..675cee516 100644
--- a/src/snmalloc/backend/standard_range.h
+++ b/src/snmalloc/backend/standard_range.h
@@ -37,8 +37,14 @@ namespace snmalloc
       LogRange<2>,
       GlobalRange>;
 
-    // Track stats of the committed memory
-    using Stats = Pipe<GlobalR, CommitRange<PAL>, StatsRange>;
+    // Decay range caches deallocated memory and gradually releases it
+    // back to the parent, avoiding expensive repeated decommit/recommit
+    // cycles for transient allocation patterns.
+    using DecayR = Pipe<GlobalR, CommitRange<PAL>, DecayRange<PAL, Pagemap>>;
+
+    // Track stats of the memory handed out (outside decay so stats
+    // methods are directly visible to StatsCombiner).
+    using Stats = Pipe<DecayR, StatsRange>;
 
   private:
     static constexpr size_t page_size_bits =
diff --git a/src/snmalloc/backend_helpers/backend_helpers.h b/src/snmalloc/backend_helpers/backend_helpers.h
index ee339337b..8b643ca6b 100644
--- a/src/snmalloc/backend_helpers/backend_helpers.h
+++ b/src/snmalloc/backend_helpers/backend_helpers.h
@@ -5,6 +5,7 @@
 #include "buddy.h"
 #include "commitrange.h"
 #include "commonconfig.h"
+#include "decayrange.h"
 #include "defaultpagemapentry.h"
 #include "empty_range.h"
 #include "globalrange.h"
diff --git a/src/snmalloc/backend_helpers/decayrange.h b/src/snmalloc/backend_helpers/decayrange.h
new file mode 100644
index 000000000..99b9e22cb
--- /dev/null
+++ b/src/snmalloc/backend_helpers/decayrange.h
@@ -0,0 +1,418 @@
+#pragma once
+
+#include "../ds/ds.h"
+#include "../mem/mem.h"
+#include "empty_range.h"
+#include "largebuddyrange.h"
+#include "range_helpers.h"
+
+namespace snmalloc
+{
+  /**
+   * Intrusive singly-linked list using pagemap entries for storage.
+   *
+   * This uses BuddyChunkRep's pagemap entry access (direction=false, i.e.
+   * Word::Two) to store the "next" pointer for each node.
+   */
+  template<SNMALLOC_CONCEPT(IsWritablePagemap) Pagemap>
+  class DecayList
+  {
+    using Rep = BuddyChunkRep<Pagemap>;
+
+    /// Address of the first chunk in the list; zero means the list is empty.
+    uintptr_t head = 0;
+
+    DecayList(uintptr_t first) : head(first) {}
+
+  public:
+    constexpr DecayList() = default;
+
+    [[nodiscard]] bool is_empty() const
+    {
+      return head == 0;
+    }
+
+    /// The list that follows the first chunk; must not be called when empty.
+    DecayList get_next()
+    {
+      SNMALLOC_ASSERT(!is_empty());
+      return {Rep::get(Rep::ref(false, head))};
+    }
+
+    capptr::Arena<void> get_capability()
+    {
+      return capptr::Arena<void>::unsafe_from(reinterpret_cast<void*>(head));
+    }
+
+    /// Links `new_head_cap` in front of this list and returns the new list.
+    DecayList cons(capptr::Arena<void> new_head_cap)
+    {
+      auto addr = new_head_cap.unsafe_uintptr();
+      Rep::set(Rep::ref(false, addr), head);
+      return {addr};
+    }
+
+    /// Calls `f` on each chunk; the successor is read first, before `f` runs.
+    template<typename F>
+    void forall(F f)
+    {
+      for (auto node = *this; !node.is_empty();)
+      {
+        auto rest = node.get_next();
+        f(node.get_capability());
+        node = rest;
+      }
+    }
+  };
+
+  /**
+   * Concurrent stack for caching deallocated ranges.
+   *
+   * Supports the following concurrency pattern:
+   *   (push|pop)* || pop_all* || ... || pop_all*
+   *
+   * That is, a single thread can do push and pop, and other threads
+   * can do pop_all. pop_all returns all of the stack if it doesn't
+   * race, or empty if it does.
+   *
+   * The primary use case is single-threaded access, where other threads
+   * can attempt to drain all values (via the timer callback).
+   */
+  template<SNMALLOC_CONCEPT(IsWritablePagemap) Pagemap>
+  class DecayStack
+  {
+    static constexpr auto empty = DecayList<Pagemap>{};
+
+    alignas(CACHELINE_SIZE) stl::Atomic<DecayList<Pagemap>> stack{};
+
+    /// Detaches the whole list from the stack, leaving the stack empty.
+    DecayList<Pagemap> take()
+    {
+      if (!stack.load(stl::memory_order_relaxed).is_empty())
+        return stack.exchange(empty, stl::memory_order_acquire);
+      return empty;
+    }
+
+    /// Stores `new_head` as the stack contents; the stack must be empty.
+    void replace(DecayList<Pagemap> new_head)
+    {
+      SNMALLOC_ASSERT(stack.load().is_empty());
+      stack.store(new_head, stl::memory_order_release);
+    }
+
+  public:
+    constexpr DecayStack() = default;
+
+    /// Takes the current list, links the chunk in front and stores it back.
+    void push(capptr::Arena<void> new_head_cap)
+    {
+      replace(take().cons(new_head_cap));
+    }
+
+    /// Removes and returns the first chunk, or nullptr if none was taken.
+    capptr::Arena<void> pop()
+    {
+      auto taken = take();
+      if (taken.is_empty())
+        return nullptr;
+
+      replace(taken.get_next());
+      return taken.get_capability();
+    }
+
+    DecayList<Pagemap> pop_all()
+    {
+      return take();
+    }
+  };
+
+  /**
+   * A range that provides temporal caching of deallocated ranges.
+   *
+   * Instead of immediately releasing deallocated memory back to the parent
+   * range (which would decommit it), this range caches it locally and
+   * uses PAL timers to gradually release it. This avoids expensive
+   * repeated decommit/recommit cycles for transient allocation patterns
+   * (e.g. repeatedly allocating and deallocating ~800KB objects).
+   *
+   * The range uses an epoch-based rotation scheme:
+   *   - Deallocated ranges are placed in the current epoch's stack
+   *   - A timer periodically advances the epoch
+   *   - When the epoch advances, the oldest epoch's entries are flushed
+   *     to the parent range
+   *
+   * The parent range MUST be ConcurrencySafe, as the timer callback may
+   * flush entries from a different thread context.
+   *
+   * PAL - Platform abstraction layer (for timer support)
+   * Pagemap - Used for storing linked list nodes in pagemap entries
+   */
+  template<typename PAL, SNMALLOC_CONCEPT(IsWritablePagemap) Pagemap>
+  struct DecayRange
+  {
+    template<typename ParentRange = EmptyRange<>>
+    class Type : public ContainsParent<ParentRange>
+    {
+      using ContainsParent<ParentRange>::parent;
+
+    public:
+      static constexpr bool Aligned = ParentRange::Aligned;
+
+      static constexpr bool ConcurrencySafe = false;
+
+      using ChunkBounds = typename ParentRange::ChunkBounds;
+
+    private:
+      /**
+       * Maximum chunk size bits we cache (4 MiB = 2^22).
+       */
+      static constexpr size_t MAX_CACHEABLE_BITS = 22;
+
+      /**
+       * Maximum chunk size we cache (4 MiB).
+       * Larger allocations bypass the cache and go directly to/from parent.
+       */
+      static constexpr size_t MAX_CACHEABLE_SIZE =
+        bits::one_at_bit(MAX_CACHEABLE_BITS);
+
+      /**
+       * How many slab sizes that can be cached.
+       * Only covers sizes from MIN_CHUNK_SIZE up to MAX_CACHEABLE_SIZE.
+       */
+      static constexpr size_t NUM_SLAB_SIZES =
+        MAX_CACHEABLE_BITS - MIN_CHUNK_BITS + 1;
+
+      /**
+       * Number of epoch slots for cached ranges.
+       *
+       * Ranges not used within (NUM_EPOCHS - 1) timer periods will be
+       * released to the parent. E.g., with period=500ms and NUM_EPOCHS=4,
+       * memory not reused within 1500-2000ms will be released.
+       *
+       * Must be a power of 2.
+       */
+      static constexpr size_t NUM_EPOCHS = 4;
+      static_assert(bits::is_pow2(NUM_EPOCHS), "NUM_EPOCHS must be power of 2");
+
+      /**
+       * Per-sizeclass, per-epoch stacks of cached ranges.
+       */
+      ModArray<NUM_SLAB_SIZES, ModArray<NUM_EPOCHS, DecayStack<Pagemap>>>
+        chunk_stack;
+
+      /**
+       * Current epoch index.
+       */
+      static inline stl::Atomic<size_t> epoch{0};
+
+      /**
+       * Flag to ensure one-shot timer registration with the PAL.
+       */
+      static inline stl::AtomicBool registered_timer{false};
+
+      /**
+       * Flag indicating this instance has been registered in the global list.
+       */
+      stl::AtomicBool registered_local{false};
+
+      /**
+       * Global list of all activated DecayRange instances.
+       * Used by the timer to iterate and flush old entries.
+       */
+      static inline stl::Atomic<Type*> all_local{nullptr};
+
+      /**
+       * Next pointer for the global intrusive list.
+       */
+      Type* all_local_next{nullptr};
+
+      /**
+       * Flush the oldest epoch's entries across all instances
+       * and advance the epoch.
+       */
+      static void handle_decay_tick()
+      {
+        static_assert(
+          ParentRange::ConcurrencySafe,
+          "Parent range must be concurrency safe, as dealloc_range is called "
+          "from the timer callback on a potentially different thread.");
+
+        // The slot that becomes current next is the one holding the oldest
+        // entries, so it is drained before the epoch is published.
+        const size_t next_epoch =
+          (epoch.load(stl::memory_order_relaxed) + 1) % NUM_EPOCHS;
+
+        for (auto* range = all_local.load(stl::memory_order_acquire);
+             range != nullptr;
+             range = range->all_local_next)
+        {
+          for (size_t sc = 0; sc < NUM_SLAB_SIZES; sc++)
+          {
+            const size_t size = MIN_CHUNK_SIZE << sc;
+            auto stale = range->chunk_stack[sc][next_epoch].pop_all();
+
+            stale.forall([range, size](auto cap) {
+#ifdef SNMALLOC_TRACING
+              message<1024>(
+                "DecayRange::tick flushing {} size {} to parent",
+                cap.unsafe_ptr(),
+                size);
+#endif
+              range->parent.dealloc_range(cap, size);
+            });
+          }
+        }
+
+        // Make the freshly drained slot the current one.
+        epoch.store(next_epoch, stl::memory_order_release);
+      }
+
+      /**
+       * Timer callback object for periodic decay.
+       */
+      class DecayMemoryTimerObject : public PalTimerObject
+      {
+        /// Interval between decay ticks, in milliseconds.
+        static constexpr size_t PERIOD = 500;
+
+        static void process(PalTimerObject*)
+        {
+#ifdef SNMALLOC_TRACING
+          message<1024>("DecayRange::handle_decay_tick timer");
+#endif
+          handle_decay_tick();
+        }
+
+      public:
+        constexpr DecayMemoryTimerObject() : PalTimerObject(&process, PERIOD) {}
+      };
+
+      static inline DecayMemoryTimerObject timer_object;
+
+      void ensure_registered()
+      {
+        // One-shot: only the caller that flips the flag registers the timer.
+        if (
+          !registered_timer.load(stl::memory_order_relaxed) &&
+          !registered_timer.exchange(true, stl::memory_order_acq_rel))
+        {
+          PAL::register_timer(&timer_object);
+        }
+
+        // Once per instance: push this range onto the global all_local list.
+        if (
+          !registered_local.load(stl::memory_order_relaxed) &&
+          !registered_local.exchange(true, stl::memory_order_acq_rel))
+        {
+          auto* head = all_local.load(stl::memory_order_relaxed);
+          do
+          {
+            all_local_next = head;
+          } while (!all_local.compare_exchange_weak(
+            head, this, stl::memory_order_release, stl::memory_order_relaxed));
+        }
+      }
+
+    public:
+      constexpr Type() = default;
+
+      /// Serves `size` bytes from this range's cache when possible, otherwise
+      /// from the parent; a failing parent triggers cache flushes and retries.
+      CapPtr<void, ChunkBounds> alloc_range(size_t size)
+      {
+        SNMALLOC_ASSERT(bits::is_pow2(size));
+        SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE);
+
+        auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
+
+        // Chunks too large to be tracked are never cached: ask the parent.
+        if (slab_sizeclass >= NUM_SLAB_SIZES)
+          return parent.alloc_range(size);
+
+        if constexpr (pal_supports<Time, PAL>)
+        {
+          // Probe this range's cache, newest epoch slot first.
+          auto& by_epoch = chunk_stack[slab_sizeclass];
+          auto newest = epoch.load(stl::memory_order_relaxed);
+          for (size_t age = 0; age < NUM_EPOCHS; age++)
+          {
+            auto cached = by_epoch[(newest - age) % NUM_EPOCHS].pop();
+            if (cached != nullptr)
+            {
+#ifdef SNMALLOC_TRACING
+              message<1024>(
+                "DecayRange::alloc_range returning {} from local cache",
+                cached.unsafe_ptr());
+#endif
+              return cached;
+            }
+          }
+        }
+
+        // Ask the parent; on failure flush one epoch slot of every
+        // registered range and retry, up to NUM_EPOCHS times.
+        for (size_t tries = NUM_EPOCHS; tries > 0; tries--)
+        {
+          auto from_parent = parent.alloc_range(size);
+          if (from_parent != nullptr)
+          {
+#ifdef SNMALLOC_TRACING
+            message<1024>(
+              "DecayRange::alloc_range returning {} from parent",
+              from_parent.unsafe_ptr());
+#endif
+            return from_parent;
+          }
+
+          // OOM: force-flush decay caches to free memory.
+#ifdef SNMALLOC_TRACING
+          message<1024>("DecayRange::alloc_range OOM, flushing decay caches");
+#endif
+          handle_decay_tick();
+        }
+
+        // All NUM_EPOCHS flushes are done: one last request to the parent.
+        auto last = parent.alloc_range(size);
+#ifdef SNMALLOC_TRACING
+        message<1024>(
+          "DecayRange::alloc_range final attempt: {}", last.unsafe_ptr());
+#endif
+        return last;
+      }
+
+      void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
+      {
+        SNMALLOC_ASSERT(bits::is_pow2(size));
+        SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE);
+
+        auto slab_sizeclass = bits::next_pow2_bits(size) - MIN_CHUNK_BITS;
+
+        // Chunks too large to be tracked are returned to the parent at once.
+        if (slab_sizeclass >= NUM_SLAB_SIZES)
+        {
+          parent.dealloc_range(base, size);
+          return;
+        }
+
+        if constexpr (!pal_supports<Time, PAL>)
+        {
+          // No timer support in this PAL: skip the cache entirely.
+          parent.dealloc_range(base, size);
+        }
+        else
+        {
+          ensure_registered();
+
+#ifdef SNMALLOC_TRACING
+          message<1024>(
+            "DecayRange::dealloc_range caching {} size {}",
+            base.unsafe_ptr(),
+            size);
+#endif
+          auto& by_epoch = chunk_stack[slab_sizeclass];
+          by_epoch[epoch.load(stl::memory_order_relaxed)].push(base);
+        }
+      }
+    };
+  };
+} // namespace snmalloc
diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h
index 5ec7bf1f3..d60908659 100644
--- a/src/snmalloc/mem/corealloc.h
+++ b/src/snmalloc/mem/corealloc.h
@@ -3,6 +3,7 @@
 #include "../ds/ds.h"
 #include "check_init.h"
 #include "freelist.h"
+#include "largecache.h"
 #include "metadata.h"
 #include "pool.h"
 #include "remotecache.h"
@@ -181,6 +182,13 @@ namespace snmalloc
      */
     Ticker<typename Config::Pal> ticker;
 
+    /**
+     * Cache for large object allocations.
+     * Avoids pagemap manipulation and backend buddy tree operations
+     * for recently freed large allocations.
+     */
+    LargeObjectCache<Config> large_object_cache;
+
     /**
      * The message queue needs to be accessible from other threads
      *
@@ -695,14 +703,79 @@ namespace snmalloc
                 return Conts::success(result, size, true);
               }
 
+              auto chunk_size = large_size_to_chunk_size(size);
+              auto sizeclass = size_to_sizeclass_full(size);
+
+              // Check the frontend large object cache first.
+              // This avoids all pagemap and backend manipulation.
+              auto* cached_meta = self->large_object_cache.try_alloc(
+                chunk_size, [self](BackendSlabMetadata* fmeta) {
+                  self->flush_large_cache_entry(fmeta);
+                });
+              if (cached_meta != nullptr)
+              {
+                // Cache hit: pagemap still valid, recover address from meta.
+                auto slab_addr =
+                  cached_meta->get_slab_interior(freelist::Object::key_root);
+                cached_meta->initialise_large(
+                  slab_addr, freelist::Object::key_root);
+                self->laden.insert(cached_meta);
+
+                // Reconstruct the capptr from the address.
+                auto p = Config::Backend::capptr_rederive_alloc(
+                  capptr::Alloc<void>::unsafe_from(
+                    reinterpret_cast<void*>(slab_addr)),
+                  chunk_size);
+                return Conts::success(capptr_reveal(p), size);
+              }
+
+              // Cache miss: go to backend.
               // Grab slab of correct size
               // Set remote as large allocator remote.
               auto [chunk, meta] = Config::Backend::alloc_chunk(
                 self->get_backend_local_state(),
-                large_size_to_chunk_size(size),
-                PagemapEntry::encode(
-                  self->public_state(), size_to_sizeclass_full(size)),
-                size_to_sizeclass_full(size));
+                chunk_size,
+                PagemapEntry::encode(self->public_state(), sizeclass),
+                sizeclass);
+
+              // If backend OOM, try staged cache flush and retry.
+              // First flush smaller sizes — they coalesce upward in the
+              // buddy. If that's not enough, flush one larger entry —
+              // the buddy can split it.
+              if (meta == nullptr)
+              {
+                auto flush_fn = [self](BackendSlabMetadata* fmeta) {
+                  self->flush_large_cache_entry(fmeta);
+                };
+
+                // Stage 1: flush all smaller sizeclasses.
+                if (self->large_object_cache.flush_smaller(
+                      chunk_size, flush_fn))
+                {
+                  auto retry = Config::Backend::alloc_chunk(
+                    self->get_backend_local_state(),
+                    chunk_size,
+                    PagemapEntry::encode(self->public_state(), sizeclass),
+                    sizeclass);
+                  chunk = retry.first;
+                  meta = retry.second;
+                }
+
+                // Stage 2: flush a single larger-or-equal entry.
+                if (
+                  meta == nullptr &&
+                  self->large_object_cache.flush_one_larger(
+                    chunk_size, flush_fn))
+                {
+                  auto retry = Config::Backend::alloc_chunk(
+                    self->get_backend_local_state(),
+                    chunk_size,
+                    PagemapEntry::encode(self->public_state(), sizeclass),
+                    sizeclass);
+                  chunk = retry.first;
+                  meta = retry.second;
+                }
+              }
 
 #ifdef SNMALLOC_TRACING
               message<1024>(
@@ -1086,6 +1159,7 @@ namespace snmalloc
       const PagemapEntry& entry,
       BackendSlabMetadata* meta) noexcept
     {
+      UNUSED(p);
       // TODO: Handle message queue on this path?
 
       if (meta->is_large())
@@ -1100,15 +1174,21 @@ namespace snmalloc
 
 #ifdef SNMALLOC_TRACING
         message<1024>("Large deallocation: {}", size);
-#else
-        UNUSED(size);
 #endif
 
         // Remove from set of fully used slabs.
         meta->node.remove();
 
-        Config::Backend::dealloc_chunk(
-          get_backend_local_state(), *meta, p, size, entry.get_sizeclass());
+        // Cache in the frontend large object cache.
+        // The meta's free_queue already holds the chunk address (from
+        // initialise_large), and the pagemap entry retains the sizeclass
+        // and remote allocator info. No data is stored in the freed object.
+        // Epoch sync happens internally; stale entries are flushed via the
+        // callback.
+        large_object_cache.cache(
+          meta, size, [this](BackendSlabMetadata* fmeta) {
+            flush_large_cache_entry(fmeta);
+          });
 
         return;
       }
@@ -1117,6 +1197,24 @@ namespace snmalloc
       dealloc_local_object_meta(entry, meta);
     }
 
+    /**
+     * Flush a single cached large object back to the backend.
+     * Recovers the chunk address from the metadata and size from the pagemap.
+     */
+    void flush_large_cache_entry(BackendSlabMetadata* meta)
+    {
+      // The metadata yields the chunk address, the pagemap its sizeclass.
+      auto chunk_addr = meta->get_slab_interior(freelist::Object::key_root);
+      const PagemapEntry& entry = Config::Backend::get_metaentry(chunk_addr);
+      auto sizeclass = entry.get_sizeclass();
+      Config::Backend::dealloc_chunk(
+        get_backend_local_state(),
+        *meta,
+        capptr::Alloc<void>::unsafe_from(reinterpret_cast<void*>(chunk_addr)),
+        bits::one_at_bit(sizeclass.as_large()),
+        sizeclass);
+    }
+
     /**
      * Very slow path for object deallocation.
      *
@@ -1427,6 +1525,10 @@ namespace snmalloc
         dealloc_local_slabs<mitigations(freelist_teardown_validate)>(sizeclass);
       }
 
+      // Flush the large object cache back to the backend.
+      large_object_cache.flush_all(
+        [this](BackendSlabMetadata* fmeta) { flush_large_cache_entry(fmeta); });
+
       if constexpr (mitigations(freelist_teardown_validate))
       {
         laden.iterate(
diff --git a/src/snmalloc/mem/largecache.h b/src/snmalloc/mem/largecache.h
new file mode 100644
index 000000000..bcb095c44
--- /dev/null
+++ b/src/snmalloc/mem/largecache.h
@@ -0,0 +1,417 @@
+#pragma once
+
+#include "../ds/ds.h"
+#include "../pal/pal_ds.h"
+#include "metadata.h"
+#include "sizeclasstable.h"
+
+namespace snmalloc
+{
+  /**
+   * Frontend cache for large object allocations.
+   *
+   * This cache sits in the per-thread Allocator and intercepts large
+   * alloc/dealloc before they reach the backend. By caching recently freed
+   * large objects, we avoid:
+   *   - Pagemap writes on dealloc (clearing N entries) and alloc (setting N
+   *     entries)
+   *   - Metadata allocation/deallocation
+   *   - Buddy allocator tree operations
+   *   - Decommit/recommit syscalls (if DecayRange is also in the pipeline)
+   *
+   * The cache uses the slab metadata's SeqSet node to link cached entries,
+   * storing no data inside the freed object itself. The chunk address is
+   * recovered from the metadata's free_queue, and the chunk size from the
+   * pagemap entry's sizeclass.
+   *
+   * Epoch rotation is driven by a PAL timer (DecayMemoryTimerObject).
+   * A global epoch counter is advanced periodically by the timer. Each
+   * cache instance tracks the last epoch it observed and self-flushes
+   * stale epochs on its next operation. This means no concurrent access
+   * to the per-thread SeqSets is needed.
+   *
+   * Each sizeclass has an adaptive budget that bounds how many items can
+   * be cached. The budget starts at 1 and adjusts on each epoch rotation:
+   *   - Flushed (stale) entries outnumber the peak run of misses: shrink
+   *     the budget by half the difference.
+   *   - Peak run of misses exceeds the flushed count: grow by the difference.
+   * This allows the cache to grow to match the working set while shrinking
+   * when the workload subsides.
+   *
+   * Template parameter Config provides Backend, PagemapEntry, Pal, etc.
+   */
+  template<typename Config>
+  class LargeObjectCache
+  {
+    using PAL = typename Config::Pal;
+    using BackendSlabMetadata = typename Config::Backend::SlabMetadata;
+    using PagemapEntry = typename Config::PagemapEntry;
+
+    /**
+     * Maximum chunk size bits we cache (4 MiB = 2^22).
+     */
+    static constexpr size_t MAX_CACHEABLE_BITS = 22;
+
+    /**
+     * Maximum chunk size we cache (4 MiB).
+     * Larger allocations bypass the cache and go directly to/from backend.
+     */
+    static constexpr size_t MAX_CACHEABLE_SIZE =
+      bits::one_at_bit(MAX_CACHEABLE_BITS);
+
+    /**
+     * Number of chunk sizeclasses we actually cache.
+     * Only covers sizes from MIN_CHUNK_SIZE up to MAX_CACHEABLE_SIZE.
+     */
+    static constexpr size_t NUM_SIZECLASSES =
+      MAX_CACHEABLE_BITS - MIN_CHUNK_BITS + 1;
+
+    /**
+     * Number of epoch slots for cached ranges.
+     * Must be a power of 2.
+     */
+    static constexpr size_t NUM_EPOCHS = 4;
+    static_assert(bits::is_pow2(NUM_EPOCHS));
+
+    /**
+     * Global epoch counter, advanced by the timer callback.
+     * All LargeObjectCache instances read this to detect when epochs
+     * have advanced and stale entries need flushing.
+     */
+    static inline stl::Atomic<size_t> global_epoch{0};
+
+    /**
+     * Timer callback that advances the global epoch.
+     */
+    class DecayMemoryTimerObject : public PalTimerObject
+    {
+      /// Interval between epoch advances, in milliseconds.
+      static constexpr size_t PERIOD = 500;
+
+      static void process(PalTimerObject*)
+      {
+        auto seen = global_epoch.load(stl::memory_order_relaxed);
+        global_epoch.store(seen + 1, stl::memory_order_release);
+      }
+
+    public:
+      constexpr DecayMemoryTimerObject() : PalTimerObject(&process, PERIOD) {}
+    };
+
+    static inline DecayMemoryTimerObject timer_object;
+
+    /**
+     * Flag to ensure one-shot timer registration.
+     */
+    static inline stl::AtomicBool registered_timer{false};
+
+    /**
+     * Per-sizeclass adaptive budget state.
+     */
+    struct SizeclassState
+    {
+      /// Maximum number of items allowed in the cache for this sizeclass.
+      /// Starts at 1 so the first deallocation is always cached.
+      size_t budget{1};
+
+      /// Current number of cached items across all epoch slots.
+      size_t count{0};
+
+      /// Length of the current run of cache misses.
+      /// Reset to 0 on every successful cache insert and on every epoch sync.
+      size_t misses{0};
+
+      /// Longest run of misses seen since the last epoch sync.
+      /// Budget growth is driven by this value, i.e. by the deepest run of
+      /// consecutive misses rather than by the cumulative miss total.
+      size_t peak_misses{0};
+    };
+
+    /**
+     * Per-sizeclass budget tracking.
+     */
+    ModArray<NUM_SIZECLASSES, SizeclassState> sc_state;
+
+    /**
+     * Per-sizeclass, per-epoch SeqSets of cached metadata.
+     * Indexed as lists[sizeclass_index][epoch % NUM_EPOCHS].
+     */
+    ModArray<NUM_SIZECLASSES, ModArray<NUM_EPOCHS, SeqSet<BackendSlabMetadata>>>
+      lists;
+
+    /**
+     * The epoch this instance last synced to.
+     * Used to detect when new epochs have passed and old ones need flushing.
+     */
+    size_t local_epoch{0};
+
+    /**
+     * Index for a power-of-two chunk size of at least MIN_CHUNK_SIZE.
+     */
+    static size_t to_sizeclass(size_t chunk_size)
+    {
+      SNMALLOC_ASSERT(bits::is_pow2(chunk_size));
+      SNMALLOC_ASSERT(chunk_size >= MIN_CHUNK_SIZE);
+      return bits::next_pow2_bits(chunk_size) - MIN_CHUNK_BITS;
+    }
+
+    /**
+     * Register the global timer if not already done.
+     */
+    void ensure_registered()
+    {
+      if constexpr (pal_supports<Time, PAL>)
+      {
+        // Cheap relaxed check first; the exchange then lets exactly one
+        // caller through to register the shared timer object.
+        if (registered_timer.load(stl::memory_order_relaxed))
+          return;
+        if (!registered_timer.exchange(true, stl::memory_order_acq_rel))
+          PAL::register_timer(&timer_object);
+      }
+    }
+
+    /**
+     * Catch up to the global epoch, flushing any stale epochs and
+     * adjusting per-sizeclass budgets.
+     */
+    template<typename FlushFn>
+    void sync_epoch(FlushFn&& flush_fn)
+    {
+      if constexpr (pal_supports<Time, PAL>)
+      {
+        // Fast path: nothing to do until the timer has moved the epoch on.
+        const size_t target = global_epoch.load(stl::memory_order_acquire);
+        if (target == local_epoch)
+          return;
+
+        // Having missed NUM_EPOCHS or more ticks means every slot is stale.
+        size_t stale_slots = target - local_epoch;
+        if (stale_slots > NUM_EPOCHS)
+          stale_slots = NUM_EPOCHS;
+
+        // Remember the occupancy so the number flushed can be derived below.
+        size_t count_before[NUM_SIZECLASSES];
+        for (size_t sc = 0; sc < NUM_SIZECLASSES; sc++)
+          count_before[sc] = sc_state[sc].count;
+
+        // Drain the slots that follow the last epoch this cache observed.
+        for (size_t step = 1; step <= stale_slots; step++)
+        {
+          flush_epoch_slot((local_epoch + step) % NUM_EPOCHS, flush_fn);
+        }
+
+        // Net the peak miss run against the flushed surplus per sizeclass.
+        for (size_t sc = 0; sc < NUM_SIZECLASSES; sc++)
+        {
+          auto& state = sc_state[sc];
+          const size_t flushed = count_before[sc] - state.count;
+          const size_t peak = state.peak_misses;
+
+          if (peak > flushed)
+          {
+            // Demand outran the cache: grow by the shortfall.
+            state.budget += peak - flushed;
+          }
+          else
+          {
+            // Surplus aged out: give back half of it (a no-op when equal).
+            state.budget -= (flushed - peak) / 2;
+          }
+
+          state.misses = 0;
+          state.peak_misses = 0;
+        }
+
+        // Record the epoch this cache is now in step with.
+        local_epoch = target;
+      }
+    }
+
+    /**
+     * Flush all entries in a single epoch slot.
+     * Decrements per-sizeclass counts.
+     */
+    template<typename FlushFn>
+    void flush_epoch_slot(size_t epoch_slot, FlushFn&& flush_fn)
+    {
+      for (size_t sc = 0; sc < NUM_SIZECLASSES; sc++)
+      {
+        // Count down first, then pass the popped entry to flush_fn.
+        for (auto& slot = lists[sc][epoch_slot]; !slot.is_empty();)
+        {
+          sc_state[sc].count--;
+          flush_fn(slot.pop_front());
+        }
+      }
+    }
+
+  public:
+    constexpr LargeObjectCache() = default;
+
+    /**
+     * Try to satisfy a large allocation from the cache.
+     *
+     * @param chunk_size  The power-of-2 chunk size needed.
+     * @param flush_fn    Callback to flush stale entries during epoch sync.
+     * @return Metadata for a cached chunk, or nullptr on cache miss.
+     */
+    template<typename FlushFn>
+    BackendSlabMetadata* try_alloc(size_t chunk_size, FlushFn&& flush_fn)
+    {
+      // Sizes above the cacheable limit are never stored, so never hit.
+      if (chunk_size > MAX_CACHEABLE_SIZE)
+        return nullptr;
+
+      sync_epoch(flush_fn);
+
+      const auto sc = to_sizeclass(chunk_size);
+      auto& state = sc_state[sc];
+
+      // Probe the slot of the current epoch first, then ever older ones.
+      for (size_t age = 0; age < NUM_EPOCHS; age++)
+      {
+        auto& slot = lists[sc][(local_epoch - age) % NUM_EPOCHS];
+        if (slot.is_empty())
+          continue;
+
+        state.count--;
+        return slot.pop_front();
+      }
+
+      // Miss: extend the current run and remember the longest run seen.
+      state.misses++;
+      if (state.peak_misses < state.misses)
+        state.peak_misses = state.misses;
+      return nullptr;
+    }
+
+    /**
+     * Cache a large deallocation.
+     *
+     * If the sizeclass is at its budget, the entry is flushed immediately
+     * instead of being cached.
+     *
+     * @param meta        The slab metadata for the chunk.
+     * @param chunk_size  The power-of-2 chunk size.
+     * @param flush_fn    Callback to flush stale entries during epoch sync,
+     *                    and to flush this entry if over budget.
+     */
+    template<typename FlushFn>
+    void cache(BackendSlabMetadata* meta, size_t chunk_size, FlushFn&& flush_fn)
+    {
+      // Chunks above the cacheable limit always go straight to flush_fn.
+      if (chunk_size > MAX_CACHEABLE_SIZE)
+      {
+        flush_fn(meta);
+        return;
+      }
+
+      ensure_registered();
+      sync_epoch(flush_fn);
+
+      const auto sc = to_sizeclass(chunk_size);
+      auto& state = sc_state[sc];
+      if (state.count < state.budget)
+      {
+        // Within budget: keep the chunk and end the current run of misses.
+        state.count++;
+        state.misses = 0;
+        lists[sc][local_epoch % NUM_EPOCHS].insert(meta);
+        return;
+      }
+      // Budget exhausted: flush this chunk instead of caching it.
+      flush_fn(meta);
+    }
+
+    /**
+     * Flush all cached entries back to the backend.
+     * Called during allocator teardown/flush.
+     */
+    template<typename FlushFn>
+    void flush_all(FlushFn&& flush_fn)
+    {
+      // Drain every epoch slot in turn, regardless of how old it is.
+      // Budgets and miss counters are left untouched.
+      for (size_t slot = 0; slot < NUM_EPOCHS; slot++)
+        flush_epoch_slot(slot, flush_fn);
+    }
+
+    /**
+     * Flush all cached entries with sizeclass strictly smaller than
+     * the given chunk_size. These can coalesce in the buddy allocator
+     * to form the needed size.
+     *
+     * @return true if any entries were flushed.
+     */
+    template<typename FlushFn>
+    bool flush_smaller(size_t chunk_size, FlushFn&& flush_fn)
+    {
+      // Requests above the cacheable limit exceed every cached sizeclass.
+      size_t limit_sc = NUM_SIZECLASSES;
+      if (chunk_size <= MAX_CACHEABLE_SIZE)
+        limit_sc = to_sizeclass(chunk_size);
+      bool any_flushed = false;
+      for (size_t sc = 0; sc < limit_sc; sc++)
+      {
+        for (size_t slot = 0; slot < NUM_EPOCHS; slot++)
+        {
+          auto& entries = lists[sc][slot];
+          while (!entries.is_empty())
+          {
+            sc_state[sc].count--;
+            flush_fn(entries.pop_front());
+            any_flushed = true;
+          }
+        }
+      }
+      return any_flushed;
+    }
+
+    /**
+     * Flush a single cached entry with sizeclass >= the given chunk_size.
+     * The buddy allocator can split this to satisfy the request.
+     *
+     * @return true if an entry was flushed.
+     */
+    template<typename FlushFn>
+    bool flush_one_larger(size_t chunk_size, FlushFn&& flush_fn)
+    {
+      // No cached entry is big enough for a request above the limit.
+      if (chunk_size > MAX_CACHEABLE_SIZE)
+        return false;
+
+      // Scan upwards from the requested sizeclass; stop at the first entry.
+      for (size_t sc = to_sizeclass(chunk_size); sc < NUM_SIZECLASSES; sc++)
+      {
+        for (size_t slot = 0; slot < NUM_EPOCHS; slot++)
+        {
+          auto& entries = lists[sc][slot];
+          if (entries.is_empty())
+            continue;
+
+          sc_state[sc].count--;
+          flush_fn(entries.pop_front());
+          return true;
+        }
+      }
+      return false;
+    }
+
+    /**
+     * Check if the cache is completely empty.
+     */
+    bool is_empty() const
+    {
+      // Walk every (sizeclass, epoch) slot; one occupied slot is enough.
+      for (size_t sc = 0; sc < NUM_SIZECLASSES; sc++)
+      {
+        const auto& by_epoch = lists[sc];
+        for (size_t slot = 0; slot < NUM_EPOCHS; slot++)
+          if (!by_epoch[slot].is_empty())
+            return false;
+      }
+      return true;
+    }
+  };
+} // namespace snmalloc
diff --git a/src/snmalloc/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h
index a44079dea..602749aad 100644
--- a/src/snmalloc/pal/pal_windows.h
+++ b/src/snmalloc/pal/pal_windows.h
@@ -592,7 +592,7 @@ namespace snmalloc
 
 #  ifdef PLATFORM_HAS_VIRTUALALLOC2
   template<bool state_using>
-  void* PALWindows::reserve_aligned(size_t size) noexcept
+  inline void* PALWindows::reserve_aligned(size_t size) noexcept
   {
     SNMALLOC_ASSERT(bits::is_pow2(size));
     SNMALLOC_ASSERT(size >= minimum_alloc_size);
@@ -622,7 +622,7 @@ namespace snmalloc
   }
 #  endif
 
-  void* PALWindows::reserve(size_t size) noexcept
+  inline void* PALWindows::reserve(size_t size) noexcept
   {
     void* ret = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);