diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java index f066a6397d9..25687239380 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java @@ -24,6 +24,7 @@ import org.apache.jackrabbit.oak.commons.time.Stopwatch; import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.AbstractPersistentCache; import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.SegmentCacheStats; +import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,6 +59,28 @@ public class PersistentDiskCache extends AbstractPersistentCache { public static final long DEFAULT_TEMP_FILES_CLEANUP_WAIT_TIME_MS = 60000; private static final String TEMP_FILE_SUFFIX = ".part"; + /** + * Name of the feature toggle that controls the OAK-12212 fix, see + * {@link #FT_OAK_12212_SKIP_MISSING_FILE_CHECK}. + */ + public static final String FT_OAK_12212 = "FT_OAK-12212"; + + /** + * Kill switch for the OAK-12212 fix in {@link #writeSegment}. + *

+ * When {@code false} (default), {@code writeSegment} skips the on-disk + * write and the corresponding {@code cacheSize} increment if the segment + * is already present on disk. Segments are immutable, so a redundant + * write would only produce identical bytes — but every such call used to + * increment {@code cacheSize} while {@code Files.move} silently replaced + * the file on POSIX systems, causing the in-memory counter to drift far + * above the actual cache directory size and above {@code maxCacheSizeBytes}. + *

+ * Set to {@code true} via the {@link FeatureToggle} registered with the + * Whiteboard to revert to the pre-fix behaviour. + */ + public static final AtomicBoolean FT_OAK_12212_SKIP_MISSING_FILE_CHECK = new AtomicBoolean(false); + private final File directory; private final long maxCacheSizeBytes; private final DiskCacheIOMonitor diskCacheIOMonitor; @@ -148,17 +171,27 @@ public void writeSegment(long msb, long lsb, Buffer buffer) { Runnable task = () -> { if (writesPending.add(segmentId)) { try { - int fileSize; - try (FileChannel channel = new FileOutputStream(tempSegmentFile).getChannel()) { - fileSize = bufferCopy.write(channel); - } - try { - Files.move(tempSegmentFile.toPath(), segmentFile.toPath(), StandardCopyOption.ATOMIC_MOVE); - } catch (AtomicMoveNotSupportedException e) { - Files.move(tempSegmentFile.toPath(), segmentFile.toPath()); + // OAK-12212: skip the on-disk write and the cacheSize + // increment when the segment is already on disk. Segments + // are immutable, so a redundant write would only rewrite + // identical bytes; the pre-fix behaviour still incremented + // cacheSize on every such call while Files.move silently + // replaced the file on POSIX systems, leaking phantom + // bytes into the in-memory counter on every redundant + // write. Guarded by FT_OAK-12212 (disabled = active fix). + if (FT_OAK_12212_SKIP_MISSING_FILE_CHECK.get() || !segmentFile.exists()) { + int fileSize; + try (FileChannel channel = new FileOutputStream(tempSegmentFile).getChannel()) { + fileSize = bufferCopy.write(channel); + } + try { + Files.move(tempSegmentFile.toPath(), segmentFile.toPath(), StandardCopyOption.ATOMIC_MOVE); + } catch (AtomicMoveNotSupportedException e) { + Files.move(tempSegmentFile.toPath(), segmentFile.toPath()); + } + long cacheSizeAfter = cacheSize.addAndGet(fileSize); + diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, fileSize); } - long cacheSizeAfter = cacheSize.addAndGet(fileSize); - diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, fileSize); } catch (Exception e) { logger.error("Error writing segment {} to cache", segmentId, e); try { diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/RemotePersistentCacheService.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/RemotePersistentCacheService.java index a59ba65e817..f89d414c944 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/RemotePersistentCacheService.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/RemotePersistentCacheService.java @@ -25,6 +25,7 @@ import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard; import org.apache.jackrabbit.oak.segment.spi.monitor.RoleStatisticsProvider; import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.PersistentCache; +import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle; import org.apache.jackrabbit.oak.spi.whiteboard.Registration; import org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils; import org.apache.jackrabbit.oak.stats.StatisticsProvider; @@ -38,6 +39,7 @@ import java.io.File; import java.io.IOException; +import java.util.Collections; import java.util.Hashtable; @Component( @@ -93,6 +95,12 @@ private PersistentCache createPersistentCache(Configuration configuration, Close PersistentDiskCache persistentDiskCache = new PersistentDiskCache(new File(configuration.diskCacheDirectory()), configuration.diskCacheMaxSizeMB(), diskCacheIOMonitor); closer.register(persistentDiskCache); + // OAK-12212: expose the kill switch for the cacheSize-accounting + // fix so it can be flipped at runtime via the Whiteboard. + registerCloseable(osgiWhiteboard.register(FeatureToggle.class, + new FeatureToggle(PersistentDiskCache.FT_OAK_12212, PersistentDiskCache.FT_OAK_12212_SKIP_MISSING_FILE_CHECK), + Collections.emptyMap())); + CacheStatsMBean diskCacheStatsMBean = persistentDiskCache.getCacheStats(); registerCloseable(registerMBean(CacheStatsMBean.class, diskCacheStatsMBean, CacheStats.TYPE, diskCacheStatsMBean.getName())); diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/package-info.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/package-info.java index ecbd97f0adf..d4be2d991b0 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/package-info.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/package-info.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -@Version("2.0.0") +@Version("2.1.0") package org.apache.jackrabbit.oak.segment.remote.persistentcache; import org.osgi.annotation.versioning.Version; diff --git a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheSizeAccountingTest.java b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheSizeAccountingTest.java new file mode 100644 index 00000000000..4922e7fcaec --- /dev/null +++ b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheSizeAccountingTest.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.jackrabbit.oak.segment.remote.persistentcache; + +import org.apache.commons.io.FileUtils; +import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.AbstractPersistentCache; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.lang.reflect.Field; +import java.time.LocalDate; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +/** + * Regression tests for OAK-12212. + * + *

Before the OAK-12212 fix, the in-memory {@code cacheSize} counter + * maintained by {@link PersistentDiskCache} could drift well above the actual + * size of the cache directory: the {@code writesPending} guard in + * {@link PersistentDiskCache#writeSegment(long, long, org.apache.jackrabbit.oak.commons.Buffer)} + * only prevents simultaneously running write tasks for the same + * segment id, not sequentially running ones. Every {@code writeSegment} + * invocation that reached the body still added {@code fileSize} to + * {@code cacheSize}, yet on POSIX file systems {@code Files.move} with + * {@code ATOMIC_MOVE} maps to {@code rename(2)} and silently replaces an + * existing destination — so repeated writes of the same segment id produced + * a single file on disk but multiple increments of the in-memory counter. + * The cleanup path could only subtract the actual length of the (one) file + * it deleted, so the over-counted bytes were never repaid. + * + *

The fix is gated by {@link PersistentDiskCache#FT_OAK_12212_SKIP_MISSING_FILE_CHECK}. + * With the toggle in its default state (fix enabled), {@code writeSegment} + * short-circuits when the segment is already on disk; flipping the toggle + * restores the original behaviour for emergency rollback. + * + *

To make the tests deterministic, the cache's internal worker executor + * is replaced with a single-threaded one and explicitly drained after every + * {@code writeSegment} call using a marker task. + */ +public class PersistentDiskCacheSizeAccountingTest { + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target")); + + private static final int SEGMENT_LEN = 256 * 1024; + + private File cacheFolder; + private DiskCacheIOMonitor ioMonitor; + private PersistentDiskCache persistentCache; + + /** + * Captures the running counter value reported via + * {@link DiskCacheIOMonitor#updateCacheSize(long, long)} every time the + * cache mutates its in-memory {@code cacheSize}. The latest value mirrors + * the current state of the (otherwise package-private) {@code AtomicLong}. + */ + private final AtomicLong lastReportedCacheSize = new AtomicLong(0); + + @Before + public void setUp() throws Exception { + cacheFolder = temporaryFolder.newFolder(); + ioMonitor = mock(DiskCacheIOMonitor.class); + doAnswer(inv -> { + lastReportedCacheSize.set(inv.getArgument(0, Long.class)); + return null; + }).when(ioMonitor).updateCacheSize(anyLong(), anyLong()); + // The OAK-12212 kill switch is a process-wide AtomicBoolean. Reset + // it before each test so previous tests cannot leak their toggle + // state into the next one. + PersistentDiskCache.FT_OAK_12212_SKIP_MISSING_FILE_CHECK.set(false); + } + + @After + public void tearDown() { + if (persistentCache != null) { + persistentCache.close(); + persistentCache = null; + } + PersistentDiskCache.FT_OAK_12212_SKIP_MISSING_FILE_CHECK.set(false); + } + + /** + * Writes the same segment id repeatedly with a maximum cache size big + * enough that the cleanup path never runs. + * + *

With the OAK-12212 fix enabled (default), only the first write + * actually touches disk and the counter stays at one segment size. + * Without the fix the cache directory still holds exactly one + * segment-sized file (POSIX rename silently replaces), but the + * in-memory counter is incremented {@code writes} times. + */ + @Test + public void cacheSizeCounterMustMatchDirectorySizeAfterRepeatedWritesOfSameSegment() + throws Exception { + // Cache big enough that the cleanup path never runs during the test. + persistentCache = new PersistentDiskCache(cacheFolder, /* maxCacheSizeMB */ 1024, ioMonitor); + replaceExecutorWithSingleThreaded(persistentCache); + + final byte[] segmentBytes = randomBytes(SEGMENT_LEN); + final UUID segmentId = UUID.randomUUID(); + final long msb = segmentId.getMostSignificantBits(); + final long lsb = segmentId.getLeastSignificantBits(); + final int writes = 4; // mirrors the ~4x drift seen in the heap dump + + for (int i = 0; i < writes; i++) { + persistentCache.writeSegment(msb, lsb, org.apache.jackrabbit.oak.commons.Buffer.wrap(segmentBytes)); + drainExecutor(persistentCache); + } + + // Sanity check: updateCacheSize must have been invoked at least once + // per write, so the captured value is meaningful. + verify(ioMonitor, atLeastOnce()).updateCacheSize(anyLong(), anyLong()); + + File segmentFile = new File(cacheFolder, segmentId.toString()); + assertTrue("Segment file must exist on disk after the writes", segmentFile.isFile()); + + long actualDirectorySize = FileUtils.sizeOfDirectory(cacheFolder); + assertEquals( + "Directory must hold exactly one segment-sized file. If this fails" + + " the test setup is broken (e.g. the platform's Files.move" + + " did not replace the existing destination); rerun on a POSIX" + + " file system.", + SEGMENT_LEN, actualDirectorySize); + + long reportedCacheSize = lastReportedCacheSize.get(); + + // Core invariant established by the OAK-12212 fix: the in-memory + // counter must reflect the actual size on disk. + assertEquals( + "In-memory cacheSize counter has drifted above the actual cache" + + " directory size. counter=" + reportedCacheSize + + ", directorySize=" + actualDirectorySize + + ", writes=" + writes + + ", segmentSize=" + SEGMENT_LEN + ".", + (long) actualDirectorySize, reportedCacheSize); + } + + /** + * Same workload but with a tight {@code maxCacheSizeBytes} so the + * cleanup path would run between writes. Without the OAK-12212 fix, the + * cleanup decrements the counter by the actual length on disk but the + * extra increments contributed by previous redundant writes are never + * repaid; the end state has a counter that no longer matches the + * directory size. With the fix enabled, redundant writes never happen + * in the first place, so the counter and the directory stay in sync. + */ + @Test + public void cacheSizeCounterMustMatchDirectorySizeAcrossWriteAndCleanupCycles() + throws Exception { + // Small max so cleanUp() actually triggers between writes. + final int maxCacheSizeMB = 1; + persistentCache = new PersistentDiskCache(cacheFolder, maxCacheSizeMB, ioMonitor); + replaceExecutorWithSingleThreaded(persistentCache); + + final byte[] segmentBytes = randomBytes(SEGMENT_LEN); + final UUID segmentId = UUID.randomUUID(); + final long msb = segmentId.getMostSignificantBits(); + final long lsb = segmentId.getLeastSignificantBits(); + // Enough writes for the cleanup path to fire several times. + final int writes = 8; + + for (int i = 0; i < writes; i++) { + persistentCache.writeSegment(msb, lsb, org.apache.jackrabbit.oak.commons.Buffer.wrap(segmentBytes)); + drainExecutor(persistentCache); + } + + long actualDirectorySize = FileUtils.sizeOfDirectory(cacheFolder); + long reportedCacheSize = lastReportedCacheSize.get(); + + assertEquals( + "In-memory cacheSize counter has drifted out of sync with the" + + " cache directory size after repeated write+cleanup cycles." + + " counter=" + reportedCacheSize + + ", directorySize=" + actualDirectorySize + + ", writes=" + writes + + ", segmentSize=" + SEGMENT_LEN + ".", + actualDirectorySize, reportedCacheSize); + } + + /** + * Activates the kill switch ({@code FT_OAK_12212_DISABLE = true}) and + * verifies that the legacy buggy behaviour is restored: after writing + * the same segment id N times the in-memory counter is + * {@code N * segmentSize} while the directory holds a single + * segment-sized file. + * + *

This pins down what the toggle actually controls and ensures the + * rollback path is wired correctly, so we can flip the toggle in + * production with confidence if the fix ever needs to be disabled. + */ + @Test + public void killSwitchRestoresLegacyDoubleCountingBehaviour() throws Exception { + PersistentDiskCache.FT_OAK_12212_SKIP_MISSING_FILE_CHECK.set(true); + + persistentCache = new PersistentDiskCache(cacheFolder, /* maxCacheSizeMB */ 1024, ioMonitor); + replaceExecutorWithSingleThreaded(persistentCache); + + final byte[] segmentBytes = randomBytes(SEGMENT_LEN); + final UUID segmentId = UUID.randomUUID(); + final long msb = segmentId.getMostSignificantBits(); + final long lsb = segmentId.getLeastSignificantBits(); + final int writes = 4; + + for (int i = 0; i < writes; i++) { + persistentCache.writeSegment(msb, lsb, org.apache.jackrabbit.oak.commons.Buffer.wrap(segmentBytes)); + drainExecutor(persistentCache); + } + + long actualDirectorySize = FileUtils.sizeOfDirectory(cacheFolder); + long reportedCacheSize = lastReportedCacheSize.get(); + + assertEquals( + "With the kill switch active the directory still holds exactly" + + " one segment file (POSIX rename replaces silently).", + SEGMENT_LEN, actualDirectorySize); + assertEquals( + "Kill switch must restore the legacy behaviour: cacheSize is" + + " incremented by segmentSize on every write call.", + (long) writes * SEGMENT_LEN, reportedCacheSize); + assertNotEquals( + "Sanity: with the kill switch active the in-memory counter" + + " must diverge from the directory size.", + (long) actualDirectorySize, reportedCacheSize); + } + + /** + * Time-bombed removal reminder. If this test fails, the feature toggle + * {@code FT_OAK-12212} and its guard in + * {@link PersistentDiskCache#writeSegment} should be removed — the fix + * has been in production long enough. + */ + @Test + public void ft_oak_12212_toggleShouldBeRemoved() { + assertTrue("Feature toggle " + PersistentDiskCache.FT_OAK_12212 + " is overdue for removal", + LocalDate.now().isBefore(LocalDate.of(2027, 5, 14))); + } + + // --- Helpers ---------------------------------------------------------- + + private static byte[] randomBytes(int length) { + byte[] ret = new byte[length]; + new java.util.Random(42).nextBytes(ret); + return ret; + } + + /** + * Replaces the cache's internal worker executor (created in + * {@link AbstractPersistentCache}'s constructor) with a + * single-threaded one. Together with {@link #drainExecutor} this makes + * write task ordering and completion deterministic in tests, removing + * the {@code writesPending} race that otherwise makes the magnitude of + * the {@code cacheSize} drift timing-dependent. + */ + private static void replaceExecutorWithSingleThreaded(AbstractPersistentCache cache) + throws Exception { + Field executorField = AbstractPersistentCache.class.getDeclaredField("executor"); + executorField.setAccessible(true); + ExecutorService old = (ExecutorService) executorField.get(cache); + old.shutdownNow(); + executorField.set(cache, Executors.newSingleThreadExecutor()); + } + + /** + * Submits a no-op marker task to the cache's worker executor and waits + * for it to complete. With a single-threaded executor this guarantees + * that all previously submitted write tasks (and their trailing + * {@code cleanUp()} calls) have finished. + */ + private static void drainExecutor(AbstractPersistentCache cache) throws Exception { + Field executorField = AbstractPersistentCache.class.getDeclaredField("executor"); + executorField.setAccessible(true); + ExecutorService executor = (ExecutorService) executorField.get(cache); + Future marker = executor.submit(() -> { }); + marker.get(30, TimeUnit.SECONDS); + } +} diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java index 0f49eb119aa..5b1e4f1afbd 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java @@ -24,6 +24,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; @@ -44,7 +45,9 @@ * Conceptually this cache serves as a 2nd level cache for segments. The 1st * level cache is implemented by memoising the segment in its id (see {@code * SegmentId#segment}. Every time an segment is evicted from this cache the - * memoised segment is discarded (see {@code SegmentId#onAccess}. + * memoised segment is discarded (see {@code SegmentId#onAccess}). On an L1 hit, + * {@link #recordHit(SegmentId)} records L1 hits in {@link #getCacheStats()} and, when enabled, + * touches L2 so eviction policies see the access. */ public abstract class SegmentCache { @@ -103,11 +106,32 @@ public abstract Segment getSegment(@NotNull SegmentId id, @NotNull Callable + * When the toggle is {@code true} and {@code id} is a data segment, this performs one extra map + * lookup on the hottest read path whenever the segment is still in L2. * - * See {@code SegmentId#onAccess} + * @param id the segment id that was served from L1 */ - public abstract void recordHit(); + public abstract void recordHit(@NotNull SegmentId id); + + /** + * Feature toggle name for {@link #FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED}: propagate L1 memoisation hits to the + * segment L2 cache so frequency/recency used for eviction stay aligned with actual access. + * Disable at runtime via the OSGi Whiteboard when diagnosing behavior. + */ + public static final String FT_OAK_12214 = "FT_OAK-12214"; + + /** + * Whether L1 memoised hits are propagated to L2 so W-TinyLFU / LRU state matches actual access. + * Defaults to {@code true} as a bug-fix toggle (L2 was blind to L1); flip via + * the OSGi Whiteboard {@link org.apache.jackrabbit.oak.spi.toggle.FeatureToggle FeatureToggle} + * registered under {@link #FT_OAK_12214} for diagnosis or A/B runs. + */ + public static final AtomicBoolean FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED = new AtomicBoolean(true); private static class NonEmptyCache extends SegmentCache { @@ -217,8 +241,11 @@ public AbstractCacheStats getCacheStats() { } @Override - public void recordHit() { + public void recordHit(@NotNull SegmentId id) { stats.hitCount.incrementAndGet(); + if (id.isDataSegmentId() && FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.get()) { + cache.getIfPresent(id); + } } } @@ -268,7 +295,7 @@ public AbstractCacheStats getCacheStats() { } @Override - public void recordHit() { + public void recordHit(@NotNull SegmentId id) { stats.hitCount.incrementAndGet(); } } diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java index de74867732b..5b4fd2bed95 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java @@ -22,6 +22,7 @@ import static org.apache.jackrabbit.oak.segment.SegmentStore.EMPTY_STORE; import java.util.UUID; +import java.util.function.Consumer; import org.apache.jackrabbit.oak.commons.StringUtils; import org.apache.jackrabbit.oak.segment.spi.persistence.GCGeneration; @@ -64,7 +65,8 @@ public static boolean isDataSegmentId(long lsb) { private final long creationTime; /** Callback called whenever an underlying and locally memoised segment is accessed */ - private final Runnable onAccess; + @NotNull + private final Consumer onAccess; /** * The gc generation of this segment or -1 if unknown. @@ -86,12 +88,17 @@ public static boolean isDataSegmentId(long lsb) { /** * Create a new segment id with access tracking. - * @param store store this is belongs to - * @param msb most significant bits of this id - * @param lsb least significant bits of this id - * @param onAccess callback called whenever an underlying and locally memoised segment is accessed. + * + * @param store store this id belongs to + * @param msb most significant bits of this id + * @param lsb least significant bits of this id + * @param onAccess callback invoked whenever the locally memoised segment is accessed + * ({@link #getSegment()}); receives {@code this} (e.g. to notify {@link SegmentCache}). + *

API note (Oak 2.2.0, OAK-12214): this parameter type changed from + * {@link Runnable} to {@link java.util.function.Consumer Consumer}{@code } for + * L1-to-L2 propagation; downstream code that constructed ids with a {@code Runnable} must be updated. */ - public SegmentId(@NotNull SegmentStore store, long msb, long lsb, @NotNull Runnable onAccess) { + public SegmentId(@NotNull SegmentStore store, long msb, long lsb, @NotNull Consumer onAccess) { this.store = store; this.msb = msb; this.lsb = lsb; @@ -106,7 +113,7 @@ public SegmentId(@NotNull SegmentStore store, long msb, long lsb, @NotNull Runna * @param lsb least significant bits of this id */ public SegmentId(@NotNull SegmentStore store, long msb, long lsb) { - this(store, msb, lsb, () -> {}); + this(store, msb, lsb, id -> {}); } /** @@ -154,7 +161,7 @@ public Segment getSegment() { } } } - onAccess.run(); + onAccess.accept(this); return segment; } diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreRegistrar.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreRegistrar.java index 3261d9bec9e..0aae6c6d576 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreRegistrar.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreRegistrar.java @@ -64,6 +64,7 @@ import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.apache.jackrabbit.oak.spi.state.RevisionGC; import org.apache.jackrabbit.oak.spi.state.RevisionGCMBean; +import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle; import org.apache.jackrabbit.oak.spi.whiteboard.AbstractServiceTracker; import org.apache.jackrabbit.oak.spi.whiteboard.Registration; import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard; @@ -79,6 +80,7 @@ import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -275,6 +277,11 @@ private SegmentNodeStore register() throws IOException { } registerCloseable(store); + // OAK-12214: bug-fix toggle (default on) so L2 eviction policy sees L1 memoised hits + registerCloseable(cfg.getWhiteboard().register(FeatureToggle.class, + new FeatureToggle(SegmentCache.FT_OAK_12214, SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED), + Collections.emptyMap())); + // Listen for Executor services on the whiteboard WhiteboardExecutor executor = new WhiteboardExecutor(); diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java index 038318b7649..8e228bddc3f 100644 --- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java +++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -34,9 +35,21 @@ import org.apache.jackrabbit.oak.cache.AbstractCacheStats; import org.apache.jackrabbit.oak.segment.spi.RepositoryNotReachableException; +import org.junit.Before; import org.junit.Test; +/** + * {@link SegmentCache} unit tests. L1/L2 behaviour assertions that depend on admission/eviction + * policy assume the default Oak build (Caffeine-backed {@code oak.cache} segment cache); the Guava + * cache implementation is not covered here. + */ public class SegmentCacheTest { + + @Before + public void resetOak12214Toggle() { + SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.set(true); + } + private final SegmentCache cache = newSegmentCache(DEFAULT_SEGMENT_CACHE_MB); private final SegmentId id1 = new SegmentId(EMPTY_STORE, 0x0000000000000001L, 0xa000000000000001L, cache::recordHit); @@ -160,6 +173,113 @@ public void evictionDuringLoad() throws ExecutionException { assertFalse(cached.get()); } + /** + * Verifies that repeated L1 hits keep a hot segment in L2 under eviction pressure. + * + *

Contract asserted: after filling a 1 MB cache and many L1 hits on + * {@code hotId}, {@link SegmentCache#getSegment(SegmentId, Callable)} must still return the + * memoised segment without calling the loader (no {@link SegmentNotFoundException} on L1). + * + *

Implementation note: Oak's default segment cache uses Caffeine; this + * workload relies on feeding L2 on each L1 hit so the hot entry is not chosen for eviction + * when a sixteenth entry is added. Caffeine's internal SLRU/TinyLFU details may change across + * versions; if this test becomes unstable, prefer tightening the scenario or asserting via + * {@link AbstractCacheStats} rather than internal queue names. + */ + @Test + public void recordAccessKeepsHotSegmentInL2UnderPressure() throws ExecutionException { + // 1 MB cache; 15 × 64 KB = 983 KB fits, 16th entry forces an eviction. + SegmentCache smallCache = newSegmentCache(1); + SegmentId hotId = new SegmentId(EMPTY_STORE, 0xdeadL, 0xa000000000000001L, smallCache::recordHit); + Segment hotSeg = mock(Segment.class); + when(hotSeg.getSegmentId()).thenReturn(hotId); + when(hotSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + + // Load hotId first — it becomes the probationary LRU (oldest, eviction candidate). + smallCache.getSegment(hotId, () -> hotSeg); + + // Fill the rest of the cache with 14 fillers (no re-accesses), all in probationary. + for (int i = 0; i < 14; i++) { + SegmentId filler = new SegmentId(EMPTY_STORE, i + 10L, 0xa000000000000010L + i); + Segment fillerSeg = mock(Segment.class); + when(fillerSeg.getSegmentId()).thenReturn(filler); + when(fillerSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + smallCache.getSegment(filler, () -> fillerSeg); + } + + long hitCountBeforeL1 = smallCache.getCacheStats().getHitCount(); + // 20 L1 hits after the cache is full: each calls recordHit (and getIfPresent when toggle on). + for (int i = 0; i < 20; i++) { + assertEquals(hotSeg, hotId.getSegment()); + } + assertEquals("each L1 hit should increment segment cache hit stats", 20, + smallCache.getCacheStats().getHitCount() - hitCountBeforeL1); + + // Add a 16th entry to force eviction pressure; hotId must still be served from cache + L1. + SegmentId trigger = new SegmentId(EMPTY_STORE, 999L, 0xa000000000000999L); + Segment triggerSeg = mock(Segment.class); + when(triggerSeg.getSegmentId()).thenReturn(trigger); + when(triggerSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + smallCache.getSegment(trigger, () -> triggerSeg); + + // hotId must still be in L2 — loader must not be called. + assertEquals(hotSeg, smallCache.getSegment(hotId, () -> failToLoad(hotId))); + // L1 memoisation must also be intact. + assertEquals(hotSeg, hotId.getSegment()); + } + + /** + * With {@link SegmentCache#FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED} disabled, L1 hits do not touch L2, so repeated + * L1 reads do not refresh eviction policy for {@code hotId}. Under churn (each iteration loads a + * new 64 KB segment while the cache stays at capacity), {@code hotId} is eventually evicted + * from L2 and must be reloaded via the loader. + * + *

The {@code finally} block restores the toggle so other tests are unaffected. + */ + @Test + public void hotSegmentEvictedWithoutL2Notification() throws ExecutionException { + SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.set(false); + try { + // 1 MB cache — same size as the positive test. + SegmentCache smallCache = newSegmentCache(1); + SegmentId hotId = new SegmentId(EMPTY_STORE, 0xdeadL, 0xa000000000000001L, smallCache::recordHit); + Segment hotSeg = mock(Segment.class); + when(hotSeg.getSegmentId()).thenReturn(hotId); + when(hotSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + + smallCache.getSegment(hotId, () -> hotSeg); + + for (int i = 0; i < 14; i++) { + SegmentId filler = new SegmentId(EMPTY_STORE, i + 10L, 0xa000000000000010L + i); + Segment fillerSeg = mock(Segment.class); + when(fillerSeg.getSegmentId()).thenReturn(filler); + when(fillerSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + smallCache.getSegment(filler, () -> fillerSeg); + } + + for (int i = 0; i < 20; i++) { + assertEquals(hotSeg, hotId.getSegment()); + } + + AtomicBoolean reloaded = new AtomicBoolean(false); + final int maxChurnRounds = 48; + for (int round = 0; round < maxChurnRounds && !reloaded.get(); round++) { + SegmentId probe = new SegmentId(EMPTY_STORE, 4000L + round, 0xa0000000000e0000L + round); + Segment probeSeg = mock(Segment.class); + when(probeSeg.getSegmentId()).thenReturn(probe); + when(probeSeg.estimateMemoryUsage()).thenReturn(64 * 1024); + smallCache.getSegment(probe, () -> probeSeg); + smallCache.getSegment(hotId, () -> { + reloaded.set(true); + return hotSeg; + }); + } + assertTrue("hotId should have been evicted from L2 when notification is disabled", reloaded.get()); + } finally { + SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.set(true); + } + } + @Test public void nonEmptyCacheStatsTest() throws Exception { AbstractCacheStats stats = cache.getCacheStats();