Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.jackrabbit.oak.commons.time.Stopwatch;
import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.AbstractPersistentCache;
import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.SegmentCacheStats;
import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -58,6 +59,28 @@ public class PersistentDiskCache extends AbstractPersistentCache {
public static final long DEFAULT_TEMP_FILES_CLEANUP_WAIT_TIME_MS = 60000;
private static final String TEMP_FILE_SUFFIX = ".part";

/**
* Name of the feature toggle that acts as the kill switch for the
* OAK-12212 fix; see {@link #FT_OAK_12212_SKIP_MISSING_FILE_CHECK}.
*/
public static final String FT_OAK_12212 = "FT_OAK-12212";

/**
* Kill switch for the OAK-12212 fix in {@link #writeSegment}.
* <p>
* When {@code false} (default), {@code writeSegment} skips the on-disk
* write and the corresponding {@code cacheSize} increment if the segment
* is already present on disk. Segments are immutable, so a redundant
* write would only produce identical bytes — but every such call used to
* increment {@code cacheSize} while {@code Files.move} silently replaced
* the file on POSIX systems, causing the in-memory counter to drift far
* above the actual cache directory size and above {@code maxCacheSizeBytes}.
* <p>
* Set to {@code true} via the {@link FeatureToggle} registered with the
* Whiteboard to revert to the pre-fix behaviour.
*/
public static final AtomicBoolean FT_OAK_12212_SKIP_MISSING_FILE_CHECK = new AtomicBoolean(false);

private final File directory;
private final long maxCacheSizeBytes;
private final DiskCacheIOMonitor diskCacheIOMonitor;
Expand Down Expand Up @@ -148,17 +171,27 @@ public void writeSegment(long msb, long lsb, Buffer buffer) {
Runnable task = () -> {
if (writesPending.add(segmentId)) {
try {
int fileSize;
try (FileChannel channel = new FileOutputStream(tempSegmentFile).getChannel()) {
fileSize = bufferCopy.write(channel);
}
try {
Files.move(tempSegmentFile.toPath(), segmentFile.toPath(), StandardCopyOption.ATOMIC_MOVE);
} catch (AtomicMoveNotSupportedException e) {
Files.move(tempSegmentFile.toPath(), segmentFile.toPath());
// OAK-12212: skip the on-disk write and the cacheSize
// increment when the segment is already on disk. Segments
// are immutable, so a redundant write would only rewrite
// identical bytes. Before the fix, every such call still
// incremented cacheSize while Files.move silently replaced
// the file on POSIX systems, leaking phantom bytes into the
// in-memory counter. FT_OAK-12212 is a kill switch: the fix
// is active while the toggle is false (the default); setting
// it to true restores the unconditional write.
if (FT_OAK_12212_SKIP_MISSING_FILE_CHECK.get() || !segmentFile.exists()) {
int fileSize;
try (FileChannel channel = new FileOutputStream(tempSegmentFile).getChannel()) {
fileSize = bufferCopy.write(channel);
}
try {
Files.move(tempSegmentFile.toPath(), segmentFile.toPath(), StandardCopyOption.ATOMIC_MOVE);
} catch (AtomicMoveNotSupportedException e) {
Files.move(tempSegmentFile.toPath(), segmentFile.toPath());
}
long cacheSizeAfter = cacheSize.addAndGet(fileSize);
diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, fileSize);
}
long cacheSizeAfter = cacheSize.addAndGet(fileSize);
diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, fileSize);
} catch (Exception e) {
logger.error("Error writing segment {} to cache", segmentId, e);
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
import org.apache.jackrabbit.oak.segment.spi.monitor.RoleStatisticsProvider;
import org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache.PersistentCache;
import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle;
import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
import org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
Expand All @@ -38,6 +39,7 @@

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Hashtable;

@Component(
Expand Down Expand Up @@ -93,6 +95,12 @@ private PersistentCache createPersistentCache(Configuration configuration, Close
PersistentDiskCache persistentDiskCache = new PersistentDiskCache(new File(configuration.diskCacheDirectory()), configuration.diskCacheMaxSizeMB(), diskCacheIOMonitor);
closer.register(persistentDiskCache);

// OAK-12212: expose the kill switch for the cacheSize-accounting
// fix so it can be flipped at runtime via the Whiteboard.
registerCloseable(osgiWhiteboard.register(FeatureToggle.class,
new FeatureToggle(PersistentDiskCache.FT_OAK_12212, PersistentDiskCache.FT_OAK_12212_SKIP_MISSING_FILE_CHECK),
Collections.emptyMap()));

CacheStatsMBean diskCacheStatsMBean = persistentDiskCache.getCacheStats();
registerCloseable(registerMBean(CacheStatsMBean.class, diskCacheStatsMBean, CacheStats.TYPE, diskCacheStatsMBean.getName()));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
@Version("2.0.0")
@Version("2.1.0")
package org.apache.jackrabbit.oak.segment.remote.persistentcache;

import org.osgi.annotation.versioning.Version;
Loading
Loading