Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Fix restore failure when S3 backup has no directory marker objects for configset
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Samuel Verstraete
links:
- name: GitHub PR #4182
url: https://github.com/apache/solr/pull/4182
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Fix S3 restore failure (NoSuchKey 404) when configset subdirectory markers were removed by aws s3 sync
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Samuel Verstraete
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ public void uploadConfigDir(
String sourceConfigName, String targetConfigName, ConfigSetService configSetService)
throws IOException {
URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName);
if (!repository.exists(source)) {
// Use both exists() and listAll() to handle object stores (e.g. S3) where a "directory"
// may have no explicit marker object but still contain config files under that prefix.
if (!repository.exists(source) && repository.listAll(source).length == 0) {
throw new IllegalArgumentException("Configset expected at " + source + " does not exist");
}
uploadConfigToSolrCloud(configSetService, source, targetConfigName, "");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core.backup;

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.ConfigSetService;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.junit.BeforeClass;
import org.junit.Test;

/**
* Unit tests for {@link BackupManager#uploadConfigDir} covering the case where an object-store
* backup (e.g. S3) has no explicit directory marker but the config files are still present.
*/
public class BackupManagerUploadConfigTest extends SolrTestCaseJ4 {

@BeforeClass
public static void setUpClass() {
assumeWorkingMockito();
}

private static final URI BACKUP_PATH = URI.create("s3://bucket/backup/");
private static final URI ZK_STATE_URI = URI.create("s3://bucket/backup/zk_backup/");
private static final URI CONFIG_DIR_URI =
URI.create("s3://bucket/backup/zk_backup/configs/myconfig/");
private static final URI SOLRCONFIG_URI =
URI.create("s3://bucket/backup/zk_backup/configs/myconfig/solrconfig.xml");

/**
* When the directory marker object is absent (S3 "no-marker" scenario) but config files exist
* under the prefix, {@code uploadConfigDir} must succeed and upload those files.
*
* <p>This is the regression test for the case where a backup was copied through a local
* filesystem and re-uploaded to S3, losing the zero-byte directory marker objects in the process.
*/
@Test
public void testUploadConfigDir_withoutDirectoryMarker_succeedsWhenFilesPresent()
throws Exception {
BackupRepository mockRepo = mock(BackupRepository.class);
ZkStateReader mockZkStateReader = mock(ZkStateReader.class);
ConfigSetService mockConfigSetService = mock(ConfigSetService.class);

// Simulate S3: no marker object → exists() is false, but listAll() finds the files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe just me, but I find mocks hard to understand. We have S3Mock as a dependency, could we be using it for these tests?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll have a look into this. I missed that.

when(mockRepo.exists(CONFIG_DIR_URI)).thenReturn(false);
when(mockRepo.listAll(CONFIG_DIR_URI)).thenReturn(new String[] {"solrconfig.xml"});

// resolveDirectory chain used by getZkStateDir() and uploadConfigDir()
when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR))
.thenReturn(ZK_STATE_URI);
when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "myconfig"))
.thenReturn(CONFIG_DIR_URI);

// resolve() used by uploadConfigToSolrCloud() to build the per-file URI
when(mockRepo.resolve(CONFIG_DIR_URI, "solrconfig.xml")).thenReturn(SOLRCONFIG_URI);
when(mockRepo.getPathType(SOLRCONFIG_URI)).thenReturn(BackupRepository.PathType.FILE);

// Return minimal valid XML bytes so FileTypeMagicUtil does not reject the file
byte[] configBytes = "<config/>".getBytes(StandardCharsets.UTF_8);
IndexInput mockInput = mock(IndexInput.class);
when(mockInput.length()).thenReturn((long) configBytes.length);
doAnswer(
invocation -> {
byte[] dest = invocation.getArgument(0);
int offset = invocation.getArgument(1);
System.arraycopy(configBytes, 0, dest, offset, configBytes.length);
return null;
})
.when(mockInput)
.readBytes(any(), eq(0), eq(configBytes.length));
when(mockRepo.openInput(CONFIG_DIR_URI, "solrconfig.xml", IOContext.DEFAULT))
.thenReturn(mockInput);

BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH);

// Must not throw even though exists() returned false
backupManager.uploadConfigDir("myconfig", "restoredConfig", mockConfigSetService);

// The config file must have been uploaded to the target configset
verify(mockConfigSetService)
.uploadFileToConfig(
eq("restoredConfig"), eq("solrconfig.xml"), any(byte[].class), eq(false));
}

/**
* When neither a directory marker nor any files exist under the config prefix, {@code
* uploadConfigDir} must throw {@link IllegalArgumentException}.
*/
@Test
public void testUploadConfigDir_throwsWhenTrulyAbsent() throws Exception {
BackupRepository mockRepo = mock(BackupRepository.class);
ZkStateReader mockZkStateReader = mock(ZkStateReader.class);
ConfigSetService mockConfigSetService = mock(ConfigSetService.class);

URI missingConfigDir = URI.create("s3://bucket/backup/zk_backup/configs/nonexistent/");

when(mockRepo.exists(missingConfigDir)).thenReturn(false);
when(mockRepo.listAll(missingConfigDir)).thenReturn(new String[0]);

when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR))
.thenReturn(ZK_STATE_URI);
when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "nonexistent"))
.thenReturn(missingConfigDir);

BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH);

assertThrows(
IllegalArgumentException.class,
() -> backupManager.uploadConfigDir("nonexistent", "target", mockConfigSetService));
}

/**
* When the directory marker IS present (the normal case), {@code uploadConfigDir} uses the fast
* path via {@code exists()} and must still upload files correctly.
*/
@Test
public void testUploadConfigDir_withDirectoryMarker_succeedsNormally() throws Exception {
BackupRepository mockRepo = mock(BackupRepository.class);
ZkStateReader mockZkStateReader = mock(ZkStateReader.class);
ConfigSetService mockConfigSetService = mock(ConfigSetService.class);

// Marker present → exists() returns true, listAll() should not be called
when(mockRepo.exists(CONFIG_DIR_URI)).thenReturn(true);
when(mockRepo.listAll(CONFIG_DIR_URI)).thenReturn(new String[] {"solrconfig.xml"});

when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR))
.thenReturn(ZK_STATE_URI);
when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "myconfig"))
.thenReturn(CONFIG_DIR_URI);

when(mockRepo.resolve(CONFIG_DIR_URI, "solrconfig.xml")).thenReturn(SOLRCONFIG_URI);
when(mockRepo.getPathType(SOLRCONFIG_URI)).thenReturn(BackupRepository.PathType.FILE);

byte[] configBytes = "<config/>".getBytes(StandardCharsets.UTF_8);
IndexInput mockInput = mock(IndexInput.class);
when(mockInput.length()).thenReturn((long) configBytes.length);
doAnswer(
invocation -> {
byte[] dest = invocation.getArgument(0);
int offset = invocation.getArgument(1);
System.arraycopy(configBytes, 0, dest, offset, configBytes.length);
return null;
})
.when(mockInput)
.readBytes(any(), eq(0), eq(configBytes.length));
when(mockRepo.openInput(CONFIG_DIR_URI, "solrconfig.xml", IOContext.DEFAULT))
.thenReturn(mockInput);

BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH);

backupManager.uploadConfigDir("myconfig", "restoredConfig", mockConfigSetService);

verify(mockConfigSetService)
.uploadFileToConfig(
eq("restoredConfig"), eq("solrconfig.xml"), any(byte[].class), eq(false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,13 @@ boolean pathExists(String path) throws S3Exception {
/**
* Check if path is directory.
*
* <p>A path is considered a directory if either (a) an explicit directory marker object exists
* (i.e. a zero-byte object with key {@code path + "/"} and content-type {@code
* application/x-directory}), or (b) no such marker exists but at least one object exists whose
* key starts with {@code path + "/"} (a "virtual directory"). Case (b) arises after an {@code aws
* s3 sync} round-trip, which skips directory marker objects, causing them to be absent in the
* target bucket.
*
* @param path to File/Directory in S3.
* @return true if path is directory, otherwise false.
*/
Expand All @@ -329,7 +336,30 @@ boolean isDirectory(String path) throws S3Exception {
return StrUtils.isNotNullOrEmpty(contentType)
&& contentType.equalsIgnoreCase(S3_DIR_CONTENT_TYPE);
} catch (NoSuchKeyException nske) {
return false;
// No explicit directory marker found; fall back to checking whether any objects exist under
// this prefix. This handles "virtual directories" that arise after an 'aws s3 sync'
// round-trip, which does not preserve directory marker objects (keys ending with '/').
return hasChildrenUnderPrefix(s3Path);
} catch (SdkException sdke) {
throw handleAmazonException(sdke);
}
}

/**
* Returns {@code true} if at least one object exists in S3 whose key starts with {@code dirPath}.
*
* <p>Used as a fallback in {@link #isDirectory} when no explicit directory marker object is
* present.
*
* @param dirPath directory path with a trailing {@code /} (as produced by {@link
* #sanitizedDirPath})
*/
private boolean hasChildrenUnderPrefix(String dirPath) throws S3Exception {
try {
return !s3Client
.listObjectsV2(b -> b.bucket(bucketName).prefix(dirPath).maxKeys(1))
.contents()
.isEmpty();
} catch (SdkException sdke) {
throw handleAmazonException(sdke);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,39 @@ public void testDeletePrefix() throws S3Exception {
assertTrue(client.pathExists("/my-file2"));
}

/**
* Test that a directory containing objects but lacking an explicit directory marker (i.e. a
* "virtual directory") is still recognised as a directory. This mirrors the situation that arises
* after an {@code aws s3 sync} round-trip: {@code aws s3 sync} skips directory marker objects
* (keys ending with {@code /}) when downloading, so they are never re-uploaded and are absent in
* the target bucket.
*/
@Test
public void testVirtualDirectoryWithoutMarker() throws S3Exception {
// Set up a normal directory tree with markers and content.
client.createDirectory("/virtual-dir");
pushContent("/virtual-dir/file1", "file1");
client.createDirectory("/virtual-dir/sub-dir");
pushContent("/virtual-dir/sub-dir/file2", "file2");

// Simulate the post-sync state by deleting just the directory marker objects.
// deleteObjects(Collection, int) accepts raw keys, bypassing sanitizedFilePath checks.
client.deleteObjects(List.of("virtual-dir/", "virtual-dir/sub-dir/"), 100);

// The paths should still be considered directories via the virtual-directory fallback.
assertTrue(
"A path with objects under it should be considered a virtual directory",
client.isDirectory("/virtual-dir"));
assertTrue(
"A nested path with objects under it should be considered a virtual directory",
client.isDirectory("/virtual-dir/sub-dir"));

// A path with neither a marker nor any objects underneath must not be a directory.
assertFalse(
"A path with no objects and no marker should not be a directory",
client.isDirectory("/virtual-dir/empty-sub-dir"));
}

/** Check listing objects of a directory. */
@Test
public void testListDir() throws S3Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,14 @@ The location must already exist within your S3 bucket before you can perform any
Empty Location::
If you do not want to use a sub-directory within your bucket to store your backup, you can use any of the following location options: `/`, `s3:/`, `s3://`.
However the location option is mandatory and you will receive an error when trying to perform backup operations without it.

Directory Markers::
S3 has no native concept of directories.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some better approach that we SHOULD be using instead of this zero-byte marker objects appraoch? If you were doing this today, is this how you would fix it? It feels like you are doing a bug fix because of a more fundamentally flawed approach? Or, is this the way we do these typs of things?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well... I got sucked into this without indeed looking at the bigger picture. I'm not saying it's absolutely wrong to work with PREMARKER blobs, but it would certainly not how I would implement it.
The whole 'check if a file exists' is an anti-pattern anyway when working with 'cloud storage'. As even to check this you actually have to pay for that requests. It's way better to just try and deal with the error in case there is one.
I'm gonna look if it's possible to implement the way the backups themselves are taken and basically completely forget about the whole PREMARKER stuff. This should still be backwards compatibly with current backups that people rely on. The only restriction is that I will have to look into how the whole storage abstraction and see if I can make it work for both local and cloud storage without violating their way of working.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome.

The `S3BackupRepository` simulates directories by uploading zero-byte marker objects (e.g., `zk_backup/configs/myconfig/`) alongside the real backup files.
If a backup is copied through a local filesystem and re-uploaded to S3 (for example using `aws s3 sync`), these zero-byte marker objects are typically lost because most tools translate them into real filesystem directories and do not recreate them on re-upload.
+
Solr handles this gracefully: if the directory marker is absent, Solr falls back to listing objects by key prefix to confirm that the configset files are present before restoring them.
A restore will still fail with a clear error only if neither the marker nor any configset files can be found at the expected location.
====

An example configuration to enable S3 backups and restore can be seen below:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1663,6 +1663,11 @@ private CompletableFuture<DocCollection> triggerCollectionRefresh(String collect
DocCollection cached = cacheEntry == null ? null : cacheEntry.cached;
return CompletableFuture.completedFuture(cached);
}
// Remove any already-completed future that hasn't been cleaned up by whenCompleteAsync yet.
// Without this, computeIfAbsent below would reuse the stale done future instead of submitting
// a genuinely new refresh, causing callers (e.g. stale-state retry) to observe no new
// ref.get().
collectionRefreshes.computeIfPresent(collection, (k, v) -> v.isDone() ? null : v);
return collectionRefreshes.computeIfAbsent(
collection,
key -> {
Expand Down