diff --git a/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml b/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml new file mode 100644 index 000000000000..fbadc3d512f3 --- /dev/null +++ b/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Fix restore failure when S3 backup has no directory marker objects for configset +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Samuel Verstraete +links: + - name: GitHub PR #4182 + url: https://github.com/apache/solr/pull/4182 diff --git a/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml b/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml new file mode 100644 index 000000000000..52ba2f614120 --- /dev/null +++ b/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml @@ -0,0 +1,5 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Fix S3 restore failure (NoSuchKey 404) when configset subdirectory markers were removed by aws s3 sync +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Samuel Verstraete diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java index 403dd589b0f9..6504d9f97ad2 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java +++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java @@ -258,7 +258,9 @@ public void uploadConfigDir( String sourceConfigName, String targetConfigName, ConfigSetService configSetService) throws IOException { URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName); - if (!repository.exists(source)) { + // Use both exists() and listAll() to handle object stores (e.g. S3) where a "directory" + // may have no explicit marker object but still contain config files under that prefix. + if (!repository.exists(source) && repository.listAll(source).length == 0) { throw new IllegalArgumentException("Configset expected at " + source + " does not exist"); } uploadConfigToSolrCloud(configSetService, source, targetConfigName, ""); diff --git a/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java b/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java new file mode 100644 index 000000000000..a5c948a8aae1 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.core.backup; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.net.URI; +import java.nio.charset.StandardCharsets; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.core.ConfigSetService; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for {@link BackupManager#uploadConfigDir} covering the case where an object-store + * backup (e.g. S3) has no explicit directory marker but the config files are still present. + */ +public class BackupManagerUploadConfigTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void setUpClass() { + assumeWorkingMockito(); + } + + private static final URI BACKUP_PATH = URI.create("s3://bucket/backup/"); + private static final URI ZK_STATE_URI = URI.create("s3://bucket/backup/zk_backup/"); + private static final URI CONFIG_DIR_URI = + URI.create("s3://bucket/backup/zk_backup/configs/myconfig/"); + private static final URI SOLRCONFIG_URI = + URI.create("s3://bucket/backup/zk_backup/configs/myconfig/solrconfig.xml"); + + /** + * When the directory marker object is absent (S3 "no-marker" scenario) but config files exist + * under the prefix, {@code uploadConfigDir} must succeed and upload those files. + * + *
This is the regression test for the case where a backup was copied through a local
+ * filesystem and re-uploaded to S3, losing the zero-byte directory marker objects in the process.
+ */
+ @Test
+ public void testUploadConfigDir_withoutDirectoryMarker_succeedsWhenFilesPresent()
+ throws Exception {
+ BackupRepository mockRepo = mock(BackupRepository.class);
+ ZkStateReader mockZkStateReader = mock(ZkStateReader.class);
+ ConfigSetService mockConfigSetService = mock(ConfigSetService.class);
+
+ // Simulate S3: no marker object → exists() is false, but listAll() finds the files
+ when(mockRepo.exists(CONFIG_DIR_URI)).thenReturn(false);
+ when(mockRepo.listAll(CONFIG_DIR_URI)).thenReturn(new String[] {"solrconfig.xml"});
+
+ // resolveDirectory chain used by getZkStateDir() and uploadConfigDir()
+ when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR))
+ .thenReturn(ZK_STATE_URI);
+ when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "myconfig"))
+ .thenReturn(CONFIG_DIR_URI);
+
+ // resolve() used by uploadConfigToSolrCloud() to build the per-file URI
+ when(mockRepo.resolve(CONFIG_DIR_URI, "solrconfig.xml")).thenReturn(SOLRCONFIG_URI);
+ when(mockRepo.getPathType(SOLRCONFIG_URI)).thenReturn(BackupRepository.PathType.FILE);
+
+ // Return minimal valid XML bytes so FileTypeMagicUtil does not reject the file
+ byte[] configBytes = "
A path is considered a directory if either (a) an explicit directory marker object exists + * (i.e. a zero-byte object with key {@code path + "/"} and content-type {@code + * application/x-directory}), or (b) no such marker exists but at least one object exists whose + * key starts with {@code path + "/"} (a "virtual directory"). Case (b) arises after an {@code aws + * s3 sync} round-trip, which skips directory marker objects, causing them to be absent in the + * target bucket. + * * @param path to File/Directory in S3. * @return true if path is directory, otherwise false. */ @@ -329,7 +336,30 @@ boolean isDirectory(String path) throws S3Exception { return StrUtils.isNotNullOrEmpty(contentType) && contentType.equalsIgnoreCase(S3_DIR_CONTENT_TYPE); } catch (NoSuchKeyException nske) { - return false; + // No explicit directory marker found; fall back to checking whether any objects exist under + // this prefix. This handles "virtual directories" that arise after an 'aws s3 sync' + // round-trip, which does not preserve directory marker objects (keys ending with '/'). + return hasChildrenUnderPrefix(s3Path); + } catch (SdkException sdke) { + throw handleAmazonException(sdke); + } + } + + /** + * Returns {@code true} if at least one object exists in S3 whose key starts with {@code dirPath}. + * + *
Used as a fallback in {@link #isDirectory} when no explicit directory marker object is
+ * present.
+ *
+ * @param dirPath directory path with a trailing {@code /} (as produced by {@link
+ * #sanitizedDirPath})
+ */
+ private boolean hasChildrenUnderPrefix(String dirPath) throws S3Exception {
+ try {
+ return !s3Client
+ .listObjectsV2(b -> b.bucket(bucketName).prefix(dirPath).maxKeys(1))
+ .contents()
+ .isEmpty();
} catch (SdkException sdke) {
throw handleAmazonException(sdke);
}
diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java
index d314855c3b1b..9fd1ecf9a844 100644
--- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java
+++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java
@@ -163,6 +163,39 @@ public void testDeletePrefix() throws S3Exception {
assertTrue(client.pathExists("/my-file2"));
}
+ /**
+ * Test that a directory containing objects but lacking an explicit directory marker (i.e. a
+ * "virtual directory") is still recognised as a directory. This mirrors the situation that arises
+ * after an {@code aws s3 sync} round-trip: {@code aws s3 sync} skips directory marker objects
+ * (keys ending with {@code /}) when downloading, so they are never re-uploaded and are absent in
+ * the target bucket.
+ */
+ @Test
+ public void testVirtualDirectoryWithoutMarker() throws S3Exception {
+ // Set up a normal directory tree with markers and content.
+ client.createDirectory("/virtual-dir");
+ pushContent("/virtual-dir/file1", "file1");
+ client.createDirectory("/virtual-dir/sub-dir");
+ pushContent("/virtual-dir/sub-dir/file2", "file2");
+
+ // Simulate the post-sync state by deleting just the directory marker objects.
+ // deleteObjects(Collection, int) accepts raw keys, bypassing sanitizedFilePath checks.
+ client.deleteObjects(List.of("virtual-dir/", "virtual-dir/sub-dir/"), 100);
+
+ // The paths should still be considered directories via the virtual-directory fallback.
+ assertTrue(
+ "A path with objects under it should be considered a virtual directory",
+ client.isDirectory("/virtual-dir"));
+ assertTrue(
+ "A nested path with objects under it should be considered a virtual directory",
+ client.isDirectory("/virtual-dir/sub-dir"));
+
+ // A path with neither a marker nor any objects underneath must not be a directory.
+ assertFalse(
+ "A path with no objects and no marker should not be a directory",
+ client.isDirectory("/virtual-dir/empty-sub-dir"));
+ }
+
/** Check listing objects of a directory. */
@Test
public void testListDir() throws S3Exception {
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc
index e6fa3e4d4039..5a53da952472 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc
@@ -672,6 +672,14 @@ The location must already exist within your S3 bucket before you can perform any
Empty Location::
If you do not want to use a sub-directory within your bucket to store your backup, you can use any of the following location options: `/`, `s3:/`, `s3://`.
However the location option is mandatory and you will receive an error when trying to perform backup operations without it.
+
+Directory Markers::
+S3 has no native concept of directories.
+The `S3BackupRepository` simulates directories by uploading zero-byte marker objects (e.g., `zk_backup/configs/myconfig/`) alongside the real backup files.
+If a backup is copied through a local filesystem and re-uploaded to S3 (for example using `aws s3 sync`), these zero-byte marker objects are typically lost because most tools translate them into real filesystem directories and do not recreate them on re-upload.
++
+Solr handles this gracefully: if the directory marker is absent, Solr falls back to listing objects by key prefix to confirm that the configset files are present before restoring them.
+A restore will still fail with a clear error only if neither the marker nor any configset files can be found at the expected location.
====
An example configuration to enable S3 backups and restore can be seen below:
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
index c9199fdafff2..9631f229553b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
@@ -1663,6 +1663,11 @@ private CompletableFuture