diff --git a/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml b/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml new file mode 100644 index 000000000000..fbadc3d512f3 --- /dev/null +++ b/changelog/unreleased/fix-s3-restore-missing-directory-marker.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Fix restore failure when S3 backup has no directory marker objects for configset +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Samuel Verstraete +links: + - name: GitHub PR #4182 + url: https://github.com/apache/solr/pull/4182 diff --git a/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml b/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml new file mode 100644 index 000000000000..52ba2f614120 --- /dev/null +++ b/changelog/unreleased/fix-s3-restore-virtual-directory-detection.yml @@ -0,0 +1,5 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Fix S3 restore failure (NoSuchKey 404) when configset subdirectory markers were removed by aws s3 sync +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Samuel Verstraete diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java index 403dd589b0f9..6504d9f97ad2 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java +++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java @@ -258,7 +258,9 @@ public void uploadConfigDir( String sourceConfigName, String targetConfigName, ConfigSetService configSetService) throws IOException { URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName); - if (!repository.exists(source)) { + // Use both exists() and listAll() to handle object stores (e.g. S3) where a "directory" + // may have no explicit marker object but still contain config files under that prefix. + if (!repository.exists(source) && repository.listAll(source).length == 0) { throw new IllegalArgumentException("Configset expected at " + source + " does not exist"); } uploadConfigToSolrCloud(configSetService, source, targetConfigName, ""); diff --git a/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java b/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java new file mode 100644 index 000000000000..a5c948a8aae1 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/core/backup/BackupManagerUploadConfigTest.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.core.backup; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.net.URI; +import java.nio.charset.StandardCharsets; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.core.ConfigSetService; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for {@link BackupManager#uploadConfigDir} covering the case where an object-store + * backup (e.g. S3) has no explicit directory marker but the config files are still present. + */ +public class BackupManagerUploadConfigTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void setUpClass() { + assumeWorkingMockito(); + } + + private static final URI BACKUP_PATH = URI.create("s3://bucket/backup/"); + private static final URI ZK_STATE_URI = URI.create("s3://bucket/backup/zk_backup/"); + private static final URI CONFIG_DIR_URI = + URI.create("s3://bucket/backup/zk_backup/configs/myconfig/"); + private static final URI SOLRCONFIG_URI = + URI.create("s3://bucket/backup/zk_backup/configs/myconfig/solrconfig.xml"); + + /** + * When the directory marker object is absent (S3 "no-marker" scenario) but config files exist + * under the prefix, {@code uploadConfigDir} must succeed and upload those files. + * + *

This is the regression test for the case where a backup was copied through a local + * filesystem and re-uploaded to S3, losing the zero-byte directory marker objects in the process. + */ + @Test + public void testUploadConfigDir_withoutDirectoryMarker_succeedsWhenFilesPresent() + throws Exception { + BackupRepository mockRepo = mock(BackupRepository.class); + ZkStateReader mockZkStateReader = mock(ZkStateReader.class); + ConfigSetService mockConfigSetService = mock(ConfigSetService.class); + + // Simulate S3: no marker object → exists() is false, but listAll() finds the files + when(mockRepo.exists(CONFIG_DIR_URI)).thenReturn(false); + when(mockRepo.listAll(CONFIG_DIR_URI)).thenReturn(new String[] {"solrconfig.xml"}); + + // resolveDirectory chain used by getZkStateDir() and uploadConfigDir() + when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR)) + .thenReturn(ZK_STATE_URI); + when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "myconfig")) + .thenReturn(CONFIG_DIR_URI); + + // resolve() used by uploadConfigToSolrCloud() to build the per-file URI + when(mockRepo.resolve(CONFIG_DIR_URI, "solrconfig.xml")).thenReturn(SOLRCONFIG_URI); + when(mockRepo.getPathType(SOLRCONFIG_URI)).thenReturn(BackupRepository.PathType.FILE); + + // Return minimal valid XML bytes so FileTypeMagicUtil does not reject the file + byte[] configBytes = "".getBytes(StandardCharsets.UTF_8); + IndexInput mockInput = mock(IndexInput.class); + when(mockInput.length()).thenReturn((long) configBytes.length); + doAnswer( + invocation -> { + byte[] dest = invocation.getArgument(0); + int offset = invocation.getArgument(1); + System.arraycopy(configBytes, 0, dest, offset, configBytes.length); + return null; + }) + .when(mockInput) + .readBytes(any(), eq(0), eq(configBytes.length)); + when(mockRepo.openInput(CONFIG_DIR_URI, "solrconfig.xml", IOContext.DEFAULT)) + .thenReturn(mockInput); + + BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH); + + // Must not throw even though exists() returned false + backupManager.uploadConfigDir("myconfig", "restoredConfig", mockConfigSetService); + + // The config file must have been uploaded to the target configset + verify(mockConfigSetService) + .uploadFileToConfig( + eq("restoredConfig"), eq("solrconfig.xml"), any(byte[].class), eq(false)); + } + + /** + * When neither a directory marker nor any files exist under the config prefix, {@code + * uploadConfigDir} must throw {@link IllegalArgumentException}. + */ + @Test + public void testUploadConfigDir_throwsWhenTrulyAbsent() throws Exception { + BackupRepository mockRepo = mock(BackupRepository.class); + ZkStateReader mockZkStateReader = mock(ZkStateReader.class); + ConfigSetService mockConfigSetService = mock(ConfigSetService.class); + + URI missingConfigDir = URI.create("s3://bucket/backup/zk_backup/configs/nonexistent/"); + + when(mockRepo.exists(missingConfigDir)).thenReturn(false); + when(mockRepo.listAll(missingConfigDir)).thenReturn(new String[0]); + + when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR)) + .thenReturn(ZK_STATE_URI); + when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "nonexistent")) + .thenReturn(missingConfigDir); + + BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH); + + assertThrows( + IllegalArgumentException.class, + () -> backupManager.uploadConfigDir("nonexistent", "target", mockConfigSetService)); + } + + /** + * When the directory marker IS present (the normal case), {@code uploadConfigDir} uses the fast + * path via {@code exists()} and must still upload files correctly. + */ + @Test + public void testUploadConfigDir_withDirectoryMarker_succeedsNormally() throws Exception { + BackupRepository mockRepo = mock(BackupRepository.class); + ZkStateReader mockZkStateReader = mock(ZkStateReader.class); + ConfigSetService mockConfigSetService = mock(ConfigSetService.class); + + // Marker present → exists() returns true, listAll() should not be called + when(mockRepo.exists(CONFIG_DIR_URI)).thenReturn(true); + when(mockRepo.listAll(CONFIG_DIR_URI)).thenReturn(new String[] {"solrconfig.xml"}); + + when(mockRepo.resolveDirectory(BACKUP_PATH, BackupManager.ZK_STATE_DIR)) + .thenReturn(ZK_STATE_URI); + when(mockRepo.resolveDirectory(ZK_STATE_URI, BackupManager.CONFIG_STATE_DIR, "myconfig")) + .thenReturn(CONFIG_DIR_URI); + + when(mockRepo.resolve(CONFIG_DIR_URI, "solrconfig.xml")).thenReturn(SOLRCONFIG_URI); + when(mockRepo.getPathType(SOLRCONFIG_URI)).thenReturn(BackupRepository.PathType.FILE); + + byte[] configBytes = "".getBytes(StandardCharsets.UTF_8); + IndexInput mockInput = mock(IndexInput.class); + when(mockInput.length()).thenReturn((long) configBytes.length); + doAnswer( + invocation -> { + byte[] dest = invocation.getArgument(0); + int offset = invocation.getArgument(1); + System.arraycopy(configBytes, 0, dest, offset, configBytes.length); + return null; + }) + .when(mockInput) + .readBytes(any(), eq(0), eq(configBytes.length)); + when(mockRepo.openInput(CONFIG_DIR_URI, "solrconfig.xml", IOContext.DEFAULT)) + .thenReturn(mockInput); + + BackupManager backupManager = BackupManager.forBackup(mockRepo, mockZkStateReader, BACKUP_PATH); + + backupManager.uploadConfigDir("myconfig", "restoredConfig", mockConfigSetService); + + verify(mockConfigSetService) + .uploadFileToConfig( + eq("restoredConfig"), eq("solrconfig.xml"), any(byte[].class), eq(false)); + } +} diff --git a/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java b/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java index 0add42da2d6b..2f3c2a6329d7 100644 --- a/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java +++ b/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java @@ -315,6 +315,13 @@ boolean pathExists(String path) throws S3Exception { /** * Check if path is directory. * + *

A path is considered a directory if either (a) an explicit directory marker object exists + * (i.e. a zero-byte object with key {@code path + "/"} and content-type {@code + * application/x-directory}), or (b) no such marker exists but at least one object exists whose + * key starts with {@code path + "/"} (a "virtual directory"). Case (b) arises after an {@code aws + * s3 sync} round-trip, which skips directory marker objects, causing them to be absent in the + * target bucket. + * * @param path to File/Directory in S3. * @return true if path is directory, otherwise false. */ @@ -329,7 +336,30 @@ boolean isDirectory(String path) throws S3Exception { return StrUtils.isNotNullOrEmpty(contentType) && contentType.equalsIgnoreCase(S3_DIR_CONTENT_TYPE); } catch (NoSuchKeyException nske) { - return false; + // No explicit directory marker found; fall back to checking whether any objects exist under + // this prefix. This handles "virtual directories" that arise after an 'aws s3 sync' + // round-trip, which does not preserve directory marker objects (keys ending with '/'). + return hasChildrenUnderPrefix(s3Path); + } catch (SdkException sdke) { + throw handleAmazonException(sdke); + } + } + + /** + * Returns {@code true} if at least one object exists in S3 whose key starts with {@code dirPath}. + * + *

Used as a fallback in {@link #isDirectory} when no explicit directory marker object is + * present. + * + * @param dirPath directory path with a trailing {@code /} (as produced by {@link + * #sanitizedDirPath}) + */ + private boolean hasChildrenUnderPrefix(String dirPath) throws S3Exception { + try { + return !s3Client + .listObjectsV2(b -> b.bucket(bucketName).prefix(dirPath).maxKeys(1)) + .contents() + .isEmpty(); } catch (SdkException sdke) { throw handleAmazonException(sdke); } diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java index d314855c3b1b..9fd1ecf9a844 100644 --- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java +++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3PathsTest.java @@ -163,6 +163,39 @@ public void testDeletePrefix() throws S3Exception { assertTrue(client.pathExists("/my-file2")); } + /** + * Test that a directory containing objects but lacking an explicit directory marker (i.e. a + * "virtual directory") is still recognised as a directory. This mirrors the situation that arises + * after an {@code aws s3 sync} round-trip: {@code aws s3 sync} skips directory marker objects + * (keys ending with {@code /}) when downloading, so they are never re-uploaded and are absent in + * the target bucket. + */ + @Test + public void testVirtualDirectoryWithoutMarker() throws S3Exception { + // Set up a normal directory tree with markers and content. + client.createDirectory("/virtual-dir"); + pushContent("/virtual-dir/file1", "file1"); + client.createDirectory("/virtual-dir/sub-dir"); + pushContent("/virtual-dir/sub-dir/file2", "file2"); + + // Simulate the post-sync state by deleting just the directory marker objects. + // deleteObjects(Collection, int) accepts raw keys, bypassing sanitizedFilePath checks. + client.deleteObjects(List.of("virtual-dir/", "virtual-dir/sub-dir/"), 100); + + // The paths should still be considered directories via the virtual-directory fallback. + assertTrue( + "A path with objects under it should be considered a virtual directory", + client.isDirectory("/virtual-dir")); + assertTrue( + "A nested path with objects under it should be considered a virtual directory", + client.isDirectory("/virtual-dir/sub-dir")); + + // A path with neither a marker nor any objects underneath must not be a directory. + assertFalse( + "A path with no objects and no marker should not be a directory", + client.isDirectory("/virtual-dir/empty-sub-dir")); + } + /** Check listing objects of a directory. */ @Test public void testListDir() throws S3Exception { diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc index e6fa3e4d4039..5a53da952472 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc @@ -672,6 +672,14 @@ The location must already exist within your S3 bucket before you can perform any Empty Location:: If you do not want to use a sub-directory within your bucket to store your backup, you can use any of the following location options: `/`, `s3:/`, `s3://`. However the location option is mandatory and you will receive an error when trying to perform backup operations without it. + +Directory Markers:: +S3 has no native concept of directories. +The `S3BackupRepository` simulates directories by uploading zero-byte marker objects (e.g., `zk_backup/configs/myconfig/`) alongside the real backup files. +If a backup is copied through a local filesystem and re-uploaded to S3 (for example using `aws s3 sync`), these zero-byte marker objects are typically lost because most tools translate them into real filesystem directories and do not recreate them on re-upload. ++ +Solr handles this gracefully: if the directory marker is absent, Solr falls back to listing objects by key prefix to confirm that the configset files are present before restoring them. +A restore will still fail with a clear error only if neither the marker nor any configset files can be found at the expected location. ==== An example configuration to enable S3 backups and restore can be seen below: diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java index c9199fdafff2..9631f229553b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java @@ -1663,6 +1663,11 @@ private CompletableFuture triggerCollectionRefresh(String collect DocCollection cached = cacheEntry == null ? null : cacheEntry.cached; return CompletableFuture.completedFuture(cached); } + // Remove any already-completed future that hasn't been cleaned up by whenCompleteAsync yet. + // Without this, computeIfAbsent below would reuse the stale done future instead of submitting + // a genuinely new refresh, causing callers (e.g. stale-state retry) to observe no new + // ref.get(). + collectionRefreshes.computeIfPresent(collection, (k, v) -> v.isDone() ? null : v); return collectionRefreshes.computeIfAbsent( collection, key -> {