diff --git a/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java b/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java
index 0331ed563c..756c899e4f 100644
--- a/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java
+++ b/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java
@@ -28,12 +28,15 @@
 import org.bson.codecs.configuration.CodecRegistry;
 import org.bson.codecs.pojo.PojoCodecProvider;
 import org.bson.conversions.Bson;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Assumptions;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -44,6 +47,7 @@
 import static com.mongodb.client.model.Aggregates.vectorSearch;
 import static com.mongodb.client.model.search.SearchPath.fieldPath;
 import static com.mongodb.client.model.search.VectorSearchOptions.approximateVectorSearchOptions;
+import static com.mongodb.client.model.search.VectorSearchOptions.exactVectorSearchOptions;
 import static com.mongodb.client.model.search.VectorSearchQuery.textQuery;
 import static java.util.Arrays.asList;
 import static org.bson.codecs.configuration.CodecRegistries.fromProviders;
@@ -210,6 +214,211 @@ private void insertDocumentsForEmbedding() {
         ));
     }
 
+
+    // Every quantization value gets its own index name; only a non-empty createSearchIndexes result is asserted.
+    @ParameterizedTest(name = "should create auto embedding index with {0} quantization")
+    @ValueSource(strings = {"float", "scalar", "binary", "binaryNoRescore"})
+    void shouldCreateAutoEmbeddingIndexWithQuantization(final String quantization) {
+        final String indexName = INDEX_NAME + "_" + quantization;
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                indexName,
+                new Document(
+                        "fields",
+                        Collections.singletonList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large")
+                                        .append("quantization", quantization)
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        List<String> result = documentCollection.createSearchIndexes(Collections.singletonList(indexModel));
+        Assertions.assertFalse(result.isEmpty());
+    }
+
+    // JUnit Jupiter does not honor JUnit 4's @Ignore by default, so the test is skipped with @Disabled.
+    // NOTE(review): re-enable once the server accepts numDimensions for autoEmbed fields.
+    @Test
+    @DisplayName("should create auto embedding index with custom numDimensions")
+    @Disabled("Currently numDimensions can't be used, it fails with server error: 'Invalid numDimensions value for autoEmbed field in index: test_auto_embed. Expected an integer.'")
+    void shouldCreateAutoEmbeddingIndexWithCustomNumDimensions() {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                INDEX_NAME,
+                new Document(
+                        "fields",
+                        Collections.singletonList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large")
+                                        .append("numDimensions", 512)
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        List<String> result = documentCollection.createSearchIndexes(Collections.singletonList(indexModel));
+        Assertions.assertFalse(result.isEmpty());
+    }
+
+    // Declares a separate filter field ("director") next to the autoEmbed field; creation is expected to succeed.
+    @Test
+    @DisplayName("should create auto embedding index with filter field")
+    void shouldCreateAutoEmbeddingIndexWithFilterField() {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                INDEX_NAME,
+                new Document(
+                        "fields",
+                        asList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large"),
+                                new Document("type", "filter")
+                                        .append("path", "director")
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        List<String> result = documentCollection.createSearchIndexes(Collections.singletonList(indexModel));
+        Assertions.assertFalse(result.isEmpty());
+    }
+
+    // A "vector" field next to an autoEmbed field is expected to be rejected with MongoCommandException.
+    @Test
+    @DisplayName("should fail when mixing vector and autoEmbed types in the same index")
+    void shouldFailWhenMixingVectorAndAutoEmbedTypes() {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                INDEX_NAME,
+                new Document(
+                        "fields",
+                        asList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large"),
+                                new Document("type", "vector")
+                                        .append("path", "plot_embedding")
+                                        .append("numDimensions", 1024)
+                                        .append("similarity", "cosine")
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        Assertions.assertThrows(
+                MongoCommandException.class,
+                () -> documentCollection.createSearchIndexes(Collections.singletonList(indexModel)),
+                "Expected index creation to fail because vector and autoEmbed types cannot be mixed"
+        );
+    }
+
+    // The same path declared by two autoEmbed fields is expected to be rejected with MongoCommandException.
+    @Test
+    @DisplayName("should fail when duplicate paths are used")
+    void shouldFailWhenDuplicatePathsAreUsed() {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                INDEX_NAME,
+                new Document(
+                        "fields",
+                        asList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large"),
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large")
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        Assertions.assertThrows(
+                MongoCommandException.class,
+                () -> documentCollection.createSearchIndexes(Collections.singletonList(indexModel)),
+                "Expected index creation to fail because of duplicate paths"
+        );
+    }
+
+    // Reusing the autoEmbed path as a filter field is expected to be rejected with MongoCommandException.
+    @Test
+    @DisplayName("should fail when autoEmbed field is used as filter field")
+    void shouldFailWhenAutoEmbedFieldUsedAsFilterField() {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        SearchIndexModel indexModel = new SearchIndexModel(
+                INDEX_NAME,
+                new Document(
+                        "fields",
+                        asList(
+                                new Document("type", "autoEmbed")
+                                        .append("modality", "text")
+                                        .append("path", FIELD_SEARCH_PATH)
+                                        .append("model", "voyage-4-large"),
+                                new Document("type", "filter")
+                                        .append("path", FIELD_SEARCH_PATH)
+                        )),
+                SearchIndexType.vectorSearch()
+        );
+        Assertions.assertThrows(
+                MongoCommandException.class,
+                () -> documentCollection.createSearchIndexes(Collections.singletonList(indexModel)),
+                "Expected index creation to fail because autoEmbed field cannot be used as a filter field"
+        );
+    }
+
+    // Names the model on the text query (same name passed to createAutoEmbeddingIndex) and expects MOVIE_NAME first.
+    @Test
+    @DisplayName("should create auto embedding index and run query with model override")
+    void shouldCreateAutoEmbeddingIndexAndRunQueryWithModelOverride() throws InterruptedException {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        createAutoEmbeddingIndex("voyage-4-large");
+        // NOTE(review): fixed 2s pauses presumably wait for index/embedding readiness - confirm; polling would be less flaky.
+        TimeUnit.SECONDS.sleep(2L);
+        insertDocumentsForEmbedding();
+        TimeUnit.SECONDS.sleep(2L);
+
+        List<Bson> pipeline = asList(
+                vectorSearch(
+                        fieldPath(FIELD_SEARCH_PATH),
+                        textQuery("movies about love").model("voyage-4-large"),
+                        INDEX_NAME,
+                        5L,
+                        approximateVectorSearchOptions(5L)
+                )
+        );
+        List<Document> documents = documentCollection.aggregate(pipeline).into(new ArrayList<>());
+
+        Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from vector search query");
+        Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title"));
+    }
+
+    // Runs the query with exactVectorSearchOptions() instead of the approximate options and expects MOVIE_NAME first.
+    @Test
+    @DisplayName("should create auto embedding index and run exact vector search query")
+    void shouldCreateAutoEmbeddingIndexAndRunExactVectorSearchQuery() throws InterruptedException {
+        mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName());
+        createAutoEmbeddingIndex("voyage-4-large");
+        // NOTE(review): fixed 2s pauses presumably wait for index/embedding readiness - confirm; polling would be less flaky.
+        TimeUnit.SECONDS.sleep(2L);
+        insertDocumentsForEmbedding();
+        TimeUnit.SECONDS.sleep(2L);
+
+        List<Bson> pipeline = asList(
+                vectorSearch(
+                        fieldPath(FIELD_SEARCH_PATH),
+                        textQuery("movies about love"),
+                        INDEX_NAME,
+                        5L,
+                        exactVectorSearchOptions()
+                )
+        );
+        List<Document> documents = documentCollection.aggregate(pipeline).into(new ArrayList<>());
+
+        Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from exact vector search query");
+        Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title"));
+    }
+
     private void createAutoEmbeddingIndex(final String modelName) {
         SearchIndexModel indexModel = new SearchIndexModel(
                 INDEX_NAME,