diff --git a/test/collection/schema.py b/test/collection/schema.py
index 7ae2ab8d4..1f4e808da 100644
--- a/test/collection/schema.py
+++ b/test/collection/schema.py
@@ -1,6 +1,43 @@
 from typing import Literal, Optional
 
 
+def single_vector_schema(vector_index_type: Literal["hnsw", "flat", "dynamic"] = "flat") -> dict:
+    """A legacy (single, unnamed vector) collection schema as returned by Weaviate."""
+    return {
+        "class": "Something",
+        "invertedIndexConfig": {
+            "bm25": {"b": 0.75, "k1": 1.2},
+            "cleanupIntervalSeconds": 60,
+            "stopwords": {"additions": None, "preset": "en", "removals": None},
+        },
+        "multiTenancyConfig": {
+            "autoTenantActivation": False,
+            "autoTenantCreation": False,
+            "enabled": False,
+        },
+        "properties": [
+            {
+                "dataType": ["text"],
+                "indexFilterable": True,
+                "indexRangeFilters": False,
+                "indexSearchable": True,
+                "name": "name",
+                "tokenization": "word",
+            }
+        ],
+        "replicationConfig": {"asyncEnabled": False, "factor": 1},
+        "vectorIndexConfig": {
+            "vectorCacheMaxObjects": 1000000000000,
+            "distance": "cosine",
+            "pq": {"enabled": False},
+            "bq": {"enabled": False},
+            "sq": {"enabled": False},
+            "rq": {"enabled": False},
+        },
+        "vectorIndexType": vector_index_type,
+    }
+
+
 def multi_vector_schema(quantizer: Optional[Literal["pq", "bq", "sq", "rq"]] = None) -> dict:
     return {
         "class": "Something",
diff --git a/test/collection/test_config_update.py b/test/collection/test_config_update.py
index 680337291..fd780ec8b 100644
--- a/test/collection/test_config_update.py
+++ b/test/collection/test_config_update.py
@@ -1,6 +1,6 @@
 import pytest
 
-from test.collection.schema import multi_vector_schema
+from test.collection.schema import multi_vector_schema, single_vector_schema
 from weaviate.collections.classes.config import (
     Reconfigure,
     _CollectionConfigUpdate,
@@ -160,3 +160,84 @@ def test_replication_async_config_reset_all_fields() -> None:
     )
     result = update.merge_with_existing(schema)
     assert result["asyncConfig"] == {}
+
+
+# Regression tests for https://github.com/weaviate/weaviate-python-client/issues/1277
+# Changing the vector index *type* of an existing collection is server-side immutable. Previously
+# such an attempt was silently ignored, leaving users to believe the change had been applied.
+
+
+def test_changing_vector_index_type_single_vector_raises() -> None:
+    """Reproduces #1277: updating a flat index to a dynamic index must raise, not silently no-op."""
+    schema = single_vector_schema("flat")
+    update = _CollectionConfigUpdate(
+        vectorizer_config=Reconfigure.VectorIndex.dynamic(),
+    )
+    with pytest.raises(WeaviateInvalidInputError, match="immutable"):
+        update.merge_with_existing(schema)
+
+
+def test_changing_vector_index_type_via_vector_index_config_raises() -> None:
+    """The deprecated `vector_index_config` argument must also reject a type change."""
+    schema = single_vector_schema("flat")
+    update = _CollectionConfigUpdate(
+        vector_index_config=Reconfigure.VectorIndex.hnsw(),
+    )
+    with pytest.raises(WeaviateInvalidInputError, match="immutable"):
+        update.merge_with_existing(schema)
+
+
+def test_same_vector_index_type_single_vector_still_updates() -> None:
+    """A normal, allowed update to the same (flat) index type still merges successfully."""
+    schema = single_vector_schema("flat")
+    update = _CollectionConfigUpdate(
+        vectorizer_config=Reconfigure.VectorIndex.flat(vector_cache_max_objects=42),
+    )
+    new_schema = update.merge_with_existing(schema)
+    assert new_schema["vectorIndexType"] == "flat"
+    assert new_schema["vectorIndexConfig"]["vectorCacheMaxObjects"] == 42
+
+
+def test_changing_vector_index_type_named_vector_raises() -> None:
+    """Changing an existing named (hnsw) vector to flat must raise rather than silently no-op."""
+    schema = multi_vector_schema()
+    update = _CollectionConfigUpdate(
+        vectorizer_config=[
+            Reconfigure.NamedVectors.update(
+                name="boi",
+                vector_index_config=Reconfigure.VectorIndex.flat(),
+            )
+        ]
+    )
+    with pytest.raises(WeaviateInvalidInputError, match="immutable"):
+        update.merge_with_existing(schema)
+
+
+def test_changing_vector_index_type_vector_config_raises() -> None:
+    """Changing an existing (hnsw) vector to dynamic via `vector_config` must raise."""
+    schema = multi_vector_schema()
+    update = _CollectionConfigUpdate(
+        vector_config=Reconfigure.Vectors.update(
+            name="boi",
+            vector_index_config=Reconfigure.VectorIndex.dynamic(),
+        )
+    )
+    with pytest.raises(WeaviateInvalidInputError, match="immutable"):
+        update.merge_with_existing(schema)
+
+
+def test_same_vector_index_type_named_vector_still_updates() -> None:
+    """A normal, allowed update to the same (hnsw) named vector index still merges successfully."""
+    schema = multi_vector_schema()
+    update = _CollectionConfigUpdate(
+        vectorizer_config=[
+            Reconfigure.NamedVectors.update(
+                name="boi",
+                vector_index_config=Reconfigure.VectorIndex.hnsw(ef=128),
+            )
+        ]
+    )
+    new_schema = update.merge_with_existing(schema)
+    assert new_schema["vectorConfig"]["boi"]["vectorIndexType"] == "hnsw"
+    assert new_schema["vectorConfig"]["boi"]["vectorIndexConfig"]["ef"] == 128
+    assert new_schema["vectorConfig"]["yeh"] == schema["vectorConfig"]["yeh"]
diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
index 0f6d974c0..f49109944 100644
--- a/weaviate/collections/classes/config.py
+++ b/weaviate/collections/classes/config.py
@@ -1468,6 +1468,29 @@ def mutual_exclusivity(
                 )
         return v
 
+    @staticmethod
+    def __check_vector_index_type(
+        update: _VectorIndexConfigUpdate,
+        existing_vector_index_type: Optional[str],
+    ) -> None:
+        """Raise if an update would change the (immutable) vector index type of a collection.
+
+        The vector index type (e.g. ``hnsw``, ``flat``, ``dynamic``) cannot be changed after a
+        collection has been created. Previously such an attempt was silently ignored, leaving the
+        user to believe the change had been applied. See https://github.com/weaviate/weaviate-python-client/issues/1277.
+        """
+        if existing_vector_index_type is None:
+            return None
+        requested_vector_index_type = update.vector_index_type().value
+        if requested_vector_index_type != existing_vector_index_type:
+            raise WeaviateInvalidInputError(
+                f"Cannot update the vector index type of a collection from "
+                f"'{existing_vector_index_type}' to '{requested_vector_index_type}'. "
+                "The vector index type is immutable. To change it you must recreate the "
+                "collection with the desired vector index type and reindex its objects."
+            )
+        return None
+
     def __check_quantizers(
         self,
         quantizer: Optional[_QuantizerConfigUpdate],
@@ -1544,6 +1567,7 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
                 schema.get("objectTTLConfig", {})
             )
         if self.vectorIndexConfig is not None:
+            self.__check_vector_index_type(self.vectorIndexConfig, schema.get("vectorIndexType"))
             self.__check_quantizers(self.vectorIndexConfig.quantizer, schema["vectorIndexConfig"])
             schema["vectorIndexConfig"] = self.vectorIndexConfig.merge_with_existing(
                 schema["vectorIndexConfig"]
@@ -1572,6 +1596,7 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
             )
         if self.vectorizerConfig is not None:
             if isinstance(self.vectorizerConfig, _VectorIndexConfigUpdate):
+                self.__check_vector_index_type(self.vectorizerConfig, schema.get("vectorIndexType"))
                 self.__check_quantizers(
                     self.vectorizerConfig.quantizer, schema["vectorIndexConfig"]
                 )
@@ -1584,6 +1609,10 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
                         raise WeaviateInvalidInputError(
                             f"Vector config with name {vc.name} does not exist in the existing vector config"
                         )
+                    self.__check_vector_index_type(
+                        vc.vectorIndexConfig,
+                        schema["vectorConfig"][vc.name].get("vectorIndexType"),
+                    )
                     self.__check_quantizers(
                         vc.vectorIndexConfig.quantizer,
                         schema["vectorConfig"][vc.name]["vectorIndexConfig"],
@@ -1607,6 +1636,10 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]:
                     raise WeaviateInvalidInputError(
                         f"Vector config with name {vc.name} does not exist in the existing vector config"
                     )
+                self.__check_vector_index_type(
+                    vc.vectorIndexConfig,
+                    schema["vectorConfig"][vc.name].get("vectorIndexType"),
+                )
                 self.__check_quantizers(
                     vc.vectorIndexConfig.quantizer,
                     schema["vectorConfig"][vc.name]["vectorIndexConfig"],