From 2ac251f362e1af22273539be665aa347a0b5cbc0 Mon Sep 17 00:00:00 2001 From: peopleig Date: Tue, 14 Apr 2026 01:58:37 +0530 Subject: [PATCH 1/4] Update Python client to support batch insert and search --- client/python/USAGE.md | 60 +++++++ client/python/examples/async_usage.py | 2 +- client/python/examples/batch_insert_usage.py | 39 +++++ client/python/examples/search_query_usage.py | 69 ++++++++ client/python/pyproject.toml | 6 +- client/python/tests/test_client.py | 162 ++++++++++++------- client/python/vortexdb/__init__.py | 3 + client/python/vortexdb/client.py | 112 ++++++++----- client/python/vortexdb/models.py | 18 ++- client/python/vortexdb/protoutils.py | 73 ++++++--- 10 files changed, 422 insertions(+), 122 deletions(-) create mode 100644 client/python/examples/batch_insert_usage.py create mode 100644 client/python/examples/search_query_usage.py diff --git a/client/python/USAGE.md b/client/python/USAGE.md index 5a55402..372e3d2 100644 --- a/client/python/USAGE.md +++ b/client/python/USAGE.md @@ -56,6 +56,11 @@ async with AsyncVortexDB( payload=Payload.text("hello async vortex"), ) ``` +### Batch Insertion and Search Support + +The client now supports batch insertion and batch search queries. 
+Methods of usage and examples available in: +```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` --- @@ -115,6 +120,22 @@ Raises --- +#### **Batch Insert** + +Insert multiple vectors with payloads in a single request +``` +batch_insert(*, items: list[tuple[DenseVector, Payload]]) -> list[str] +``` + +Returns +- List of `point_id` (UUID string) + +Raises +- `TypeError` if input structure is invalid +- gRPC-mapped errors (see Error Handling) + +--- + #### **Get** Fetch a point by its ID @@ -149,6 +170,32 @@ Raises --- +#### **Batch Search** + +Search for nearest neighbours for multiple queries in a single request +``` +batch_search( + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, +) -> list[list[str]] +``` + +Raises +- `TypeError` for invalid query formats +- `ValueError` if required parameters are missing + +Supported Input Formats: +The `queries` parameter is flexible and supports multiple formats: +- List of `SearchQuery` objects +- List of `(DenseVector, Similarity, Limit)` tuples +- List of `(DenseVector, Similarity)` tuples with a global `Limit` +- List of `(DenseVector, Limit)` tuples with a global `Similarity` +- List of `DenseVector` with global `Similarity` and `Limit` + +--- + #### **Delete** Delete a point by its ID @@ -214,6 +261,19 @@ All fields are directly accessible: --- +### `SearchQuery` + +``` +SearchQuery( + vector: DenseVector, + similarity: Similarity, + limit: int, +) +``` +Structured representation of a search request + +--- + ### `Similarity` Enum representing distance functions: diff --git a/client/python/examples/async_usage.py b/client/python/examples/async_usage.py index cce3335..882a926 100644 --- a/client/python/examples/async_usage.py +++ b/client/python/examples/async_usage.py @@ -6,7 +6,7 @@ async def main(): async with AsyncVortexDB( grpc_url="localhost:50051", - api_key="your-api-key", + api_key="my-secret-password", ) as db: point_id = await db.insert( 
vector=DenseVector([0.1, 0.2, 0.3]), diff --git a/client/python/examples/batch_insert_usage.py b/client/python/examples/batch_insert_usage.py new file mode 100644 index 0000000..3c3aa6c --- /dev/null +++ b/client/python/examples/batch_insert_usage.py @@ -0,0 +1,39 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Payload, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + p1 = Payload.text("hello world") + p2 = Payload.image("/img/a.png") + p3 = Payload.text("foo bar") + + items = [ + (vectors[0], p1), + (vectors[1], p2), + (vectors[2], p3), + ] + + # Batch Insert + point_ids = db.batch_insert(items=items) + print("Inserted ids:\n", point_ids) + + for pid in point_ids: + db.delete(point_id=pid) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/examples/search_query_usage.py b/client/python/examples/search_query_usage.py new file mode 100644 index 0000000..097fa56 --- /dev/null +++ b/client/python/examples/search_query_usage.py @@ -0,0 +1,69 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Similarity, SearchQuery, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + q = SearchQuery( + vector=vectors[0], + similarity=Similarity.COSINE, + limit=3, + ) + res = db.search(query=q) + print("Single SearchQuery:\n", res) + + # List of SearchQuery + queries = [ + SearchQuery(vectors[0], Similarity.HAMMING, 3), + SearchQuery(vectors[1], Similarity.EUCLIDEAN, 2), + q, + ] + res = db.batch_search(queries=queries) + print("\nBatch SearchQuery:\n", res) + + # List of vectors with global Similarity and 
Limit + res = db.batch_search( + queries=vectors, + similarity=Similarity.COSINE, + limit=3, + ) + print("\nList of DenseVectors:\n", res) + + # List of tuple (DenseVector, Similarity) with global Limit + queries = [ + (vectors[0], Similarity.COSINE), + (vectors[1], Similarity.MANHATTAN), + ] + res = db.batch_search( + queries=queries, + limit=3, + ) + print("\nList of (DenseVector, Similarity):\n", res) + + # List of tuple (DenseVector, Limit) with global Similarity + queries = [ + (vectors[0], 2), + (vectors[1], 4), + ] + res = db.batch_search( + queries=queries, + similarity=Similarity.COSINE, + ) + print("\nList of (DenseVector, Limit):\n", res) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml index 8528f91..8e11aad 100644 --- a/client/python/pyproject.toml +++ b/client/python/pyproject.toml @@ -24,13 +24,13 @@ classifiers = [ ] dependencies = [ - "grpcio>=1.60", - "protobuf>=4.25", + "grpcio>=1.81.1", + "protobuf>=6.33.5,<7.0.0", ] [project.optional-dependencies] dev = [ - "grpcio-tools>=1.60", + "grpcio-tools>=1.81.1,<2.0.0", "pytest>=7.0", ] diff --git a/client/python/tests/test_client.py b/client/python/tests/test_client.py index a374959..8a6db58 100644 --- a/client/python/tests/test_client.py +++ b/client/python/tests/test_client.py @@ -5,6 +5,7 @@ from vortexdb.connection import GRPCConnection from vortexdb.models import DenseVector, Payload, Similarity, ContentType, Point from vortexdb.exceptions import InvalidArgumentError +from vortexdb.models import SearchQuery @@ -45,7 +46,6 @@ def test_insert_success(client, mock_connection): assert point_id == "point-123" - def test_insert_rejects_invalid_vector(client): with pytest.raises(TypeError): client.insert( @@ -54,32 +54,39 @@ def test_insert_rejects_invalid_vector(client): ) -def test_insert_batch_success(client, mock_connection): - mock_connection.call.return_value = Mock( - ids=[ - 
Mock(id=Mock(value="p1")), - Mock(id=Mock(value="p2")), - ] - ) +# Batch Insert - point_ids = client.insert_batch( - points=[ - (DenseVector([1, 2, 3]), Payload.text("hello")), - (DenseVector([4, 5, 6]), Payload.text("world")), - ] - ) - - assert point_ids == ["p1", "p2"] +def test_batch_insert_success(client, mock_connection): + response = Mock() + response.ids = [ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + mock_connection.call.return_value = response + items = [ + (DenseVector([1, 2, 3]), Payload.text("a")), + (DenseVector([4, 5, 6]), Payload.text("b")), + ] + result = client.batch_insert(items=items) + assert result == ["p1", "p2"] + +def test_batch_insert_invalid_items_type(client): + with pytest.raises(TypeError): + client.batch_insert(items="not-a-list") +def test_batch_insert_invalid_tuple_structure(client): + items = [ + (DenseVector([1, 2, 3]),), # only one element + ] + with pytest.raises(TypeError): + client.batch_insert(items=items) -def test_insert_batch_rejects_invalid_vector(client): +def test_batch_insert_invalid_vector(client): + items = [ + ([1, 2, 3], Payload.text("a")), # not DenseVector + ] with pytest.raises(TypeError): - client.insert_batch( - points=[ - (DenseVector([1, 2, 3]), Payload.text("hello")), - ([4, 5, 6], Payload.text("world")), - ] - ) + client.batch_insert(items=items) # Get @@ -160,37 +167,11 @@ def test_search_invalid_vector(client): ) -def test_search_batch_success(client, mock_connection): - mock_connection.call.return_value = Mock( - results=[ - Mock( - result_point_ids=[ - Mock(id=Mock(value="p1")), - Mock(id=Mock(value="p2")), - ] - ), - Mock( - result_point_ids=[ - Mock(id=Mock(value="p3")), - ] - ), - ] - ) - - results = client.search_batch( - queries=[ - (DenseVector([1, 2, 3]), Similarity.COSINE, 2), - (DenseVector([4, 5, 6]), Similarity.COSINE, 1), - ] - ) - - assert results == [["p1", "p2"], ["p3"]] - -def test_search_batch_accepts_ef(client, mock_connection): +def 
test_batch_search_accepts_ef(client, mock_connection): mock_connection.call.return_value = Mock(results=[]) - client.search_batch( + client.batch_search( queries=[ (DenseVector([1, 2, 3]), Similarity.COSINE, 2), (DenseVector([4, 5, 6]), Similarity.COSINE, 1), @@ -201,16 +182,79 @@ def test_search_batch_accepts_ef(client, mock_connection): request = mock_connection.call.call_args.args[1] assert [query.ef for query in request.queries] == [256, 256] +# Batch Search -def test_search_batch_rejects_invalid_vector(client): - with pytest.raises(TypeError): - client.search_batch( - queries=[ - (DenseVector([1, 2, 3]), Similarity.COSINE, 2), - ([4, 5, 6], Similarity.COSINE, 1), - ] - ) +def test_batch_search_full_tuple(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + Mock(result_point_ids=[Mock(id=Mock(value="p2"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.EUCLIDEAN, 1), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"], ["p2"]] + +def test_batch_search_vectors_with_global_params(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [DenseVector([1, 2, 3])] + result = client.batch_search( + queries=queries, + similarity=Similarity.MANHATTAN, + limit=2, + ) + assert result == [["p1"]] +def test_batch_search_vector_similarity_with_global_limit(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + result = client.batch_search( + queries=queries, + limit=2, + ) + assert result == [["p1"]] + +def test_batch_search_searchquery_objects(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + 
Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"]] + +def test_batch_search_missing_globals_for_vector(client): + queries = [DenseVector([1, 2, 3])] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_missing_limit(client): + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_invalid_format(client): + queries = ["invalid"] + with pytest.raises(TypeError): + client.batch_search(queries=queries) # Close diff --git a/client/python/vortexdb/__init__.py b/client/python/vortexdb/__init__.py index 9d9d694..3b80ddb 100644 --- a/client/python/vortexdb/__init__.py +++ b/client/python/vortexdb/__init__.py @@ -7,6 +7,8 @@ Payload, Point, Similarity, + SearchQuery, + to_dense_vectors, ) from vortexdb.exceptions import ( VortexDBError, @@ -25,6 +27,7 @@ "Payload", "Point", "Similarity", + "SearchQuery", "VortexDBError", "AuthenticationError", "NotFoundError", diff --git a/client/python/vortexdb/client.py b/client/python/vortexdb/client.py index 2104a61..682694c 100644 --- a/client/python/vortexdb/client.py +++ b/client/python/vortexdb/client.py @@ -7,11 +7,11 @@ Payload, Point, Similarity, + SearchQuery, ) from vortexdb import protoutils as proto - class VortexDB: """ High-level Python client for VortexDB """ @@ -52,27 +52,17 @@ def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value - def insert_batch( - self, - *, - points: Sequence[tuple[DenseVector, Payload]], - ) -> List[str]: + def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: """ - Insert multiple vectors with payloads. - Returns: List of point IDs + Insert multiple vectors. 
+ Returns: list of point_id (str) """ - for vector, _ in points: - self._validate_dense_vector(vector) - - request = proto.build_batch_insert_request( - points=list(points), - ) + request = proto.build_batch_insert_request(items=items) response = self._conn.call( self._conn.stub.InsertVectorsBatch, request, ) - return [pid.id.value for pid in response.ids] def get(self, *, point_id: str) -> Point | None: @@ -91,7 +81,6 @@ def get(self, *, point_id: str) -> Point | None: return Point.from_proto(response) - def delete(self, *, point_id: str) -> None: """ Delete a point by ID. @@ -106,16 +95,28 @@ def delete(self, *, point_id: str) -> None: def search( self, *, - vector: DenseVector, - similarity: Similarity, - limit: int, + vector: DenseVector | None = None, + similarity: Similarity | None = None, + limit: int | None = None, + query: SearchQuery | None = None, ef: int | None = None, ) -> List[str]: """ Search for nearest neighbors. Returns: List of point IDs """ - self._validate_dense_vector(vector) + if query is not None: + if not isinstance(query, SearchQuery): + raise TypeError("query must be a SearchQuery") + vector = query.vector + similarity = query.similarity + limit = query.limit + else: + self._validate_dense_vector(vector) + if not isinstance(similarity, Similarity): + raise TypeError("similarity must be a Similarity enum") + if not isinstance(limit, int): + raise TypeError("limit must be an int") request = proto.build_search_request( vector=vector, @@ -123,37 +124,68 @@ def search( limit=limit, ef=ef, ) - response = self._conn.call( self._conn.stub.SearchPoints, request, ) - return [pid.id.value for pid in response.result_point_ids] - def search_batch( + def batch_search( self, *, - queries: Sequence[tuple[DenseVector, Similarity, int]], + queries, + similarity: Similarity | None = None, + limit: int | None = None, ef: int | None = None, ) -> List[List[str]]: """ - Search nearest neighbors for multiple query vectors. 
- Returns: List of result point ID lists + Flexible batch search. + + Accepts: + - List[SearchQuery] + - List[(DenseVector, Similarity, int)] + - List[(DenseVector, Similarity)] + global limit + - List[(DenseVector, int)] + global similarity + - List[DenseVector] + global similarity + limit """ - for vector, _, _ in queries: - self._validate_dense_vector(vector) - - request = proto.build_batch_search_request( - queries=list(queries), - ef=ef, - ) - - response = self._conn.call( - self._conn.stub.SearchPointsBatch, - request, - ) - + normalized = [] + + for i, q in enumerate(queries): + if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + normalized.append((q.vector, q.similarity, q.limit)) + continue + + if isinstance(q, DenseVector): + if similarity is None or limit is None: + raise ValueError( + f"queries[{i}] requires global similarity and limit" + ) + normalized.append((q, similarity, limit)) + continue + + if isinstance(q, (list, tuple)): + if len(q) == 3: + normalized.append(q) + continue + if len(q) == 2: + a, b = q + + if isinstance(a, DenseVector) and isinstance(b, Similarity): + if limit is None: + raise ValueError(f"queries[{i}] missing global limit") + normalized.append((a, b, limit)) + continue + + if isinstance(a, DenseVector) and isinstance(b, int): + if similarity is None: + raise ValueError(f"queries[{i}] missing global similarity") + normalized.append((a, similarity, b)) + continue + + raise TypeError(f"Invalid query format at index {i}") + + request = proto.build_batch_search_request(queries=normalized, ef=ef) + response = self._conn.call(self._conn.stub.SearchPointsBatch, request) return [ [pid.id.value for pid in result.result_point_ids] for result in response.results @@ -179,4 +211,4 @@ def __enter__(self) -> "VortexDB": return self def __exit__(self, exc_type, exc, tb) -> None: - self.close() + self.close() \ No newline at end of file diff --git a/client/python/vortexdb/models.py b/client/python/vortexdb/models.py 
index f2cbe19..0bfab5f 100644 --- a/client/python/vortexdb/models.py +++ b/client/python/vortexdb/models.py @@ -70,7 +70,9 @@ def to_proto(self) -> vector_db_pb2.DenseVector: def to_list(self) -> list[float]: return list(self.values) - +# & Helper Function for Batch of DenseVectors +def to_dense_vectors(arr): + return [DenseVector(x) for x in arr] @dataclass(frozen=True) @@ -129,3 +131,17 @@ def pretty(self) -> str: f" payload_type = {self.payload.content_type.name},\n" f" payload = '{self.payload.content}'" ) + +# I added this because using tuples will get messy if we increase fields in a search query +@dataclass(frozen=True) +class SearchQuery: + vector: DenseVector + similarity: Similarity + limit: int + + def to_proto(self) -> vector_db_pb2.SearchRequest: + return vector_db_pb2.SearchRequest( + query_vector=self.vector.to_proto(), + similarity=self.similarity.to_proto(), + limit=self.limit, + ) \ No newline at end of file diff --git a/client/python/vortexdb/protoutils.py b/client/python/vortexdb/protoutils.py index b0f73e5..d91306a 100644 --- a/client/python/vortexdb/protoutils.py +++ b/client/python/vortexdb/protoutils.py @@ -12,18 +12,35 @@ def build_insert_request( payload=payload.to_proto(), ) - def build_batch_insert_request( *, - points: list[tuple[DenseVector, Payload]], + items: list[tuple[DenseVector, Payload]], ) -> vector_db_pb2.InsertVectorsBatchRequest: - return vector_db_pb2.InsertVectorsBatchRequest( - vectors=[ - build_insert_request(vector=vector, payload=payload) - for vector, payload in points - ] - ) + if not isinstance(items, (list,tuple)): + raise TypeError("Items must be a list of (DenseVector, Payload) tuples") + + if not items: + raise ValueError("Items cannot be empty") + + requests = [] + for i, pair in enumerate(items): + if not isinstance(pair, (list,tuple)) or len(pair)!=2: + raise TypeError(f"items[{i}] must be a tuple of (DenseVector, Payload)") + + vector, payload = pair + if not isinstance(vector, DenseVector): + raise 
TypeError( + f"items[{i}][0] must be a DenseVector" + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + + if not isinstance(payload, Payload): + raise TypeError(f"items[{i}][1] must be Payload") + + requests.append(build_insert_request(vector=vector, payload=payload)) + + return vector_db_pb2.InsertVectorsBatchRequest(vectors = requests) def build_point_id_request(point_id: str) -> vector_db_pb2.PointID: return vector_db_pb2.PointID( @@ -49,16 +66,36 @@ def build_search_request( def build_batch_search_request( *, queries: list[tuple[DenseVector, Similarity, int]], - ef: int | None = None, + ef: int | None=None, ) -> vector_db_pb2.SearchPointsBatchRequest: - return vector_db_pb2.SearchPointsBatchRequest( - queries=[ - build_search_request( - vector=vector, - similarity=similarity, + if not isinstance(queries, (list,tuple)): + raise TypeError("Queries must be a list of (DenseVector, Similarity, Limit (int)) tuples") + + if not queries: + raise ValueError("Queries cannot be empty") + + requests = [] + + for i, trio in enumerate(queries): + if not isinstance(trio, (list,tuple)) or len(trio)!=3: + raise TypeError(f"queries[{i}] must be a tuple of (DenseVector, Similarity, Limit(int))") + + vector, similarity, limit = trio + if not isinstance(vector, DenseVector): + raise TypeError( + f"queries[{i}][0] must be a DenseVector" + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError(f"queries[{i}][1] must be Similarity") + if not isinstance(limit, int): + raise TypeError(f"queries[{i}][2] must be an integer value") + + requests.append(vector_db_pb2.SearchRequest( + query_vector=vector.to_proto(), + similarity=similarity.to_proto(), limit=limit, ef=ef, - ) - for vector, similarity, limit in queries - ] - ) + )) + + return vector_db_pb2.SearchPointsBatchRequest(queries=requests) \ No newline at end of file From a471138ba53fb8a24a6d4e7daaa7e1879c9279b9 Mon Sep 17 00:00:00 2001 From: peopleig Date: Sat, 13 Jun 2026 06:18:08 +0530 
Subject: [PATCH 2/4] Add batch support to the async client --- client/python/USAGE.md | 15 +- client/python/examples/async_batch_usage.py | 86 ++++++++ client/python/tests/test_async_client.py | 216 +++++++++++++++++++- client/python/vortexdb/async_client.py | 107 +++++++++- 4 files changed, 406 insertions(+), 18 deletions(-) create mode 100644 client/python/examples/async_batch_usage.py diff --git a/client/python/USAGE.md b/client/python/USAGE.md index 372e3d2..be2e5d5 100644 --- a/client/python/USAGE.md +++ b/client/python/USAGE.md @@ -41,10 +41,10 @@ Example available in: ### Async Client Support -For async applications, use `AsyncVortexDB`. It mirrors the synchronous client API and uses `grpc.aio` under the hood. +For async applications, use `AsyncVortexDB`. It mirrors the synchronous client API and uses `grpc.aio` under the hood, including full support for `batch_insert` and `batch_search`. -Example available in: -```examples/async_usage.py``` +Examples available in: +```examples/async_usage.py``` & ```examples/async_batch_usage.py``` ```python async with AsyncVortexDB( @@ -56,11 +56,12 @@ async with AsyncVortexDB( payload=Payload.text("hello async vortex"), ) ``` + ### Batch Insertion and Search Support -The client now supports batch insertion and batch search queries. +Both `VortexDB` and `AsyncVortexDB` support batch insertion and batch search queries. Methods of usage and examples available in: -```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` +```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` & ```examples/async_batch_usage.py``` --- @@ -76,8 +77,10 @@ Async client class for I/O-heavy applications. It has the same constructor and m ``` await db.insert(...) +await db.batch_insert(...) await db.get(...) await db.search(...) +await db.batch_search(...) await db.delete(...) 
await db.close() ``` @@ -371,4 +374,4 @@ python -m grpc_tools.protoc \ After running this: - `vector_db_pb2_grpc.py` and `vector_db_pb2.py` will be updated -- No other client code should need changes +- No other client code should need changes \ No newline at end of file diff --git a/client/python/examples/async_batch_usage.py b/client/python/examples/async_batch_usage.py new file mode 100644 index 0000000..bb93a72 --- /dev/null +++ b/client/python/examples/async_batch_usage.py @@ -0,0 +1,86 @@ +import asyncio + +from vortexdb import AsyncVortexDB +from vortexdb import DenseVector, Payload, Similarity, SearchQuery, to_dense_vectors + + +async def main(): + async with AsyncVortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) as db: + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + p1 = Payload.text("hello world") + p2 = Payload.image("/img/a.png") + p3 = Payload.text("foo bar") + + items = [ + (vectors[0], p1), + (vectors[1], p2), + (vectors[2], p3), + ] + + # Batch Insert + point_ids = await db.batch_insert(items=items) + print("Inserted ids:\n", point_ids) + + q = SearchQuery( + vector=vectors[0], + similarity=Similarity.COSINE, + limit=3, + ) + res = await db.search(query=q) + print("\nSingle SearchQuery:\n", res) + + # List of SearchQuery + queries = [ + SearchQuery(vectors[0], Similarity.HAMMING, 3), + SearchQuery(vectors[1], Similarity.EUCLIDEAN, 2), + q, + ] + res = await db.batch_search(queries=queries) + print("\nBatch SearchQuery:\n", res) + + # List of vectors with global Similarity and Limit + res = await db.batch_search( + queries=vectors, + similarity=Similarity.COSINE, + limit=3, + ) + print("\nList of DenseVectors:\n", res) + + # List of tuple (DenseVector, Similarity) with global Limit + queries = [ + (vectors[0], Similarity.COSINE), + (vectors[1], Similarity.MANHATTAN), + ] + res = await db.batch_search( + queries=queries, + limit=3, + ) + 
print("\nList of (DenseVector, Similarity):\n", res) + + # List of tuple (DenseVector, Limit) with global Similarity + queries = [ + (vectors[0], 2), + (vectors[1], 4), + ] + res = await db.batch_search( + queries=queries, + similarity=Similarity.COSINE, + ) + print("\nList of (DenseVector, Limit):\n", res) + + for pid in point_ids: + await db.delete(point_id=pid) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/client/python/tests/test_async_client.py b/client/python/tests/test_async_client.py index 1667633..878cb42 100644 --- a/client/python/tests/test_async_client.py +++ b/client/python/tests/test_async_client.py @@ -5,7 +5,7 @@ from vortexdb.async_client import AsyncVortexDB from vortexdb.async_connection import AsyncGRPCConnection -from vortexdb.models import ContentType, DenseVector, Payload, Point, Similarity +from vortexdb.models import ContentType, DenseVector, Payload, Point, Similarity, SearchQuery @pytest.fixture @@ -58,6 +58,59 @@ async def run(): asyncio.run(run()) +# Batch Insert + +def test_async_batch_insert_success(client, mock_connection): + async def run(): + response = Mock() + response.ids = [ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + mock_connection.call.return_value = response + + items = [ + (DenseVector([1, 2, 3]), Payload.text("a")), + (DenseVector([4, 5, 6]), Payload.text("b")), + ] + result = await client.batch_insert(items=items) + assert result == ["p1", "p2"] + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_items_type(client): + async def run(): + with pytest.raises(TypeError): + await client.batch_insert(items="not-a-list") + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_tuple_structure(client): + async def run(): + items = [ + (DenseVector([1, 2, 3]),), # only one element + ] + with pytest.raises(TypeError): + await client.batch_insert(items=items) + + asyncio.run(run()) + + +def test_async_batch_insert_invalid_vector(client): + async 
def run(): + items = [ + ([1, 2, 3], Payload.text("a")), # not DenseVector + ] + with pytest.raises(TypeError): + await client.batch_insert(items=items) + + asyncio.run(run()) + + +# Get + def test_async_get_point_success(client, mock_connection): async def run(): proto_point = Mock() @@ -88,6 +141,8 @@ async def run(): asyncio.run(run()) +# Delete + def test_async_delete_success(client, mock_connection): async def run(): mock_connection.call.return_value = None @@ -99,6 +154,8 @@ async def run(): asyncio.run(run()) +# Search + def test_async_search_success(client, mock_connection): async def run(): mock_connection.call.return_value = Mock( @@ -119,6 +176,39 @@ async def run(): asyncio.run(run()) +def test_async_search_with_query_object(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + result_point_ids=[ + Mock(id=Mock(value="p1")), + ] + ) + + q = SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2) + results = await client.search(query=q) + + assert results == ["p1"] + + asyncio.run(run()) + + +def test_async_search_accepts_ef(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock(result_point_ids=[]) + + await client.search( + vector=DenseVector([1, 2, 3]), + similarity=Similarity.COSINE, + limit=2, + ef=128, + ) + + request = mock_connection.call.call_args.args[1] + assert request.ef == 128 + + asyncio.run(run()) + + def test_async_search_invalid_vector(client): async def run(): with pytest.raises(TypeError): @@ -131,6 +221,128 @@ async def run(): asyncio.run(run()) +# Batch Search + +def test_async_batch_search_full_tuple(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + Mock(result_point_ids=[Mock(id=Mock(value="p2"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.EUCLIDEAN, 1), + ] + result = await 
client.batch_search(queries=queries) + assert result == [["p1"], ["p2"]] + + asyncio.run(run()) + + +def test_async_batch_search_searchquery_objects(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2), + ] + result = await client.batch_search(queries=queries) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_vectors_with_global_params(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [DenseVector([1, 2, 3])] + result = await client.batch_search( + queries=queries, + similarity=Similarity.MANHATTAN, + limit=2, + ) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_vector_similarity_with_global_limit(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + result = await client.batch_search( + queries=queries, + limit=2, + ) + assert result == [["p1"]] + + asyncio.run(run()) + + +def test_async_batch_search_accepts_ef(client, mock_connection): + async def run(): + mock_connection.call.return_value = Mock(results=[]) + + await client.batch_search( + queries=[ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.COSINE, 1), + ], + ef=256, + ) + + request = mock_connection.call.call_args.args[1] + assert [query.ef for query in request.queries] == [256, 256] + + asyncio.run(run()) + + +def test_async_batch_search_missing_globals_for_vector(client): + async def run(): + queries = [DenseVector([1, 2, 3])] + with pytest.raises(ValueError): + await client.batch_search(queries=queries) + + 
asyncio.run(run()) + + +def test_async_batch_search_missing_limit(client): + async def run(): + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + with pytest.raises(ValueError): + await client.batch_search(queries=queries) + + asyncio.run(run()) + + +def test_async_batch_search_invalid_format(client): + async def run(): + queries = ["invalid"] + with pytest.raises(TypeError): + await client.batch_search(queries=queries) + + asyncio.run(run()) + + +# Close / Context Manager + def test_async_close_closes_connection(client, mock_connection): async def run(): await client.close() @@ -155,4 +367,4 @@ async def run(): conn.close.assert_awaited_once() - asyncio.run(run()) + asyncio.run(run()) \ No newline at end of file diff --git a/client/python/vortexdb/async_client.py b/client/python/vortexdb/async_client.py index dd0d95c..3e6ac69 100644 --- a/client/python/vortexdb/async_client.py +++ b/client/python/vortexdb/async_client.py @@ -3,7 +3,7 @@ from vortexdb import protoutils as proto from vortexdb.async_connection import AsyncGRPCConnection from vortexdb.config import VortexDBConfig -from vortexdb.models import DenseVector, Payload, Point, Similarity +from vortexdb.models import DenseVector, Payload, Point, Similarity, SearchQuery class AsyncVortexDB: @@ -48,6 +48,20 @@ async def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value + async def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: + """ + Insert multiple vectors. + Returns: list of point_id (str) + """ + request = proto.build_batch_insert_request(items=items) + + response = await self._conn.call( + self._conn.stub.InsertVectorsBatch, + request, + ) + + return [pid.id.value for pid in response.ids] + async def get(self, *, point_id: str) -> Point | None: """ Retrieve a point by ID. 
@@ -78,20 +92,32 @@ async def delete(self, *, point_id: str) -> None: async def search( self, *, - vector: DenseVector, - similarity: Similarity, - limit: int, + vector: DenseVector | None = None, + similarity: Similarity | None = None, + limit: int | None = None, + query: SearchQuery | None = None, ef: int | None = None, ) -> List[str]: """ Search for nearest neighbors. Returns: List of point IDs """ - if not isinstance(vector, DenseVector): - raise TypeError( - "vector must be a DenseVector. " - "Use: DenseVector([1.0, 2.0, 3.0])" - ) + if query is not None: + if not isinstance(query, SearchQuery): + raise TypeError("query must be a SearchQuery") + vector = query.vector + similarity = query.similarity + limit = query.limit + else: + if not isinstance(vector, DenseVector): + raise TypeError( + "vector must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError("similarity must be a Similarity enum") + if not isinstance(limit, int): + raise TypeError("limit must be an int") request = proto.build_search_request( vector=vector, @@ -107,6 +133,67 @@ async def search( return [pid.id.value for pid in response.result_point_ids] + async def batch_search( + self, + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, + ef: int | None = None, + ) -> List[List[str]]: + """ + Flexible batch search. 
+ + Accepts: + - List[SearchQuery] + - List[(DenseVector, Similarity, int)] + - List[(DenseVector, Similarity)] + global limit + - List[(DenseVector, int)] + global similarity + - List[DenseVector] + global similarity + limit + """ + normalized = [] + + for i, q in enumerate(queries): + if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + normalized.append((q.vector, q.similarity, q.limit)) + continue + + if isinstance(q, DenseVector): + if similarity is None or limit is None: + raise ValueError( + f"queries[{i}] requires global similarity and limit" + ) + normalized.append((q, similarity, limit)) + continue + + if isinstance(q, (list, tuple)): + if len(q) == 3: + normalized.append(q) + continue + if len(q) == 2: + a, b = q + + if isinstance(a, DenseVector) and isinstance(b, Similarity): + if limit is None: + raise ValueError(f"queries[{i}] missing global limit") + normalized.append((a, b, limit)) + continue + + if isinstance(a, DenseVector) and isinstance(b, int): + if similarity is None: + raise ValueError(f"queries[{i}] missing global similarity") + normalized.append((a, similarity, b)) + continue + + raise TypeError(f"Invalid query format at index {i}") + + request = proto.build_batch_search_request(queries=normalized, ef=ef) + response = await self._conn.call(self._conn.stub.SearchPointsBatch, request) + return [ + [pid.id.value for pid in result.result_point_ids] + for result in response.results + ] + async def close(self) -> None: """ Close the async gRPC connection. 
@@ -117,4 +204,4 @@ async def __aenter__(self) -> "AsyncVortexDB": return self async def __aexit__(self, exc_type, exc, tb) -> None: - await self.close() + await self.close() \ No newline at end of file From 4242a3972be781e94d82e4cbed3bb6ecdbe96627 Mon Sep 17 00:00:00 2001 From: peopleig Date: Sat, 13 Jun 2026 06:29:33 +0530 Subject: [PATCH 3/4] Add master test script for all usage example files Run linter on client/python --- client/python/examples/all.py | 28 ++++++++++ client/python/examples/async_batch_usage.py | 5 +- client/python/examples/basic_usage.py | 4 +- client/python/examples/batch_insert_usage.py | 4 +- .../python/examples/context_manager_usage.py | 3 +- client/python/examples/search_query_usage.py | 4 +- client/python/pyproject.toml | 3 + client/python/tests/test_async_client.py | 21 ++++++- client/python/tests/test_client.py | 23 +++++++- client/python/tests/test_config.py | 5 ++ client/python/tests/test_connection.py | 5 ++ client/python/tests/test_models.py | 23 ++++---- client/python/vortexdb/__init__.py | 1 + client/python/vortexdb/async_client.py | 18 +++--- client/python/vortexdb/client.py | 20 ++++--- client/python/vortexdb/config.py | 4 +- client/python/vortexdb/connection.py | 6 +- client/python/vortexdb/exceptions.py | 2 +- client/python/vortexdb/models.py | 18 +++--- client/python/vortexdb/protoutils.py | 55 ++++++++++--------- 20 files changed, 171 insertions(+), 81 deletions(-) create mode 100644 client/python/examples/all.py diff --git a/client/python/examples/all.py b/client/python/examples/all.py new file mode 100644 index 0000000..05deb55 --- /dev/null +++ b/client/python/examples/all.py @@ -0,0 +1,28 @@ +# This file is like a master test. 
Runs all the examples +# Not exactly the purpose of the examples dir, +# but helps in checking if any code updates haven't broken the API + +from pathlib import Path +import subprocess +import pytest + +# Didn't know I could do this with pytest, so cool +# Just run: pytest ./all.py -v + +EXAMPLES_DIR = Path(__file__).parent +example_files = sorted(EXAMPLES_DIR.glob("*_usage.py")) + + +@pytest.mark.parametrize( + "script_path", + example_files, + ids=lambda p: p.stem, +) +def test(script_path): + """Run all example scripts to check if they crash or not""" + result = subprocess.run( + ["python3", str(script_path)], capture_output=True, text=True + ) + assert result.returncode == 0, ( + f"Script {script_path} failed with stderr:\n{result.stderr}" + ) diff --git a/client/python/examples/async_batch_usage.py b/client/python/examples/async_batch_usage.py index bb93a72..72d2493 100644 --- a/client/python/examples/async_batch_usage.py +++ b/client/python/examples/async_batch_usage.py @@ -1,7 +1,7 @@ import asyncio from vortexdb import AsyncVortexDB -from vortexdb import DenseVector, Payload, Similarity, SearchQuery, to_dense_vectors +from vortexdb import Payload, Similarity, SearchQuery, to_dense_vectors async def main(): @@ -9,7 +9,6 @@ async def main(): grpc_url="localhost:50051", api_key="my-secret-password", ) as db: - raw_vectors = [ [0.1, 0.2, 0.3], [0.4, 0.5, 0.6], @@ -83,4 +82,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/client/python/examples/basic_usage.py b/client/python/examples/basic_usage.py index 4288c02..f58d8c5 100644 --- a/client/python/examples/basic_usage.py +++ b/client/python/examples/basic_usage.py @@ -1,5 +1,6 @@ from vortexdb import VortexDB -from vortexdb import DenseVector, Payload, Similarity # from vortexdb.models +from vortexdb import DenseVector, Payload, Similarity # from vortexdb.models + def main(): # Initialize client @@ -32,5 +33,6 @@ def main(): # 
Close connection db.close() + if __name__ == "__main__": main() diff --git a/client/python/examples/batch_insert_usage.py b/client/python/examples/batch_insert_usage.py index 3c3aa6c..e021b1a 100644 --- a/client/python/examples/batch_insert_usage.py +++ b/client/python/examples/batch_insert_usage.py @@ -1,5 +1,5 @@ from vortexdb import VortexDB -from vortexdb import DenseVector, Payload, to_dense_vectors +from vortexdb import Payload, to_dense_vectors def main(): @@ -36,4 +36,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/client/python/examples/context_manager_usage.py b/client/python/examples/context_manager_usage.py index 0f86b99..d2f1de2 100644 --- a/client/python/examples/context_manager_usage.py +++ b/client/python/examples/context_manager_usage.py @@ -1,11 +1,11 @@ from vortexdb import VortexDB, DenseVector, Payload, Similarity + def main(): with VortexDB( grpc_url="localhost:50051", api_key="my-secret-password", ) as db: - # Insert a vector point_id = db.insert( vector=DenseVector([0.1, 0.2, 0.3]), @@ -30,5 +30,6 @@ def main(): # At this point, the gRPC channel is closed automatically print("Connection closed") + if __name__ == "__main__": main() diff --git a/client/python/examples/search_query_usage.py b/client/python/examples/search_query_usage.py index 097fa56..3044056 100644 --- a/client/python/examples/search_query_usage.py +++ b/client/python/examples/search_query_usage.py @@ -1,5 +1,5 @@ from vortexdb import VortexDB -from vortexdb import DenseVector, Similarity, SearchQuery, to_dense_vectors +from vortexdb import Similarity, SearchQuery, to_dense_vectors def main(): @@ -66,4 +66,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml index 8e11aad..7f4f031 100644 --- a/client/python/pyproject.toml +++ b/client/python/pyproject.toml @@ -42,3 +42,6 @@ Repository = 
"https://github.com/sdslabs/VortexDB" [tool.pytest.ini_options] testpaths = ["tests"] + +[tool.ruff] +exclude = ["vortexdb/grpc"] diff --git a/client/python/tests/test_async_client.py b/client/python/tests/test_async_client.py index 878cb42..d2af9fd 100644 --- a/client/python/tests/test_async_client.py +++ b/client/python/tests/test_async_client.py @@ -5,7 +5,14 @@ from vortexdb.async_client import AsyncVortexDB from vortexdb.async_connection import AsyncGRPCConnection -from vortexdb.models import ContentType, DenseVector, Payload, Point, Similarity, SearchQuery +from vortexdb.models import ( + ContentType, + DenseVector, + Payload, + Point, + Similarity, + SearchQuery, +) @pytest.fixture @@ -60,6 +67,7 @@ async def run(): # Batch Insert + def test_async_batch_insert_success(client, mock_connection): async def run(): response = Mock() @@ -111,6 +119,7 @@ async def run(): # Get + def test_async_get_point_success(client, mock_connection): async def run(): proto_point = Mock() @@ -143,6 +152,7 @@ async def run(): # Delete + def test_async_delete_success(client, mock_connection): async def run(): mock_connection.call.return_value = None @@ -156,6 +166,7 @@ async def run(): # Search + def test_async_search_success(client, mock_connection): async def run(): mock_connection.call.return_value = Mock( @@ -223,6 +234,7 @@ async def run(): # Batch Search + def test_async_batch_search_full_tuple(client, mock_connection): async def run(): mock_connection.call.return_value = Mock( @@ -275,7 +287,9 @@ async def run(): asyncio.run(run()) -def test_async_batch_search_vector_similarity_with_global_limit(client, mock_connection): +def test_async_batch_search_vector_similarity_with_global_limit( + client, mock_connection +): async def run(): mock_connection.call.return_value = Mock( results=[ @@ -343,6 +357,7 @@ async def run(): # Close / Context Manager + def test_async_close_closes_connection(client, mock_connection): async def run(): await client.close() @@ -367,4 +382,4 @@ async 
def run(): conn.close.assert_awaited_once() - asyncio.run(run()) \ No newline at end of file + asyncio.run(run()) diff --git a/client/python/tests/test_client.py b/client/python/tests/test_client.py index 8a6db58..b639da5 100644 --- a/client/python/tests/test_client.py +++ b/client/python/tests/test_client.py @@ -4,13 +4,12 @@ from vortexdb.client import VortexDB from vortexdb.connection import GRPCConnection from vortexdb.models import DenseVector, Payload, Similarity, ContentType, Point -from vortexdb.exceptions import InvalidArgumentError from vortexdb.models import SearchQuery - # Fixtures for a mock connection and client layer + @pytest.fixture def mock_connection(monkeypatch): """ @@ -31,6 +30,7 @@ def client(mock_connection): # Insert + def test_insert_success(client, mock_connection): response = Mock() response.id = Mock() @@ -56,6 +56,7 @@ def test_insert_rejects_invalid_vector(client): # Batch Insert + def test_batch_insert_success(client, mock_connection): response = Mock() response.ids = [ @@ -70,10 +71,12 @@ def test_batch_insert_success(client, mock_connection): result = client.batch_insert(items=items) assert result == ["p1", "p2"] + def test_batch_insert_invalid_items_type(client): with pytest.raises(TypeError): client.batch_insert(items="not-a-list") + def test_batch_insert_invalid_tuple_structure(client): items = [ (DenseVector([1, 2, 3]),), # only one element @@ -81,6 +84,7 @@ def test_batch_insert_invalid_tuple_structure(client): with pytest.raises(TypeError): client.batch_insert(items=items) + def test_batch_insert_invalid_vector(client): items = [ ([1, 2, 3], Payload.text("a")), # not DenseVector @@ -91,6 +95,7 @@ def test_batch_insert_invalid_vector(client): # Get + def test_get_point_success(client, mock_connection): proto_point = Mock() proto_point.id.id.value = "point-123" @@ -117,6 +122,7 @@ def test_get_point_not_found(client, mock_connection): # Delete + def test_delete_success(client, mock_connection): mock_connection.call.return_value 
= None @@ -127,6 +133,7 @@ def test_delete_success(client, mock_connection): # Search + def test_search_success(client, mock_connection): mock_connection.call.return_value = Mock( result_point_ids=[ @@ -167,7 +174,6 @@ def test_search_invalid_vector(client): ) - def test_batch_search_accepts_ef(client, mock_connection): mock_connection.call.return_value = Mock(results=[]) @@ -182,8 +188,10 @@ def test_batch_search_accepts_ef(client, mock_connection): request = mock_connection.call.call_args.args[1] assert [query.ef for query in request.queries] == [256, 256] + # Batch Search + def test_batch_search_full_tuple(client, mock_connection): mock_connection.call.return_value = Mock( results=[ @@ -198,6 +206,7 @@ def test_batch_search_full_tuple(client, mock_connection): result = client.batch_search(queries=queries) assert result == [["p1"], ["p2"]] + def test_batch_search_vectors_with_global_params(client, mock_connection): mock_connection.call.return_value = Mock( results=[ @@ -212,6 +221,7 @@ def test_batch_search_vectors_with_global_params(client, mock_connection): ) assert result == [["p1"]] + def test_batch_search_vector_similarity_with_global_limit(client, mock_connection): mock_connection.call.return_value = Mock( results=[ @@ -227,6 +237,7 @@ def test_batch_search_vector_similarity_with_global_limit(client, mock_connectio ) assert result == [["p1"]] + def test_batch_search_searchquery_objects(client, mock_connection): mock_connection.call.return_value = Mock( results=[ @@ -239,11 +250,13 @@ def test_batch_search_searchquery_objects(client, mock_connection): result = client.batch_search(queries=queries) assert result == [["p1"]] + def test_batch_search_missing_globals_for_vector(client): queries = [DenseVector([1, 2, 3])] with pytest.raises(ValueError): client.batch_search(queries=queries) + def test_batch_search_missing_limit(client): queries = [ (DenseVector([1, 2, 3]), Similarity.COSINE), @@ -251,17 +264,21 @@ def test_batch_search_missing_limit(client): with 
pytest.raises(ValueError): client.batch_search(queries=queries) + def test_batch_search_invalid_format(client): queries = ["invalid"] with pytest.raises(TypeError): client.batch_search(queries=queries) + # Close + def test_close_closes_connection(client, mock_connection): client.close() mock_connection.close.assert_called_once() + def test_context_manager_closes_connection(monkeypatch): conn = Mock(spec=GRPCConnection) monkeypatch.setattr("vortexdb.client.GRPCConnection", lambda _: conn) diff --git a/client/python/tests/test_config.py b/client/python/tests/test_config.py index 1017250..f23ab2b 100644 --- a/client/python/tests/test_config.py +++ b/client/python/tests/test_config.py @@ -16,6 +16,7 @@ def clean_env(monkeypatch): # Checking from_env + def test_config_requires_api_key(clean_env): with pytest.raises(ConfigurationError): VortexDBConfig.from_env() @@ -32,8 +33,10 @@ def test_config_from_explicit_args(clean_env): assert cfg.api_key == "secret" assert cfg.timeout == 10.0 + # Env vars fallback + def test_config_from_env_vars(clean_env, monkeypatch): monkeypatch.setenv("VORTEXDB_GRPC_URL", "127.0.0.1:1234") monkeypatch.setenv("VORTEXDB_API_KEY", "env-secret") @@ -48,6 +51,7 @@ def test_config_from_env_vars(clean_env, monkeypatch): # Defaults + def test_config_default_grpc_url(clean_env, monkeypatch): monkeypatch.setenv("VORTEXDB_API_KEY", "secret") @@ -66,6 +70,7 @@ def test_config_default_timeout(clean_env, monkeypatch): # Invalid Timeout + def test_config_invalid_timeout(clean_env, monkeypatch): monkeypatch.setenv("VORTEXDB_API_KEY", "secret") monkeypatch.setenv("VORTEXDB_TIMEOUT", "not-a-number") diff --git a/client/python/tests/test_connection.py b/client/python/tests/test_connection.py index cfe10a7..6d5c108 100644 --- a/client/python/tests/test_connection.py +++ b/client/python/tests/test_connection.py @@ -16,6 +16,7 @@ # Fake gRPC error, required for testing + class FakeRpcError(grpc.RpcError): """ RpcError implementation for unit testing. 
@@ -35,6 +36,7 @@ def details(self): # Pytest fixtures for config and channel + @pytest.fixture def config(): return VortexDBConfig( @@ -53,6 +55,7 @@ def connection(config): # Basic connection testing + def test_channel_created_with_correct_url(config): with patch("grpc.insecure_channel") as mock_channel: GRPCConnection(config) @@ -78,6 +81,7 @@ def test_successful_rpc_call(connection): # Error mapping test + @pytest.mark.parametrize( "status_code,expected_exception", [ @@ -105,6 +109,7 @@ def test_unknown_grpc_error_maps_to_internal_error(connection): # Clean connection closure test + def test_close_closes_channel(config): with patch("grpc.insecure_channel") as mock_channel: mock_channel.return_value = Mock() diff --git a/client/python/tests/test_models.py b/client/python/tests/test_models.py index e78d686..796631f 100644 --- a/client/python/tests/test_models.py +++ b/client/python/tests/test_models.py @@ -12,6 +12,7 @@ # DenseVector Tests + def test_dense_vector_valid(): a = [1, 2.5, 3] v = DenseVector(a) @@ -47,6 +48,7 @@ def test_dense_vector_to_proto(): # Similarity Test + def test_similarity_to_proto(): assert Similarity.EUCLIDEAN.to_proto() == vector_db_pb2.Euclidean assert Similarity.MANHATTAN.to_proto() == vector_db_pb2.Manhattan @@ -56,6 +58,7 @@ def test_similarity_to_proto(): # ContentType Tests + def test_content_type_to_proto(): assert ContentType.TEXT.to_proto() == vector_db_pb2.Text assert ContentType.IMAGE.to_proto() == vector_db_pb2.Image @@ -73,6 +76,7 @@ def test_content_type_from_proto_invalid(): # Payload Tests + def test_payload_text_factory(): p = Payload.text("hello") assert p.content_type == ContentType.TEXT @@ -91,24 +95,20 @@ def test_payload_to_proto(): assert proto.content == "hello" assert proto.content_type == vector_db_pb2.Text + def test_payload_rejects_invalid_content_type(): with pytest.raises(TypeError): Payload("text", "hello") - # Point Test + def test_point_from_proto(): proto = vector_db_pb2.Point( - 
id=vector_db_pb2.PointID( - id=vector_db_pb2.UUID(value="point-123") - ), + id=vector_db_pb2.PointID(id=vector_db_pb2.UUID(value="point-123")), vector=vector_db_pb2.DenseVector(values=[1, 2, 3]), - payload=vector_db_pb2.Payload( - content_type=vector_db_pb2.Text, - content="hello" - ) + payload=vector_db_pb2.Payload(content_type=vector_db_pb2.Text, content="hello"), ) point = Point.from_proto(proto) @@ -118,12 +118,11 @@ def test_point_from_proto(): assert point.payload.content_type == ContentType.TEXT assert point.payload.content == "hello" + def test_point_from_proto_without_payload(): proto = vector_db_pb2.Point( - id=vector_db_pb2.PointID( - id=vector_db_pb2.UUID(value="p1") - ), - vector=vector_db_pb2.DenseVector(values=[1,2,3]), + id=vector_db_pb2.PointID(id=vector_db_pb2.UUID(value="p1")), + vector=vector_db_pb2.DenseVector(values=[1, 2, 3]), payload=None, ) diff --git a/client/python/vortexdb/__init__.py b/client/python/vortexdb/__init__.py index 3b80ddb..9cc9dd4 100644 --- a/client/python/vortexdb/__init__.py +++ b/client/python/vortexdb/__init__.py @@ -28,6 +28,7 @@ "Point", "Similarity", "SearchQuery", + "to_dense_vectors", "VortexDBError", "AuthenticationError", "NotFoundError", diff --git a/client/python/vortexdb/async_client.py b/client/python/vortexdb/async_client.py index 3e6ac69..3b7c2f5 100644 --- a/client/python/vortexdb/async_client.py +++ b/client/python/vortexdb/async_client.py @@ -32,8 +32,7 @@ async def insert(self, *, vector: DenseVector, payload: Payload) -> str: """ if not isinstance(vector, DenseVector): raise TypeError( - "vector must be a DenseVector. " - "Use: DenseVector([1.0, 2.0, 3.0])" + "vector must be a DenseVector. 
Use: DenseVector([1.0, 2.0, 3.0])" ) request = proto.build_insert_request( @@ -48,7 +47,9 @@ async def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value - async def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: + async def batch_insert( + self, *, items: list[tuple[DenseVector, Payload]] + ) -> list[str]: """ Insert multiple vectors. Returns: list of point_id (str) @@ -111,8 +112,7 @@ async def search( else: if not isinstance(vector, DenseVector): raise TypeError( - "vector must be a DenseVector. " - "Use: DenseVector([1.0, 2.0, 3.0])" + "vector must be a DenseVector. Use: DenseVector([1.0, 2.0, 3.0])" ) if not isinstance(similarity, Similarity): raise TypeError("similarity must be a Similarity enum") @@ -154,7 +154,11 @@ async def batch_search( normalized = [] for i, q in enumerate(queries): - if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + if ( + hasattr(q, "vector") + and hasattr(q, "similarity") + and hasattr(q, "limit") + ): normalized.append((q.vector, q.similarity, q.limit)) continue @@ -204,4 +208,4 @@ async def __aenter__(self) -> "AsyncVortexDB": return self async def __aexit__(self, exc_type, exc, tb) -> None: - await self.close() \ No newline at end of file + await self.close() diff --git a/client/python/vortexdb/client.py b/client/python/vortexdb/client.py index 682694c..9984279 100644 --- a/client/python/vortexdb/client.py +++ b/client/python/vortexdb/client.py @@ -1,4 +1,4 @@ -from typing import List, Sequence +from typing import List from vortexdb.connection import GRPCConnection from vortexdb.config import VortexDBConfig @@ -12,8 +12,9 @@ from vortexdb import protoutils as proto + class VortexDB: - """ High-level Python client for VortexDB """ + """High-level Python client for VortexDB""" def __init__( self, @@ -22,7 +23,7 @@ def __init__( api_key: str | None = None, timeout: float | None = None, ): - # Config order followed - args -> env vars -> 
defaults + # Config order followed - args -> env vars -> defaults self._config = VortexDBConfig.from_env( grpc_url=grpc_url, api_key=api_key, @@ -31,7 +32,7 @@ def __init__( self._conn = GRPCConnection(self._config) -# The basic operations + # The basic operations def insert(self, *, vector: DenseVector, payload: Payload) -> str: """ @@ -151,7 +152,11 @@ def batch_search( normalized = [] for i, q in enumerate(queries): - if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + if ( + hasattr(q, "vector") + and hasattr(q, "similarity") + and hasattr(q, "limit") + ): normalized.append((q.vector, q.similarity, q.limit)) continue @@ -195,8 +200,7 @@ def batch_search( def _validate_dense_vector(vector: DenseVector) -> None: if not isinstance(vector, DenseVector): raise TypeError( - "vector must be a DenseVector. " - "Use: DenseVector([1.0, 2.0, 3.0])" + "vector must be a DenseVector. Use: DenseVector([1.0, 2.0, 3.0])" ) def close(self) -> None: @@ -211,4 +215,4 @@ def __enter__(self) -> "VortexDB": return self def __exit__(self, exc_type, exc, tb) -> None: - self.close() \ No newline at end of file + self.close() diff --git a/client/python/vortexdb/config.py b/client/python/vortexdb/config.py index 46bd508..d85be84 100644 --- a/client/python/vortexdb/config.py +++ b/client/python/vortexdb/config.py @@ -5,6 +5,8 @@ DEFAULT_GRPC_HOST = "localhost" DEFAULT_GRPC_PORT = 50051 DEFAULT_TIMEOUT = 5.0 + + @dataclass(frozen=True) class VortexDBConfig: """Configuration for the VortexDB Python client""" @@ -20,7 +22,7 @@ def from_env( api_key: str | None = None, timeout: float | None = None, ) -> "VortexDBConfig": - """ Load configuration from explicit arguments with environment variable fallback """ + """Load configuration from explicit arguments with environment variable fallback""" resolved_grpc_url = ( grpc_url diff --git a/client/python/vortexdb/connection.py b/client/python/vortexdb/connection.py index 66716d0..bd348ac 100644 --- 
a/client/python/vortexdb/connection.py +++ b/client/python/vortexdb/connection.py @@ -8,7 +8,7 @@ class GRPCConnection: - """ gRPC connection wrapper for VortexDB""" + """gRPC connection wrapper for VortexDB""" def __init__(self, config: VortexDBConfig): self._config = config @@ -26,7 +26,7 @@ def call( rpc: Callable[..., Any], request: Any, ) -> Any: - """ Execute a gRPC call with standard error handling """ + """Execute a gRPC call with standard error handling""" try: return rpc( request, @@ -38,5 +38,5 @@ def call( raise map_grpc_error(e) from e def close(self) -> None: - """ Close the underlying gRPC channel """ + """Close the underlying gRPC channel""" self._channel.close() diff --git a/client/python/vortexdb/exceptions.py b/client/python/vortexdb/exceptions.py index 7a4498b..07d914a 100644 --- a/client/python/vortexdb/exceptions.py +++ b/client/python/vortexdb/exceptions.py @@ -25,6 +25,6 @@ class ServiceUnavailableError(VortexDBError): class InternalServerError(VortexDBError): """Internal error in the server""" + class ConfigurationError(VortexDBError): """Invalid or missing client configuration.""" - diff --git a/client/python/vortexdb/models.py b/client/python/vortexdb/models.py index 0bfab5f..0ba1e6c 100644 --- a/client/python/vortexdb/models.py +++ b/client/python/vortexdb/models.py @@ -4,11 +4,12 @@ from vortexdb.grpc import vector_db_pb2 -# I found this to be a good idea, because +# I found this to be a good idea, because # 1. readability # 2. will help in HTTP client # 3. 
transport conversion at the very end, won't break if proto enum changes + class Similarity(Enum): EUCLIDEAN = "euclidean" MANHATTAN = "manhattan" @@ -57,19 +58,18 @@ def __post_init__(self): for v in self.values: if not isinstance(v, (int, float)): - raise TypeError( - "DenseVector values must be numeric (int or float)" - ) + raise TypeError("DenseVector values must be numeric (int or float)") # force float normalization object.__setattr__(self, "values", [float(v) for v in self.values]) - + def to_proto(self) -> vector_db_pb2.DenseVector: return vector_db_pb2.DenseVector(values=self.values) - + def to_list(self) -> list[float]: return list(self.values) + # & Helper Function for Batch of DenseVectors def to_dense_vectors(arr): return [DenseVector(x) for x in arr] @@ -92,7 +92,6 @@ def __post_init__(self): if not isinstance(self.content_type, ContentType): raise TypeError("content_type must be ContentType enum") - def to_proto(self) -> vector_db_pb2.Payload: return vector_db_pb2.Payload( content_type=self.content_type.to_proto(), @@ -122,7 +121,7 @@ def from_proto(proto: vector_db_pb2.Point) -> "Point": vector=DenseVector(list(proto.vector.values)), payload=payload_obj, ) - + def pretty(self) -> str: return ( f"\nPoint:\n id = {self.id},\n" @@ -132,6 +131,7 @@ def pretty(self) -> str: f" payload = '{self.payload.content}'" ) + # I added this because using tuples will get messy if we increase fields in a search query @dataclass(frozen=True) class SearchQuery: @@ -144,4 +144,4 @@ def to_proto(self) -> vector_db_pb2.SearchRequest: query_vector=self.vector.to_proto(), similarity=self.similarity.to_proto(), limit=self.limit, - ) \ No newline at end of file + ) diff --git a/client/python/vortexdb/protoutils.py b/client/python/vortexdb/protoutils.py index d91306a..c6aae69 100644 --- a/client/python/vortexdb/protoutils.py +++ b/client/python/vortexdb/protoutils.py @@ -12,40 +12,39 @@ def build_insert_request( payload=payload.to_proto(), ) + def build_batch_insert_request( 
*, items: list[tuple[DenseVector, Payload]], ) -> vector_db_pb2.InsertVectorsBatchRequest: - if not isinstance(items, (list,tuple)): + if not isinstance(items, (list, tuple)): raise TypeError("Items must be a list of (DenseVector, Payload) tuples") - + if not items: raise ValueError("Items cannot be empty") - + requests = [] for i, pair in enumerate(items): - if not isinstance(pair, (list,tuple)) or len(pair)!=2: + if not isinstance(pair, (list, tuple)) or len(pair) != 2: raise TypeError(f"items[{i}] must be a tuple of (DenseVector, Payload)") - + vector, payload = pair if not isinstance(vector, DenseVector): raise TypeError( - f"items[{i}][0] must be a DenseVector" - "Use: DenseVector([1.0, 2.0, 3.0])" + f"items[{i}][0] must be a DenseVectorUse: DenseVector([1.0, 2.0, 3.0])" ) - + if not isinstance(payload, Payload): raise TypeError(f"items[{i}][1] must be Payload") - + requests.append(build_insert_request(vector=vector, payload=payload)) - return vector_db_pb2.InsertVectorsBatchRequest(vectors = requests) + return vector_db_pb2.InsertVectorsBatchRequest(vectors=requests) + def build_point_id_request(point_id: str) -> vector_db_pb2.PointID: - return vector_db_pb2.PointID( - id=vector_db_pb2.UUID(value=point_id) - ) + return vector_db_pb2.PointID(id=vector_db_pb2.UUID(value=point_id)) def build_search_request( @@ -66,19 +65,23 @@ def build_search_request( def build_batch_search_request( *, queries: list[tuple[DenseVector, Similarity, int]], - ef: int | None=None, + ef: int | None = None, ) -> vector_db_pb2.SearchPointsBatchRequest: - if not isinstance(queries, (list,tuple)): - raise TypeError("Queries must be a list of (DenseVector, Similarity, Limit (int)) tuples") - + if not isinstance(queries, (list, tuple)): + raise TypeError( + "Queries must be a list of (DenseVector, Similarity, Limit (int)) tuples" + ) + if not queries: raise ValueError("Queries cannot be empty") - + requests = [] for i, trio in enumerate(queries): - if not isinstance(trio, (list,tuple)) or 
len(trio)!=3: - raise TypeError(f"queries[{i}] must be a tuple of (DenseVector, Similarity, Limit(int))") + if not isinstance(trio, (list, tuple)) or len(trio) != 3: + raise TypeError( + f"queries[{i}] must be a tuple of (DenseVector, Similarity, Limit(int))" + ) vector, similarity, limit = trio if not isinstance(vector, DenseVector): @@ -90,12 +93,14 @@ def build_batch_search_request( raise TypeError(f"queries[{i}][1] must be Similarity") if not isinstance(limit, int): raise TypeError(f"queries[{i}][2] must be an integer value") - - requests.append(vector_db_pb2.SearchRequest( + + requests.append( + vector_db_pb2.SearchRequest( query_vector=vector.to_proto(), similarity=similarity.to_proto(), limit=limit, ef=ef, - )) - - return vector_db_pb2.SearchPointsBatchRequest(queries=requests) \ No newline at end of file + ) + ) + + return vector_db_pb2.SearchPointsBatchRequest(queries=requests) From 1b7468ffdd2b5a802d9fdbfc414cf7cd8e2c6fa0 Mon Sep 17 00:00:00 2001 From: peopleig Date: Mon, 15 Jun 2026 06:16:25 +0530 Subject: [PATCH 4/4] Add test for api parity between sync and async classes --- client/python/tests/test_api_parity.py | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 client/python/tests/test_api_parity.py diff --git a/client/python/tests/test_api_parity.py b/client/python/tests/test_api_parity.py new file mode 100644 index 0000000..ee8e611 --- /dev/null +++ b/client/python/tests/test_api_parity.py @@ -0,0 +1,40 @@ +# VortexDB and AsyncVortexDB should expose the same public methods +# with the same signatures +import inspect + +from vortexdb.client import VortexDB +from vortexdb.async_client import AsyncVortexDB + + +def public_methods(cls): + return { + name: value + for name, value in vars(cls).items() + if callable(value) and not name.startswith("_") + } + + +def test_sync_async_client_api_parity(): + sync_methods = public_methods(VortexDB) + async_methods = public_methods(AsyncVortexDB) + + sync_names = set(sync_methods) 
+ async_names = set(async_methods) + + assert sync_names == async_names, ( + "Sync and async clients expose different methods. " + f"Only sync: {sorted(sync_names - async_names)}. " + f"Only async: {sorted(async_names - sync_names)}." + ) + + mismatches = [ + f" {name}: sync{inspect.signature(sync_methods[name])}" + f" != async{inspect.signature(async_methods[name])}" + for name in sync_names + if inspect.signature(sync_methods[name]) + != inspect.signature(async_methods[name]) + ] + + assert not mismatches, ( + "Sync and async methods have mismatched signatures:\n" + "\n".join(mismatches) + )