From cd11eec928baae2a8a0c8327b1d1caa8696f4829 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Mon, 26 Jan 2026 15:46:02 -0500 Subject: [PATCH 1/9] first working sql tests --- ...FT - internal] SQL-Redis _ MLP proposal.md | 124 +++++ docs/user_guide/12_sql_to_redis_queries.ipynb | 361 ++++++++++++ pyproject.toml | 6 + redisvl/index/index.py | 48 +- redisvl/query/__init__.py | 2 + redisvl/query/sql.py | 41 ++ tests/integration/test_sql_redis.py | 517 ++++++++++++++++++ uv.lock | 40 +- 8 files changed, 1136 insertions(+), 3 deletions(-) create mode 100644 [DRAFT - internal] SQL-Redis _ MLP proposal.md create mode 100644 docs/user_guide/12_sql_to_redis_queries.ipynb create mode 100644 redisvl/query/sql.py create mode 100644 tests/integration/test_sql_redis.py diff --git a/[DRAFT - internal] SQL-Redis _ MLP proposal.md b/[DRAFT - internal] SQL-Redis _ MLP proposal.md new file mode 100644 index 00000000..2a458941 --- /dev/null +++ b/[DRAFT - internal] SQL-Redis _ MLP proposal.md @@ -0,0 +1,124 @@ +# **Objective** + +Make sql-like commands available to be translated into Redis queries via redisvl to cut down on syntax overhead for engineers. + +Ex: + +```py +from redisvl.query import SQLQuery + +sql_query = SQLQuery(""" + SELECT title, author, price + FROM my_book_index + WHERE category = "scify" +""" +) + +response = redis_index.query(sql_query) +``` + +This code would then produce the equivalent redis query to be executed against the database: + +```py +FT.search my_book_index + "@category:{scify}" + LOAD 3 @title @author @price + DIALECT 2 +``` + +# **Scope** + +### **Disclaimers:** + +* Redis is a nosql database therefore this conversion will **not allow** for SQL-like joins and other SQL specific querying patterns due to core data modeling differences. +* Helper classes will be for **query-side** only \- meaning it will **not** be in scope to create or modify indexes via a SQL syntax. +* We will also limit initial scope to target **specific SQL dialect**. + * Target dialect: \ + +### **In scope query examples:** + +| Goal Functionality | Redis Example | SQL equivalent | +| :---- | :---- | :---- | +| Group by count query. 
| FT.AGGREGATE KM\_2345 @created\_time:\[0 \+inf\] GROUPBY 7 @file\_id @file\_name @doc\_base\_id @created\_by @created\_time @last\_updated\_by @last\_updated\_time REDUCE COUNT 0 AS count LIMIT 0 1000000 TIMEOUT 10000 | SELECT file\_id, file\_name, doc\_base\_id, created\_by, created\_time, last\_updated\_by, last\_updated\_time, COUNT(\*) AS count FROM KM\_2345 WHERE created\_time \>= 0 GROUP BY file\_id, file\_name, doc\_base\_id, created\_by, created\_time, last\_updated\_by, last\_updated\_time LIMIT 1000000; | +| Get list of events based on filter | FT.AGGREGATE KM\_1234 (@EventDate:\[1755144000 1768971599\]) @created\_time:\[0 \+inf\] GROUPBY 1 @doc\_base\_id REDUCE COUNT\_DISTINCT 1 @file\_id AS count\_distinct\_file\_id REDUCE TOLIST 1 @file\_name AS tolist\_file\_name LIMIT 0 1000000 TIMEOUT 10000 | SELECT doc\_base\_id, COUNT(DISTINCT file\_id) AS count\_distinct\_file\_id, ARRAY\_AGG(DISTINCT file\_name) AS tolist\_file\_name FROM KM\_1234 WHERE EventDate BETWEEN 1755144000 AND 1768971599 AND created\_time \>= 0 GROUP BY doc\_base\_id LIMIT 1000000; | +| Filter and group count based query | FT.AGGREGATE KM\_53c4bf8a-8435-4e99-9ec2-e800faf677f3 (@page\_id:{517805590}) @created\_time:\[0 \+inf\] GROUPBY 12 @doc\_base\_id @created\_time @last\_updated\_time @file\_name @file\_id @created\_by @last\_updated\_by @space\_key @title @link @attachment\_file\_name @is\_attachment REDUCE COUNT 0 AS count SORTBY 2 @created\_time DESC LIMIT 0 1000000 TIMEOUT 10000 | SELECT doc\_base\_id, created\_time, last\_updated\_time, file\_name, file\_id, created\_by, last\_updated\_by, space\_key, title, link, attachment\_file\_name, is\_attachment, COUNT(\*) AS count FROM KM\_53c4bf8a\_8435\_4e99\_9ec2\_e800faf677f3 WHERE page\_id \= '517805590' AND created\_time \>= 0 GROUP BY doc\_base\_id, created\_time, last\_updated\_time, file\_name, file\_id, created\_by, last\_updated\_by, space\_key, title, link, attachment\_file\_name, is\_attachment ORDER BY created\_time DESC LIMIT 1000000; | +| additional examples | | | +| ft.search with filters and sorting | `FT.SEARCH books "((@stock:[(50 +inf] @price:[-inf (20]) @description:(classic))" RETURN 1 title DIALECT 2 LIMIT 0 10` | `SELECT title FROM books WHERE stock > 50 AND price < 20 AND description_tsv @@ plainto_tsquery('english', 'classic') LIMIT 10;` | +| ft.aggregate with filters, reducers, and sorting | FT.AGGREGATE books "@stock:\[70 \+inf\]" SCORER TFIDF DIALECT 2 GROUPBY 1 @genre REDUCE AVG 1 price AS avg\_price | SELECT genre, AVG(price) AS avg\_price FROM books WHERE stock \> 70 GROUP BY genre; | +| Pure BM25 based test search | FT.SEARCH books "@description:(thrilling | book | get | lost | beach)" SCORER BM25STD WITHSCORES RETURN 2 title description DIALECT 2 LIMIT 0 20 | `SELECT title, description, ts_rank( description_tsv, plainto_tsquery('english', 'thrilling book get lost beach') ) AS score FROM books WHERE description_tsv @@ plainto_tsquery('english', 'thrilling book get lost beach') ORDER BY score DESC LIMIT 20;` | +| \ | | | + +### **Break down by clause, operator, and datatype:** + +* Supported clauses: + * SELECT (explicit column list only) + * FROM (single index) + * WHERE (boolean logic, operators) + * ORDER BY + * LIMIT / OFFSET + * ISMISSING / EXISTS + * GROUP BY + * With supported [reducers](https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/aggregations/) + * COUNT + * COUNT\_DISTINCT + * SUM + * MIN + * MAX + * AVG + * STDDEV + * QUANTILE + * TOLIST + * FIRSTVALUE +* Supported operators: + * \=, \!= 
+ * \<, \<=, \>, \>= + * IN + * AND, OR, NOT +* Supported data types: + * TAG + * NUMERIC + * TEXT + * VECTOR + * DATE + * GEO + +# **Deliverables** + +Per the objective, the main deliverable of this work will be a redisvl class allowing for the easy translation between in scope SQL queries and Redis search equivalents. It will be similar if not directly extended from redisvl.query.BaseQuery source code available [here](https://github.com/redis/redis-vl-python/blob/82776afc450818d4358cee7e6071eb5c0eacc2d9/redisvl/query/query.py#L25-L26). + +# **Advanced queries (i.e no standard SQL equivalent)** + +For vector and other types of queries there may not be direct SQL equivalent statements. For these cases there needs to be agreed upon convention or agreement that the team adopt the client pattern. + +### RedisVL client-based example: + +```py +from redisvl.query import HybridQuery + +user_query = "Thrilling book that I can get lost in at the beach" +vector = hf.embed(user_query, as_buffer=True) + +query = HybridQuery( + text=user_query, + text_field_name="description", + vector=vector, + vector_field_name="vector", + combination_method="LINEAR", + yield_text_score_as="text_score", + yield_vsim_score_as="vector_similarity", + yield_combined_score_as="hybrid_score", + return_fields=["title"], +) + +results = index.query(query) +``` + +### Illustrative SQL-translation examples: + +| Redis functionality | Redis example | SQL equivalent (Illustrative) | +| ----- | ----- | ----- | +| Vector search with filters and sorting | FT.SEARCH books "(@genre:{Science\\\\ Fiction} @price:\[-inf 20\])=\>\[KNN 3 @vector $vector AS vector\_distance\]" RETURN 3 title genre vector\_distance SORTBY vector\_distance ASC DIALECT 2 LIMIT 0 3 PARAMS 2 vector \<384-dimension embedding binary data\> | `SELECT title, genre, embedding <=> :query_vector AS vector_distance FROM books WHERE genre = 'Science Fiction' AND price <= 20 ORDER BY embedding <=> :query_vector LIMIT 3;` | +| Hybrid query BM25 \+ vector | FT.HYBRID books \# Text Search Component SEARCH "(\~@description:(thrilling | book | get | lost | beach))" SCORER BM25STD YIELD\_SCORE\_AS text\_score \# Vector Search Component VSIM @vector $vector YIELD\_SCORE\_AS vector\_similarity \# Score Combination COMBINE LINEAR 6 ALPHA 0.3 \# text weight BETA 0.7 \# vector weight YIELD\_SCORE\_AS hybrid\_score \# Output LOAD 1 @title LIMIT 0 10 PARAMS 2 vector \<384-dimension embedding binary\> | `SELECT title, HYBRID_SCORE( ts_rank(description, plainto_tsquery(:q)), vector <=> :query_vector, TEXT_WEIGHT 0.3, VECTOR_WEIGHT 0.7 ) AS hybrid_score FROM books WHERE description @@ plainto_tsquery(:q) ORDER BY hybrid_score DESC LIMIT 10;` | +| Aggregate Hybrid search with filters and sorting (pre 8.4) | FT.AGGREGATE books "(\~@description:(thrilling | book | get | lost | beach))=\>\[KNN 20 @vector $vector AS vector\_distance\]" SCORER BM25 ADDSCORES LOAD 2 title description DIALECT 2 APPLY "(2 \- @vector\_distance) / 2" AS vector\_similarity APPLY "@\_\_score" AS text\_score APPLY "0.3 \* @text\_score \+ 0.7 \* @vector\_similarity" AS hybrid\_score SORTBY 2 @hybrid\_score DESC MAX 20 PARAMS 2 vector \<384-dimension embedding binary\> | `SELECT title, HYBRID_SCORE( ts_rank(description, plainto_tsquery(:q)), vector <=> :query_vector, TEXT_WEIGHT 0.3, VECTOR_WEIGHT 0.7 ) AS hybrid_score FROM books WHERE description @@ plainto_tsquery(:q) ORDER BY hybrid_score DESC LIMIT 10;` | + + diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb 
new file mode 100644 index 00000000..147e3ec2 --- /dev/null +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SQLQuery class\n", + "\n", + "Pass a sql string to the SQLQuery class and it will be translated to a Redis query." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Schema Dictionary:**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "schema = {\n", + " \"index\": {\n", + " \"name\": \"user_simple\",\n", + " \"prefix\": \"user_simple_docs\",\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"user\", \"type\": \"tag\"},\n", + " {\"name\": \"credit_score\", \"type\": \"tag\"},\n", + " {\"name\": \"job\", \"type\": \"text\"},\n", + " {\"name\": \"age\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"user_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 3,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " }\n", + " ]\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample Dataset Preparation\n", + "\n", + "Below, create a mock dataset with `user`, `job`, `age`, `credit_score`, and\n", + "`user_embedding` fields. The `user_embedding` vectors are synthetic examples\n", + "for demonstration purposes.\n", + "\n", + "For more information on creating real-world embeddings, refer to this\n", + "[article](https://mlops.community/vector-similarity-search-from-basics-to-production/)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "data = [\n", + " {\n", + " 'user': 'john',\n", + " 'age': 1,\n", + " 'job': 'engineer',\n", + " 'credit_score': 'high',\n", + " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " },\n", + " {\n", + " 'user': 'mary',\n", + " 'age': 2,\n", + " 'job': 'doctor',\n", + " 'credit_score': 'low',\n", + " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " },\n", + " {\n", + " 'user': 'joe',\n", + " 'age': 3,\n", + " 'job': 'dentist',\n", + " 'credit_score': 'medium',\n", + " 'user_embedding': np.array([0.9, 0.9, 0.1], dtype=np.float32).tobytes()\n", + " }\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a `SearchIndex`\n", + "\n", + "With the schema and sample dataset ready, create a `SearchIndex`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bring your own Redis connection instance\n", + "\n", + "This is ideal in scenarios where you have custom settings on the connection instance or if your application will share a connection pool:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "from redis import Redis\n", + "\n", + "client = Redis.from_url(\"redis://localhost:6379\")\n", + "index = SearchIndex.from_dict(schema, redis_client=client, validate_on_load=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let the index manage the connection instance\n", + "\n", + "This is ideal for simple cases:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)\n", + "\n", + "# If you don't specify a client or Redis URL, the index will attempt to\n", + "# connect to Redis at the default address \"redis://localhost:6379\"." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the index\n", + "\n", + "Now that we are connected to Redis, we need to run the create command." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "index.create(overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">Note that at this point, the index has no entries. Data loading follows." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data to `SearchIndex`\n", + "\n", + "Load the sample dataset to Redis.\n", + "\n", + "### Validate data entries on load\n", + "RedisVL uses pydantic validation under the hood to ensure loaded data is valid and confirms to your schema. This setting is optional and can be configured in the `SearchIndex` class." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['user_simple_docs:01KFXYVY5NDYGV245XDGFFHW59', 'user_simple_docs:01KFXYVY5QST3G9HQ8F39X39Z7', 'user_simple_docs:01KFXYVY5RF0DJM4JRCXDPF2A2']\n" + ] + } + ], + "source": [ + "keys = index.load(data)\n", + "\n", + "print(keys)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a `SQLQuery` Object" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.query import SQLQuery\n", + "from jupyterutils import result_print\n", + "\n", + "sql_query = SQLQuery(\n", + " \"\"\"\n", + " SELECT user, credit_score, job, age\n", + " FROM user_simple\n", + " WHERE age > 1\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Executing the query" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "ImportError", + "evalue": "sql-redis is required for SQL query support. 
Install it with: pip install redisvl[sql]", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:911\u001b[39m, in \u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 910\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m911\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mexecutor\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Executor\n\u001b[32m 912\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mschema\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SchemaRegistry\n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'sql_redis'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m results = \u001b[43mindex\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2\u001b[39m result_print(results)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:1171\u001b[39m, in \u001b[36mSearchIndex.query\u001b[39m\u001b[34m(self, query)\u001b[39m\n\u001b[32m 1169\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._aggregate(query)\n\u001b[32m 1170\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, SQLQuery):\n\u001b[32m-> \u001b[39m\u001b[32m1171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, HybridQuery):\n\u001b[32m 1173\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._hybrid_search(query)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:914\u001b[39m, in \u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 912\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mschema\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SchemaRegistry\n\u001b[32m 913\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m914\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m 915\u001b[39m \u001b[33m\"\u001b[39m\u001b[33msql-redis is required for SQL query support. 
\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 916\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mInstall it with: pip install redisvl[sql]\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 917\u001b[39m )\n\u001b[32m 919\u001b[39m registry = SchemaRegistry(\u001b[38;5;28mself\u001b[39m._redis_client)\n\u001b[32m 920\u001b[39m registry.load_all() \u001b[38;5;66;03m# Loads index schemas from Redis\u001b[39;00m\n", + "\u001b[31mImportError\u001b[39m: sql-redis is required for SQL query support. Install it with: pip install redisvl[sql]" + ] + } + ], + "source": [ + "results = index.query(sql_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", + "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", + "\n", + "But if you want to clean up everything, including the index, just use `.delete()`\n", + "which will by default remove the index AND the underlying data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clear all data from Redis associated with the index\n", + "await index.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Butm the index is still in place\n", + "await index.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove / delete the index in its entirety\n", + "await index.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "redisvl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pyproject.toml b/pyproject.toml index 9286b066..6eb3c14a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,9 @@ bedrock = [ pillow = [ "pillow>=11.3.0", ] +sql = [ + "sql-redis @ file:///Users/robert.shelton/Documents/sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl", +] [project.urls] Homepage = "https://github.com/redis/redis-vl-python" @@ -64,6 +67,9 @@ rvl = "redisvl.cli.runner:main" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.metadata] +allow-direct-references = true + [dependency-groups] dev = [ "black>=25.1.0,<26", diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 4bc66f67..2ed8a42a 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -31,6 +31,7 @@ from redisvl.query.hybrid import HybridQuery from redisvl.query.query import VectorQuery +from redisvl.query.sql import SQLQuery from redisvl.redis.utils import ( _keys_share_hash_tag, async_cluster_create_index, @@ -917,6 +918,49 @@ def _aggregate(self, aggregation_query: AggregationQuery) -> List[Dict[str, Any] storage_type=self.schema.index.storage_type, ) + def 
_sql_query(self, sql_query: SQLQuery) -> List[Dict[str, Any]]: + """Execute a SQL query and return results. + + Args: + sql_query: The SQLQuery object containing the SQL statement. + + Returns: + List of dictionaries containing the query results. + + Raises: + ImportError: If sql-redis package is not installed. + """ + try: + from sql_redis.executor import Executor + from sql_redis.schema import SchemaRegistry + except ImportError: + raise ImportError( + "sql-redis is required for SQL query support. " + "Install it with: pip install redisvl[sql]" + ) + + registry = SchemaRegistry(self._redis_client) + registry.load_all() # Loads index schemas from Redis + + executor = Executor(self._redis_client, registry) + + # Execute the query with any params + result = executor.execute(sql_query.sql, params=sql_query.params) + + # Decode bytes to strings in the results (Redis may return bytes) + decoded_rows = [] + for row in result.rows: + decoded_row = {} + for key, value in row.items(): + # Decode key if bytes + str_key = key.decode("utf-8") if isinstance(key, bytes) else key + # Decode value if bytes + str_value = value.decode("utf-8") if isinstance(value, bytes) else value + decoded_row[str_key] = str_value + decoded_rows.append(decoded_row) + + return decoded_rows + def aggregate(self, *args, **kwargs) -> "AggregateResult": """Perform an aggregation operation against the index. @@ -1118,7 +1162,7 @@ def _query(self, query: BaseQuery) -> List[Dict[str, Any]]: return process_results(results, query=query, schema=self.schema) def query( - self, query: Union[BaseQuery, AggregationQuery, HybridQuery] + self, query: Union[BaseQuery, AggregationQuery, HybridQuery, SQLQuery] ) -> List[Dict[str, Any]]: """Execute a query on the index. @@ -1146,6 +1190,8 @@ def query( """ if isinstance(query, AggregationQuery): return self._aggregate(query) + elif isinstance(query, SQLQuery): + return self._sql_query(query) elif isinstance(query, HybridQuery): return self._hybrid_search(query) else: diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py index aa84633e..3f78c755 100644 --- a/redisvl/query/__init__.py +++ b/redisvl/query/__init__.py @@ -15,6 +15,7 @@ VectorQuery, VectorRangeQuery, ) +from redisvl.query.sql import SQLQuery __all__ = [ "BaseQuery", @@ -29,4 +30,5 @@ "AggregateHybridQuery", "MultiVectorQuery", "Vector", + "SQLQuery", ] diff --git a/redisvl/query/sql.py b/redisvl/query/sql.py new file mode 100644 index 00000000..03e55975 --- /dev/null +++ b/redisvl/query/sql.py @@ -0,0 +1,41 @@ +"""SQL Query class for executing SQL-like queries against Redis.""" + +from typing import Any, Dict, Optional + + +class SQLQuery: + """A query class that translates SQL-like syntax into Redis queries. + + This class allows users to write SQL SELECT statements that are + automatically translated into Redis FT.SEARCH or FT.AGGREGATE commands. + + .. code-block:: python + + from redisvl.query import SQLQuery + from redisvl.index import SearchIndex + + index = SearchIndex.from_existing("products", redis_url="redis://localhost:6379") + + sql_query = SQLQuery(''' + SELECT title, price, category + FROM products + WHERE category = 'electronics' AND price < 100 + ''') + + results = index.query(sql_query) + + Note: + Requires the optional `sql-redis` package. Install with: + ``pip install redisvl[sql]`` + """ + + def __init__(self, sql: str, params: Optional[Dict[str, Any]] = None): + """Initialize a SQLQuery. + + Args: + sql: The SQL SELECT statement to execute. 
+ params: Optional dictionary of parameters for parameterized queries. + Useful for passing vector data for similarity searches. + """ + self.sql = sql + self.params = params or {} diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis.py new file mode 100644 index 00000000..ae447df5 --- /dev/null +++ b/tests/integration/test_sql_redis.py @@ -0,0 +1,517 @@ +"""Integration tests for SQLQuery class. + +These tests verify that SQLQuery can translate SQL-like syntax +into proper Redis queries and return expected results. +""" + +import uuid + +import pytest + +from redisvl.index import SearchIndex +from redisvl.query import SQLQuery + + +@pytest.fixture +def sql_index(redis_url, worker_id): + """Create a products index for SQL query testing.""" + unique_id = str(uuid.uuid4())[:8] + index_name = f"sql_products_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"product_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "name", "type": "text", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "stock", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "rating", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "category", "type": "tag", "attrs": {"sortable": True}}, + {"name": "tags", "type": "tag"}, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Load test data + products = [ + { + "title": "Gaming laptop Pro", + "name": "Gaming Laptop", + "price": 899, + "stock": 10, + "rating": 4.5, + "category": "electronics", + "tags": "sale,featured", + }, + { + "title": "Budget laptop Basic", + "name": "Budget Laptop", + "price": 499, + "stock": 25, + "rating": 3.8, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Premium laptop Ultra", + "name": "Premium Laptop", + "price": 1299, + "stock": 5, + "rating": 4.9, + "category": "electronics", + "tags": "featured", + }, + { + "title": "Python Programming", + "name": "Python Book", + "price": 45, + "stock": 100, + "rating": 4.7, + "category": "books", + "tags": "bestseller", + }, + { + "title": "Redis in Action", + "name": "Redis Book", + "price": 55, + "stock": 50, + "rating": 4.6, + "category": "books", + "tags": "featured", + }, + { + "title": "Data Science Guide", + "name": "DS Book", + "price": 65, + "stock": 30, + "rating": 4.4, + "category": "books", + "tags": "sale", + }, + { + "title": "Wireless Mouse", + "name": "Mouse", + "price": 29, + "stock": 200, + "rating": 4.2, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Mechanical Keyboard", + "name": "Keyboard", + "price": 149, + "stock": 75, + "rating": 4.6, + "category": "electronics", + "tags": "featured", + }, + { + "title": "USB Hub", + "name": "Hub", + "price": 25, + "stock": 150, + "rating": 3.9, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Monitor Stand", + "name": "Stand", + "price": 89, + "stock": 40, + "rating": 4.1, + "category": "accessories", + "tags": "sale,featured", + }, + { + "title": "Desk Lamp", + "name": "Lamp", + "price": 35, + "stock": 80, + "rating": 4.0, + "category": "accessories", + "tags": "sale", + }, + { + "title": "Notebook Set", + "name": "Notebooks", + "price": 15, + "stock": 300, + "rating": 4.3, + "category": "stationery", + "tags": "bestseller", + }, + ] + + index.load(products) + + yield index + + # Cleanup + 
index.delete(drop=True) + + +class TestSQLQueryBasic: + """Tests for basic SQL SELECT queries.""" + + def test_import_sql_query(self): + """Verify SQLQuery can be imported from redisvl.query.""" + from redisvl.query import SQLQuery + + assert SQLQuery is not None + + def test_select_all_fields(self, sql_index): + """Test SELECT * returns all fields.""" + sql_query = SQLQuery(f"SELECT * FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Verify results contain expected fields + assert "title" in results[0] + assert "price" in results[0] + + def test_select_specific_fields(self, sql_index): + """Test SELECT with specific field list.""" + sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Results should contain requested fields + assert "title" in results[0] + assert "price" in results[0] + + +class TestSQLQueryWhere: + """Tests for SQL WHERE clause filtering.""" + + def test_where_tag_equals(self, sql_index): + """Test WHERE with tag field equality.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "electronics" + + def test_where_numeric_comparison(self, sql_index): + """Test WHERE with numeric field comparison.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price < 50 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) < 50 + + def test_where_combined_and(self, sql_index): + """Test WHERE with AND combining multiple conditions.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' AND price < 100 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + assert result["category"] == "electronics" + assert float(result["price"]) < 100 + + def test_where_numeric_range(self, sql_index): + """Test WHERE with numeric range (BETWEEN equivalent).""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price >= 25 AND price <= 50 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + price = float(result["price"]) + assert 25 <= price <= 50 + + +class TestSQLQueryOrderBy: + """Tests for SQL ORDER BY clause.""" + + def test_order_by_asc(self, sql_index): + """Test ORDER BY ascending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price ASC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices) + + def test_order_by_desc(self, sql_index): + """Test ORDER BY descending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price DESC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices, reverse=True) + + +class TestSQLQueryLimit: + """Tests for SQL LIMIT and OFFSET clauses.""" + + def test_limit(self, sql_index): + """Test LIMIT clause.""" + sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3") + results = sql_index.query(sql_query) + + assert len(results) == 3 + + def test_limit_with_offset(self, sql_index): + """Test LIMIT with OFFSET for pagination.""" + # 
First page + sql_query1 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0" + ) + results1 = sql_index.query(sql_query1) + + # Second page + sql_query2 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3" + ) + results2 = sql_index.query(sql_query2) + + assert len(results1) == 3 + assert len(results2) == 3 + # Pages should have different results + titles1 = {r["title"] for r in results1} + titles2 = {r["title"] for r in results2} + assert titles1.isdisjoint(titles2) + + +class TestSQLQueryAggregation: + """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.).""" + + def test_count_all(self, sql_index): + """Test COUNT(*) aggregation.""" + sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert int(results[0]["total"]) == 12 # 12 products in test data + + def test_group_by_with_count(self, sql_index): + """Test GROUP BY with COUNT.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # Should have groups for electronics, books, accessories, stationery + categories = {r["category"] for r in results} + assert "electronics" in categories + assert "books" in categories + + def test_group_by_with_avg(self, sql_index): + """Test GROUP BY with AVG.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # All results should have category and avg_price + for result in results: + assert "category" in result + assert "avg_price" in result + assert float(result["avg_price"]) > 0 + + def test_group_by_with_filter(self, sql_index): + """Test GROUP BY with WHERE filter.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + WHERE stock > 50 + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "avg_price" in result + + +class TestSQLQueryIntegration: + """End-to-end integration tests matching proposal examples.""" + + def test_proposal_example_basic(self, sql_index): + """Test the basic example from the MLP proposal.""" + # Example from proposal doc (adapted for our test data) + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'books' + """ + ) + + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "books" + assert "title" in result + assert "price" in result + + +@pytest.fixture +def vector_index(redis_url, worker_id): + """Create a books index with vector embeddings for SQL query testing.""" + import numpy as np + + unique_id = str(uuid.uuid4())[:8] + index_name = f"sql_books_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"book_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "genre", "type": "tag", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "dims": 4, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + ], + }, + 
redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Create test books with embeddings + books = [ + { + "title": "Dune", + "genre": "Science Fiction", + "price": 15, + "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(), + }, + { + "title": "Foundation", + "genre": "Science Fiction", + "price": 18, + "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(), + }, + { + "title": "Neuromancer", + "genre": "Science Fiction", + "price": 12, + "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(), + }, + { + "title": "The Hobbit", + "genre": "Fantasy", + "price": 14, + "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(), + }, + { + "title": "1984", + "genre": "Dystopian", + "price": 25, + "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(), + }, + ] + + index.load(books) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryVectorSearch: + """Tests for SQL vector similarity search using cosine_distance().""" + + def test_vector_cosine_similarity(self, vector_index): + """Test vector search with cosine_distance() function - pgvector style.""" + import numpy as np + + # Query vector similar to Science Fiction books + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT + title, + genre, + price, + cosine_distance(embedding, :query_vector) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + AND price <= 20 + ORDER BY cosine_distance(embedding, :query_vector) + LIMIT 3 + """, + params={"query_vector": query_vector}, + ) + + results = vector_index.query(sql_query) + + # Should return Science Fiction books under $20 + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert result["genre"] == "Science Fiction" + assert float(result["price"]) <= 20 diff --git a/uv.lock b/uv.lock index ad61bd62..e8a3d86e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.9.2, <3.14" resolution-markers = [ "python_full_version >= '3.13'", @@ -4299,6 +4299,9 @@ pillow = [ sentence-transformers = [ { name = "sentence-transformers" }, ] +sql = [ + { name = "sql-redis" }, +] vertexai = [ { name = "google-cloud-aiplatform" }, { name = "protobuf" }, @@ -4355,11 +4358,12 @@ requires-dist = [ { name = "pyyaml", specifier = ">=5.4,<7.0" }, { name = "redis", specifier = ">=5.0,<7.2" }, { name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.4.0,<4" }, + { name = "sql-redis", marker = "extra == 'sql'", path = "../sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl" }, { name = "tenacity", specifier = ">=8.2.2" }, { name = "urllib3", marker = "extra == 'bedrock'", specifier = "<2.2.0" }, { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.2.2" }, ] -provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow"] +provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow", "sql"] [package.metadata.requires-dev] dev = [ @@ -5263,6 +5267,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 
92072, upload-time = "2024-07-29T01:10:08.203Z" }, ] +[[package]] +name = "sql-redis" +version = "0.1.0" +source = { path = "../sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl" } +dependencies = [ + { name = "redis", version = "7.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "redis", version = "7.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sqlglot" }, +] +wheels = [ + { filename = "sql_redis-0.1.0-py3-none-any.whl", hash = "sha256:a0a22c98a0fc72918c2647bf33a8a88d6067208be66a35366226e4e1e175d0eb" }, +] + +[package.metadata] +requires-dist = [ + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, + { name = "redis", specifier = ">=5.0.0" }, + { name = "sqlglot", specifier = ">=26.0.0" }, + { name = "testcontainers", extras = ["redis"], marker = "extra == 'dev'", specifier = ">=4.0.0" }, +] +provides-extras = ["dev"] + [[package]] name = "sqlalchemy" version = "2.0.44" @@ -5316,6 +5343,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, ] +[[package]] +name = "sqlglot" +version = "28.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/b6/f188b9616bef49943353f3622d726af30fdb08acbd081deef28ba43ceb48/sqlglot-28.6.0.tar.gz", hash = "sha256:8c0a432a6745c6c7965bbe99a17667c5a3ca1d524a54b31997cf5422b1727f6a", size = 5676522, upload-time = "2026-01-13T17:39:24.389Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/a6/21b1e19994296ba4a34bc7abaf4fcb40d7e7787477bdfde58cd843594459/sqlglot-28.6.0-py3-none-any.whl", hash = "sha256:8af76e825dc8456a49f8ce049d69bbfcd116655dda3e53051754789e2edf8eba", size = 575186, upload-time = "2026-01-13T17:39:22.327Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" From 5dbdc05d38c4216aec02f63ff659eff411b2670a Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Wed, 28 Jan 2026 14:29:46 -0500 Subject: [PATCH 2/9] test spreadsheet approach --- ...FT - internal] SQL-Redis _ MLP proposal.md | 124 ----- docs/user_guide/02_hybrid_queries.ipynb | 214 ++++---- docs/user_guide/12_sql_to_redis_queries.ipynb | 475 ++++++++++++++--- redisvl/query/sql.py | 75 +++ tests/integration/test_sql_redis.py | 497 +++++++++++++++++- uv.lock | 6 +- 6 files changed, 1104 insertions(+), 287 deletions(-) delete mode 100644 [DRAFT - internal] SQL-Redis _ MLP proposal.md diff --git a/[DRAFT - internal] SQL-Redis _ MLP proposal.md b/[DRAFT - internal] SQL-Redis _ MLP proposal.md deleted file mode 100644 index 2a458941..00000000 --- a/[DRAFT - internal] SQL-Redis _ MLP proposal.md +++ /dev/null @@ -1,124 +0,0 @@ -# **Objective** - -Make sql-like commands available to be translated into Redis queries via redisvl to cut down on syntax overhead for engineers. 
- -Ex: - -```py -from redisvl.query import SQLQuery - -sql_query = SQLQuery(""" - SELECT title, author, price - FROM my_book_index - WHERE category = "scify" -""" -) - -response = redis_index.query(sql_query) -``` - -This code would then produce the equivalent redis query to be executed against the database: - -```py -FT.search my_book_index - "@category:{scify}" - LOAD 3 @title @author @price - DIALECT 2 -``` - -# **Scope** - -### **Disclaimers:** - -* Redis is a nosql database therefore this conversion will **not allow** for SQL-like joins and other SQL specific querying patterns due to core data modeling differences. -* Helper classes will be for **query-side** only \- meaning it will **not** be in scope to create or modify indexes via a SQL syntax. -* We will also limit initial scope to target **specific SQL dialect**. - * Target dialect: \ - -### **In scope query examples:** - -| Goal Functionality | Redis Example | SQL equivalent | -| :---- | :---- | :---- | -| Group by count query. | FT.AGGREGATE KM\_2345 @created\_time:\[0 \+inf\] GROUPBY 7 @file\_id @file\_name @doc\_base\_id @created\_by @created\_time @last\_updated\_by @last\_updated\_time REDUCE COUNT 0 AS count LIMIT 0 1000000 TIMEOUT 10000 | SELECT file\_id, file\_name, doc\_base\_id, created\_by, created\_time, last\_updated\_by, last\_updated\_time, COUNT(\*) AS count FROM KM\_2345 WHERE created\_time \>= 0 GROUP BY file\_id, file\_name, doc\_base\_id, created\_by, created\_time, last\_updated\_by, last\_updated\_time LIMIT 1000000; | -| Get list of events based on filter | FT.AGGREGATE KM\_1234 (@EventDate:\[1755144000 1768971599\]) @created\_time:\[0 \+inf\] GROUPBY 1 @doc\_base\_id REDUCE COUNT\_DISTINCT 1 @file\_id AS count\_distinct\_file\_id REDUCE TOLIST 1 @file\_name AS tolist\_file\_name LIMIT 0 1000000 TIMEOUT 10000 | SELECT doc\_base\_id, COUNT(DISTINCT file\_id) AS count\_distinct\_file\_id, ARRAY\_AGG(DISTINCT file\_name) AS tolist\_file\_name FROM KM\_1234 WHERE EventDate BETWEEN 1755144000 AND 1768971599 AND created\_time \>= 0 GROUP BY doc\_base\_id LIMIT 1000000; | -| Filter and group count based query | FT.AGGREGATE KM\_53c4bf8a-8435-4e99-9ec2-e800faf677f3 (@page\_id:{517805590}) @created\_time:\[0 \+inf\] GROUPBY 12 @doc\_base\_id @created\_time @last\_updated\_time @file\_name @file\_id @created\_by @last\_updated\_by @space\_key @title @link @attachment\_file\_name @is\_attachment REDUCE COUNT 0 AS count SORTBY 2 @created\_time DESC LIMIT 0 1000000 TIMEOUT 10000 | SELECT doc\_base\_id, created\_time, last\_updated\_time, file\_name, file\_id, created\_by, last\_updated\_by, space\_key, title, link, attachment\_file\_name, is\_attachment, COUNT(\*) AS count FROM KM\_53c4bf8a\_8435\_4e99\_9ec2\_e800faf677f3 WHERE page\_id \= '517805590' AND created\_time \>= 0 GROUP BY doc\_base\_id, created\_time, last\_updated\_time, file\_name, file\_id, created\_by, last\_updated\_by, space\_key, title, link, attachment\_file\_name, is\_attachment ORDER BY created\_time DESC LIMIT 1000000; | -| additional examples | | | -| ft.search with filters and sorting | `FT.SEARCH books "((@stock:[(50 +inf] @price:[-inf (20]) @description:(classic))" RETURN 1 title DIALECT 2 LIMIT 0 10` | `SELECT title FROM books WHERE stock > 50 AND price < 20 AND description_tsv @@ plainto_tsquery('english', 'classic') LIMIT 10;` | -| ft.aggregate with filters, reducers, and sorting | FT.AGGREGATE books "@stock:\[70 \+inf\]" SCORER TFIDF DIALECT 2 GROUPBY 1 @genre REDUCE AVG 1 price AS avg\_price | SELECT genre, AVG(price) AS avg\_price FROM 
books WHERE stock \> 70 GROUP BY genre; | -| Pure BM25 based test search | FT.SEARCH books "@description:(thrilling | book | get | lost | beach)" SCORER BM25STD WITHSCORES RETURN 2 title description DIALECT 2 LIMIT 0 20 | `SELECT title, description, ts_rank( description_tsv, plainto_tsquery('english', 'thrilling book get lost beach') ) AS score FROM books WHERE description_tsv @@ plainto_tsquery('english', 'thrilling book get lost beach') ORDER BY score DESC LIMIT 20;` | -| \ | | | - -### **Break down by clause, operator, and datatype:** - -* Supported clauses: - * SELECT (explicit column list only) - * FROM (single index) - * WHERE (boolean logic, operators) - * ORDER BY - * LIMIT / OFFSET - * ISMISSING / EXISTS - * GROUP BY - * With supported [reducers](https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/aggregations/) - * COUNT - * COUNT\_DISTINCT - * SUM - * MIN - * MAX - * AVG - * STDDEV - * QUANTILE - * TOLIST - * FIRSTVALUE -* Supported operators: - * \=, \!= - * \<, \<=, \>, \>= - * IN - * AND, OR, NOT -* Supported data types: - * TAG - * NUMERIC - * TEXT - * VECTOR - * DATE - * GEO - -# **Deliverables** - -Per the objective, the main deliverable of this work will be a redisvl class allowing for the easy translation between in scope SQL queries and Redis search equivalents. It will be similar if not directly extended from redisvl.query.BaseQuery source code available [here](https://github.com/redis/redis-vl-python/blob/82776afc450818d4358cee7e6071eb5c0eacc2d9/redisvl/query/query.py#L25-L26). - -# **Advanced queries (i.e no standard SQL equivalent)** - -For vector and other types of queries there may not be direct SQL equivalent statements. For these cases there needs to be agreed upon convention or agreement that the team adopt the client pattern. 
- -### RedisVL client-based example: - -```py -from redisvl.query import HybridQuery - -user_query = "Thrilling book that I can get lost in at the beach" -vector = hf.embed(user_query, as_buffer=True) - -query = HybridQuery( - text=user_query, - text_field_name="description", - vector=vector, - vector_field_name="vector", - combination_method="LINEAR", - yield_text_score_as="text_score", - yield_vsim_score_as="vector_similarity", - yield_combined_score_as="hybrid_score", - return_fields=["title"], -) - -results = index.query(query) -``` - -### Illustrative SQL-translation examples: - -| Redis functionality | Redis example | SQL equivalent (Illustrative) | -| ----- | ----- | ----- | -| Vector search with filters and sorting | FT.SEARCH books "(@genre:{Science\\\\ Fiction} @price:\[-inf 20\])=\>\[KNN 3 @vector $vector AS vector\_distance\]" RETURN 3 title genre vector\_distance SORTBY vector\_distance ASC DIALECT 2 LIMIT 0 3 PARAMS 2 vector \<384-dimension embedding binary data\> | `SELECT title, genre, embedding <=> :query_vector AS vector_distance FROM books WHERE genre = 'Science Fiction' AND price <= 20 ORDER BY embedding <=> :query_vector LIMIT 3;` | -| Hybrid query BM25 \+ vector | FT.HYBRID books \# Text Search Component SEARCH "(\~@description:(thrilling | book | get | lost | beach))" SCORER BM25STD YIELD\_SCORE\_AS text\_score \# Vector Search Component VSIM @vector $vector YIELD\_SCORE\_AS vector\_similarity \# Score Combination COMBINE LINEAR 6 ALPHA 0.3 \# text weight BETA 0.7 \# vector weight YIELD\_SCORE\_AS hybrid\_score \# Output LOAD 1 @title LIMIT 0 10 PARAMS 2 vector \<384-dimension embedding binary\> | `SELECT title, HYBRID_SCORE( ts_rank(description, plainto_tsquery(:q)), vector <=> :query_vector, TEXT_WEIGHT 0.3, VECTOR_WEIGHT 0.7 ) AS hybrid_score FROM books WHERE description @@ plainto_tsquery(:q) ORDER BY hybrid_score DESC LIMIT 10;` | -| Aggregate Hybrid search with filters and sorting (pre 8.4) | FT.AGGREGATE books "(\~@description:(thrilling | book | get | lost | beach))=\>\[KNN 20 @vector $vector AS vector\_distance\]" SCORER BM25 ADDSCORES LOAD 2 title description DIALECT 2 APPLY "(2 \- @vector\_distance) / 2" AS vector\_similarity APPLY "@\_\_score" AS text\_score APPLY "0.3 \* @text\_score \+ 0.7 \* @vector\_similarity" AS hybrid\_score SORTBY 2 @hybrid\_score DESC MAX 20 PARAMS 2 vector \<384-dimension embedding binary\> | `SELECT title, HYBRID_SCORE( ts_rank(description, plainto_tsquery(:q)), vector <=> :query_vector, TEXT_WEIGHT 0.3, VECTOR_WEIGHT 0.7 ) AS hybrid_score FROM books WHERE description @@ plainto_tsquery(:q) ORDER BY hybrid_score DESC LIMIT 10;` | - - diff --git a/docs/user_guide/02_hybrid_queries.ipynb b/docs/user_guide/02_hybrid_queries.ipynb index e7f8d225..b76f0c51 100644 --- a/docs/user_guide/02_hybrid_queries.ipynb +++ b/docs/user_guide/02_hybrid_queries.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -92,18 +92,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 52, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13:00:56 [RedisVL] INFO Indices:\n", - "13:00:56 [RedisVL] INFO 1. 
user_queries\n" - ] - } - ], + "outputs": [], "source": [ "# use the CLI to see the created index\n", "!rvl index listall" @@ -111,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -121,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 98, "metadata": {}, "outputs": [ { @@ -130,7 +121,7 @@ "7" ] }, - "execution_count": 6, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -160,13 +151,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" ], "text/plain": [ "" @@ -174,6 +165,16 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance] RETURN 7 user credit_score age job office_location last_updated vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 10'" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -190,18 +191,39 @@ ")\n", "\n", "results = index.query(v)\n", - "result_print(results)" + "result_print(results)\n", + "str(v)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance]'" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v.query_string()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -242,13 +264,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -268,13 +290,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -305,13 +327,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -340,13 +362,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -367,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -393,13 +415,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -428,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -441,7 +463,7 @@ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -466,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -479,7 +501,7 @@ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
" ], "text/plain": [ "" @@ -505,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -518,7 +540,7 @@ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
" ], "text/plain": [ "" @@ -554,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -582,13 +604,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -608,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -634,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -660,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 70, "metadata": {}, "outputs": [ { @@ -686,13 +708,13 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
vector_distance | user | credit_score | age | job | office_location | last_updated
0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -719,14 +741,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2',\n", - " 'score': 0.9090908893868948,\n", + "[{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY',\n", + " 'score': 1.8181817787737895,\n", " 'vector_distance': '0',\n", " 'user': 'john',\n", " 'credit_score': 'high',\n", @@ -734,7 +756,7 @@ " 'job': 'engineer',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1741627789'},\n", - " {'id': 'user_queries_docs:01JY4J5VC90DRSFJ0WKXXN49JT',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PZ',\n", " 'score': 0.0,\n", " 'vector_distance': '0',\n", " 'user': 'derrick',\n", @@ -743,8 +765,8 @@ " 'job': 'doctor',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1741627789'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW',\n", - " 'score': 0.9090908893868948,\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1',\n", + " 'score': 1.8181817787737895,\n", " 'vector_distance': '0.109129190445',\n", " 'user': 'tyler',\n", " 'credit_score': 'high',\n", @@ -752,25 +774,25 @@ " 'job': 'engineer',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1742232589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2',\n", " 'score': 0.0,\n", - " 'vector_distance': '0.158808946609',\n", + " 'vector_distance': '0.158808887005',\n", " 'user': 'tim',\n", " 'credit_score': 'high',\n", " 'age': '12',\n", " 'job': 'dermatologist',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1739644189'},\n", - " {'id': 'user_queries_docs:01JY4J5VC940DJ9F47EJ6KN2MH',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q3',\n", " 'score': 0.0,\n", - " 'vector_distance': '0.217882037163',\n", + " 'vector_distance': '0.217881977558',\n", " 'user': 'taimur',\n", " 'credit_score': 'low',\n", " 'age': '15',\n", " 'job': 'CEO',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1742232589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0',\n", " 'score': 0.0,\n", " 'vector_distance': '0.266666650772',\n", " 'user': 'nancy',\n", @@ -779,7 +801,7 @@ " 'job': 'doctor',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1710696589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9806MD90GBZNP0MNY',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q4',\n", " 'score': 0.0,\n", " 'vector_distance': '0.653301358223',\n", " 'user': 'joe',\n", @@ -790,7 +812,7 @@ " 'last_updated': '1742232589'}]" ] }, - "execution_count": 24, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -813,7 +835,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 73, "metadata": {}, "outputs": [ { @@ -841,13 +863,13 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
score | vector_distance | user | credit_score | age | job | office_location | last_updated
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.4545454446934474 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.4545454446934474 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.4545454446934474 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
score | vector_distance | user | credit_score | age | job | office_location | last_updated
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789
0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.4545454446934474 | 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.4545454446934474 | 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589
0.4545454446934474 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -867,13 +889,13 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
score | vector_distance | user | credit_score | age | job | office_location | last_updated
0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.0 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.0 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" + "
score | vector_distance | user | credit_score | age | job | office_location | last_updated
0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589
0.0 | 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189
0.0 | 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589
0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589
" ], "text/plain": [ "" @@ -904,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 76, "metadata": {}, "outputs": [ { @@ -948,13 +970,13 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location
0 | derrick | low | 14 | doctor | -122.4194,37.7749
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749
" + "
vector_distance | user | credit_score | age | job | office_location
0 | derrick | low | 14 | doctor | -122.4194,37.7749
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749
" ], "text/plain": [ "" @@ -992,7 +1014,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -1007,7 +1029,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -1032,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -1057,7 +1079,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -1082,13 +1104,13 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job | office_location
0 | john | high | 18 | engineer | -122.4194,37.7749
0 | derrick | low | 14 | doctor | -122.4194,37.7749
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861
" + "
vector_distance | user | credit_score | age | job | office_location
0 | john | high | 18 | engineer | -122.4194,37.7749
0 | derrick | low | 14 | doctor | -122.4194,37.7749
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861
0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861
0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861
" ], "text/plain": [ "" @@ -1116,7 +1138,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 83, "metadata": {}, "outputs": [ { @@ -1158,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -1192,13 +1214,13 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distance | user | credit_score | age | job
0 | john | high | 18 | engineer
0 | derrick | low | 14 | doctor
0.109129190445 | tyler | high | 100 | engineer
0.158808946609 | tim | high | 12 | dermatologist
" + "
vector_distance | user | credit_score | age | job
0 | john | high | 18 | engineer
0 | derrick | low | 14 | doctor
0.109129190445 | tyler | high | 100 | engineer
0.158808887005 | tim | high | 12 | dermatologist
" ], "text/plain": [ "" @@ -1233,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 86, "metadata": {}, "outputs": [ { @@ -1264,7 +1286,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 87, "metadata": {}, "outputs": [ { @@ -1304,7 +1326,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -1345,7 +1367,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -1354,7 +1376,7 @@ "'@job:(\"engineer\")=>[KNN 5 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY age DESC DIALECT 3 LIMIT 0 5'" ] }, - "execution_count": 41, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } @@ -1366,7 +1388,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -1375,7 +1397,7 @@ "'@credit_score:{high}'" ] }, - "execution_count": 42, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -1388,7 +1410,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 91, "metadata": {}, "outputs": [ { @@ -1397,7 +1419,7 @@ "'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])'" ] }, - "execution_count": 43, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -1422,17 +1444,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n" + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2', 'payload': None, 'user': 'tim', 
'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n" ] } ], @@ -1444,7 +1466,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -1455,7 +1477,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "redisvl", "language": "python", "name": "python3" }, @@ -1469,10 +1491,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.11.9" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb index 147e3ec2..6c9e77f9 100644 --- a/docs/user_guide/12_sql_to_redis_queries.ipynb +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -7,19 +7,39 @@ "source": [ "# SQLQuery class\n", "\n", - "Pass a sql string to the SQLQuery class and it will be translated to a Redis query." + "It may arise that you want to use SQL-like queries to interact with your Redis vector database. While Redis does not natively support SQL, the `redisvl` library provides a `SQLQuery` class that allows you to write SQL-like queries that are automatically translated into Redis queries.\n", + "\n", + "The `SQLQuery` class is a wrapper around the `sql-redis` package, which provides a SQL-to-Redis query translator. The `sql-redis` package is not installed by default with `redisvl`, so you will need to install with the optional syntax:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "zsh:1: no matches found: redisvl[sql]\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install redisvl[sql]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Schema Dictionary:**" + "## Create an index to search" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -74,25 +94,40 @@ "data = [\n", " {\n", " 'user': 'john',\n", - " 'age': 1,\n", + " 'age': 34,\n", " 'job': 'engineer',\n", " 'credit_score': 'high',\n", - " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " 'user_embedding': np.array([0.4, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " },\n", + " {\n", + " 'user': 'bill',\n", + " 'age': 54,\n", + " 'job': 'engineer',\n", + " 'credit_score': 'low',\n", + " 'user_embedding': np.array([0.3, 0.1, 0.5], dtype=np.float32).tobytes()\n", " },\n", " {\n", " 'user': 'mary',\n", - " 'age': 2,\n", + " 'age': 24,\n", " 'job': 'doctor',\n", " 'credit_score': 'low',\n", " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", " },\n", " {\n", " 'user': 'joe',\n", - " 'age': 3,\n", + " 'age': 17,\n", " 'job': 'dentist',\n", " 'credit_score': 'medium',\n", " 'user_embedding': np.array([0.9, 0.9, 0.1], dtype=np.float32).tobytes()\n", " }\n", + " ,\n", + " {\n", + " 'user': 'stacy',\n", + " 'age': 61,\n", + " 'job': 'dentist',\n", + " 'credit_score': 'high',\n", + " 'user_embedding': np.array([0.9, 1.0, 0.1], dtype=np.float32).tobytes()\n", + " }\n", "]" ] }, @@ -117,7 +152,7 @@ }, { "cell_type": 
"code", - "execution_count": 3, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -139,14 +174,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)\n", - "\n", - "# If you don't specify a client or Redis URL, the index will attempt to\n", - "# connect to Redis at the default address \"redis://localhost:6379\"." + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)" ] }, { @@ -160,20 +192,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "index.create(overwrite=True)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ">Note that at this point, the index has no entries. Data loading follows." - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -188,14 +213,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01KFXYVY5NDYGV245XDGFFHW59', 'user_simple_docs:01KFXYVY5QST3G9HQ8F39X39Z7', 'user_simple_docs:01KFXYVY5RF0DJM4JRCXDPF2A2']\n" + "['user_simple_docs:01KG0JR6VWCHVRCX78T96VT6GE', 'user_simple_docs:01KG0JR6VWCHVRCX78T96VT6GF', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR3917Y', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR3917Z', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR39180']\n" ] } ], @@ -210,25 +235,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create a `SQLQuery` Object" + "## Create a `SQLQuery` Object\n", + "\n", + "First, let's test a simple select statement such as the one below." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "from redisvl.query import SQLQuery\n", - "from jupyterutils import result_print\n", "\n", - "sql_query = SQLQuery(\n", - " \"\"\"\n", + "sql_str = \"\"\"\n", " SELECT user, credit_score, job, age\n", " FROM user_simple\n", - " WHERE age > 1\n", + " WHERE age > 17\n", " \"\"\"\n", - ")" + "\n", + "sql_query = SQLQuery(sql_str) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check the created query string" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user credit_score job age'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")" ] }, { @@ -240,48 +293,256 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 23, "metadata": {}, "outputs": [ { - "ename": "ImportError", - "evalue": "sql-redis is required for SQL query support. 
Install it with: pip install redisvl[sql]", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:911\u001b[39m, in \u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 910\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m911\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mexecutor\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Executor\n\u001b[32m 912\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mschema\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SchemaRegistry\n", - "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'sql_redis'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m results = \u001b[43mindex\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2\u001b[39m result_print(results)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:1171\u001b[39m, in \u001b[36mSearchIndex.query\u001b[39m\u001b[34m(self, query)\u001b[39m\n\u001b[32m 1169\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._aggregate(query)\n\u001b[32m 1170\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, SQLQuery):\n\u001b[32m-> \u001b[39m\u001b[32m1171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, HybridQuery):\n\u001b[32m 1173\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._hybrid_search(query)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:914\u001b[39m, in \u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 912\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msql_redis\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mschema\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SchemaRegistry\n\u001b[32m 913\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m914\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m 915\u001b[39m \u001b[33m\"\u001b[39m\u001b[33msql-redis is required for SQL query support. 
\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 916\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mInstall it with: pip install redisvl[sql]\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 917\u001b[39m )\n\u001b[32m 919\u001b[39m registry = SchemaRegistry(\u001b[38;5;28mself\u001b[39m._redis_client)\n\u001b[32m 920\u001b[39m registry.load_all() \u001b[38;5;66;03m# Loads index schemas from Redis\u001b[39;00m\n", - "\u001b[31mImportError\u001b[39m: sql-redis is required for SQL query support. Install it with: pip install redisvl[sql]" - ] + "data": { + "text/plain": [ + "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "results = index.query(sql_query)\n", - "result_print(results)" + "results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Cleanup" + "## Additional query support\n", + "\n", + "### Conditional operators" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@age:[(17 +inf] @credit_score:{high}\" RETURN 4 user credit_score job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, age\n", + " FROM user_simple\n", + " WHERE age > 17 and credit_score = 'high'\n", + " \"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"((@credit_score:{high})|(@credit_score:{low}))\" RETURN 4 user credit_score job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, age\n", + " FROM user_simple\n", + " WHERE credit_score = 'high' or credit_score = 'low'\n", + " \"\"\"\n", + "\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@user:{mary|john}\" RETURN 4 user credit_score job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, age\n", + " FROM user_simple\n", + " WHERE user IN ('mary', 'john')\n", + " \"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@age:[40 60]\" RETURN 4 user credit_score job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, age\n", + " FROM user_simple\n", + " WHERE age BETWEEN 40 and 60\n", + " \"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", - "using the `.clear()` method. 
This will leave the secondary index in place for future insertions or updates.\n", + "### Aggregations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE MIN 1 @age AS min_age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'credit_score': 'high', 'max_age': '61', 'avg_age': '47.5', 'min_age': '34'},\n", + " {'credit_score': 'medium', 'max_age': '17', 'avg_age': '17', 'min_age': '17'},\n", + " {'credit_score': 'low', 'max_age': '54', 'avg_age': '39', 'min_age': '24'}]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# TODO: check all operations these aren't working currently\n", + "# STDEV(age) as std_age\n", + "# FIRSTVALUE(age) as first_value_age\n", + "# COUNT(age) as count_age\n", "\n", - "But if you want to clean up everything, including the index, just use `.delete()`\n", - "which will by default remove the index AND the underlying data." + "sql_str = \"\"\"\n", + " SELECT\n", + " user,\n", + " MAX(age) as max_age,\n", + " AVG(age) as avg_age,\n", + " MIN(age) as min_age,\n", + " FROM user_simple\n", + " GROUP BY credit_score\n", + " \"\"\"\n", + "\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vector search" ] }, { @@ -289,20 +550,47 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 2 user vector_distance SORTBY vector_distance ASC\n" + ] + }, { "data": { "text/plain": [ - "10" + "[{'vector_distance': '0.14079028368', 'user': 'joe'},\n", + " {'vector_distance': '0.14079028368', 'user': 'stacy'},\n", + " {'vector_distance': '0.222222208977', 'user': 'john'},\n", + " {'vector_distance': '0.222222208977', 'user': 'bill'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary'}]" ] }, - "execution_count": 20, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Clear all data from Redis associated with the index\n", - "await index.clear()" + "# TODO: this also doesn't give me a means to specify what distance type I mean\n", + "# it should also support the pgvector type syntax\n", + "sql_str = \"\"\"\n", + " SELECT user, vector_distance(user_embedding, :vec) AS vector_distance\n", + " FROM user_simple\n", + " ORDER BY vector_distance ASC\n", + " \"\"\"\n", + "\n", + "# pass vector as parameter\n", + "# TODO: I think this can function closer to the vector query\n", + "vec = np.array([1, 1, 1], dtype=np.float32).tobytes()\n", + "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", + "\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "\n", + "results" ] }, { @@ -310,20 +598,85 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 2 user vector_distance SORTBY vector_distance DESC\n" + ] + }, { "data": { "text/plain": [ - "True" + "[{'vector_distance': '0.352897465229', 'user': 'stacy'},\n", + " {'vector_distance': '0.352897465229', 'user': 'joe'},\n", + " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", + " {'vector_distance': '0.164599537849', 'user': 'bill'},\n", + " {'vector_distance': '0.164599537849', 'user': 'john'}]" ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "# TODO: this also doesn't give me a means to specify what distance type I mean\n", + "# it should also support the pgvector type syntax\n", + "sql_str = \"\"\"\n", + " SELECT user, cosine_distance(user_embedding, :vec) AS vector_distance\n", + " FROM user_simple\n", + " ORDER BY vector_distance DESC\n", + " \"\"\"\n", + "\n", + "# pass vector as parameter\n", + "# TODO: I think this can function closer to the vector query\n", + "vec = np.array([0.5, 0.1, 0.5], dtype=np.float32).tobytes()\n", + "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", + "\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", + "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", + "\n", + "But if you want to clean up everything, including the index, just use `.delete()`\n", + "which will by default remove the index AND the underlying data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Clear all data from Redis associated with the index\n", + "# await index.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Butm the index is still in place\n", - "await index.exists()" + "# await index.exists()" ] }, { @@ -333,7 +686,7 @@ "outputs": [], "source": [ "# Remove / delete the index in its entirety\n", - "await index.delete()" + "# await index.delete()" ] } ], diff --git a/redisvl/query/sql.py b/redisvl/query/sql.py index 03e55975..5f21c166 100644 --- a/redisvl/query/sql.py +++ b/redisvl/query/sql.py @@ -39,3 +39,78 @@ def __init__(self, sql: str, params: Optional[Dict[str, Any]] = None): """ self.sql = sql self.params = params or {} + + def redis_query_string( + self, + redis_client: Optional[Any] = None, + redis_url: str = "redis://localhost:6379", + ) -> str: + """Translate the SQL query to a Redis command string. + + This method uses the sql-redis translator to convert the SQL statement + into the equivalent Redis FT.SEARCH or FT.AGGREGATE command. + + Args: + redis_client: A Redis client connection used to load index schemas. + If not provided, a connection will be created using redis_url. + redis_url: The Redis URL to connect to if redis_client is not provided. + Defaults to "redis://localhost:6379". + + Returns: + The Redis command string (e.g., 'FT.SEARCH products "@category:{electronics}"'). 
+ + Raises: + ImportError: If sql-redis package is not installed. + + Example: + .. code-block:: python + + from redisvl.query import SQLQuery + + sql_query = SQLQuery("SELECT * FROM products WHERE category = 'electronics'") + + # Using redis_url + redis_cmd = sql_query.redis_query_string(redis_url="redis://localhost:6379") + + # Or using an existing client + from redis import Redis + client = Redis() + redis_cmd = sql_query.redis_query_string(redis_client=client) + + print(redis_cmd) + # Output: FT.SEARCH products "@category:{electronics}" + """ + try: + from sql_redis.schema import SchemaRegistry + from sql_redis.translator import Translator + except ImportError: + raise ImportError( + "sql-redis is required for SQL query support. " + "Install it with: pip install redisvl[sql]" + ) + + # Get or create Redis client + if redis_client is None: + from redis import Redis + + redis_client = Redis.from_url(redis_url) + + # Load schemas from Redis + registry = SchemaRegistry(redis_client) + registry.load_all() + + # Translate SQL to Redis command + translator = Translator(registry) + + # Substitute non-bytes params in SQL before translation + sql = self.sql + for key, value in self.params.items(): + placeholder = f":{key}" + if isinstance(value, (int, float)): + sql = sql.replace(placeholder, str(value)) + elif isinstance(value, str): + sql = sql.replace(placeholder, f"'{value}'") + # bytes (vectors) are handled separately + + translated = translator.translate(sql) + return translated.to_command_string() diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis.py index ae447df5..edb6df4a 100644 --- a/tests/integration/test_sql_redis.py +++ b/tests/integration/test_sql_redis.py @@ -189,6 +189,59 @@ def test_select_specific_fields(self, sql_index): assert "title" in results[0] assert "price" in results[0] + def test_redis_query_string_with_client(self, sql_index): + """Test redis_query_string() with redis_client returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_client + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_with_url(self, sql_index, redis_url): + """Test redis_query_string() with redis_url returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_url + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_aggregate(self, sql_index): + """Test redis_query_string() returns FT.AGGREGATE for aggregation queries.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.AGGREGATE command + assert redis_cmd.startswith("FT.AGGREGATE") + assert sql_index.name in redis_cmd + assert "GROUPBY" in redis_cmd + class TestSQLQueryWhere: """Tests for SQL WHERE clause filtering.""" @@ -254,6 
+307,173 @@ def test_where_numeric_range(self, sql_index): assert 25 <= price <= 50 +class TestSQLQueryTagOperators: + """Tests for SQL tag field operators.""" + + def test_tag_not_equals(self, sql_index): + """Test tag != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category != 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] != "electronics" + + def test_tag_in(self, sql_index): + """Test tag IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category IN ('books', 'accessories') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] in ("books", "accessories") + + +class TestSQLQueryNumericOperators: + """Tests for SQL numeric field operators.""" + + def test_numeric_greater_than(self, sql_index): + """Test numeric > operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price > 100 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) > 100 + + def test_numeric_equals(self, sql_index): + """Test numeric = operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price = 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) == 45 + + def test_numeric_not_equals(self, sql_index): + """Test numeric != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price != 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) != 45 + + @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis") + def test_numeric_in(self, sql_index): + """Test numeric IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price IN (45, 55, 65) + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) in (45, 55, 65) + + def test_numeric_between(self, sql_index): + """Test numeric BETWEEN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price BETWEEN 40 AND 60 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + price = float(result["price"]) + assert 40 <= price <= 60 + + +class TestSQLQueryTextOperators: + """Tests for SQL text field operators.""" + + def test_text_equals(self, sql_index): + """Test text = operator (full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert "laptop" in result["title"].lower() + + def test_text_not_equals(self, sql_index): + """Test text != operator (negated full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title != 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + # Results should not contain 'laptop' as a primary match + assert "laptop" not in result["title"].lower() + + @pytest.mark.xfail(reason="Text IN 
operator not yet supported in sql-redis") + def test_text_in(self, sql_index): + """Test text IN operator (multiple term search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title IN ('Python', 'Redis') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + title_lower = result["title"].lower() + assert "python" in title_lower or "redis" in title_lower + + class TestSQLQueryOrderBy: """Tests for SQL ORDER BY clause.""" @@ -379,6 +599,197 @@ def test_group_by_with_filter(self, sql_index): assert "category" in result assert "avg_price" in result + def test_group_by_with_sum(self, sql_index): + """Test GROUP BY with SUM reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, SUM(price) as total_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "total_price" in result + assert float(result["total_price"]) > 0 + + def test_group_by_with_min(self, sql_index): + """Test GROUP BY with MIN reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MIN(price) as min_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "min_price" in result + assert float(result["min_price"]) > 0 + + def test_group_by_with_max(self, sql_index): + """Test GROUP BY with MAX reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "max_price" in result + assert float(result["max_price"]) > 0 + + def test_global_sum(self, sql_index): + """Test global SUM aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT SUM(price) as total + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "total" in results[0] + assert float(results[0]["total"]) > 0 + + def test_global_min(self, sql_index): + """Test global MIN aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MIN(price) as min_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "min_price" in results[0] + assert float(results[0]["min_price"]) > 0 + + def test_global_max(self, sql_index): + """Test global MAX aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MAX(price) as max_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "max_price" in results[0] + assert float(results[0]["max_price"]) > 0 + + def test_multiple_reducers(self, sql_index): + """Test multiple reducers in a single query.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "count" in result + assert "total" in result + assert "avg_price" in result + assert "min_price" in result + assert "max_price" in result + + @pytest.mark.xfail( + reason="COUNT(DISTINCT) parsed but not correctly translated to 
COUNT_DISTINCTISH" + ) + def test_count_distinct(self, sql_index): + """Test COUNT_DISTINCT reducer.""" + sql_query = SQLQuery( + f""" + SELECT COUNT(DISTINCT category) as unique_categories + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "unique_categories" in results[0] + # Should have 4 unique categories: electronics, books, accessories, stationery + assert int(results[0]["unique_categories"]) == 4 + + @pytest.mark.xfail(reason="STDDEV not yet supported in sql-redis parser") + def test_stddev(self, sql_index): + """Test STDDEV reducer.""" + sql_query = SQLQuery( + f""" + SELECT STDDEV(price) as price_stddev + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "price_stddev" in results[0] + + @pytest.mark.xfail(reason="QUANTILE not yet supported in sql-redis parser") + def test_quantile(self, sql_index): + """Test QUANTILE reducer.""" + sql_query = SQLQuery( + f""" + SELECT QUANTILE(price, 0.5) as median_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "median_price" in results[0] + + @pytest.mark.xfail(reason="TOLIST not yet supported in sql-redis parser") + def test_tolist(self, sql_index): + """Test TOLIST reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, TOLIST(title) as titles + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "titles" in result + + @pytest.mark.xfail(reason="FIRST_VALUE not yet supported in sql-redis parser") + def test_first_value(self, sql_index): + """Test FIRST_VALUE reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, FIRST_VALUE(title) as first_title + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "first_title" in result + class TestSQLQueryIntegration: """End-to-end integration tests matching proposal examples.""" @@ -482,7 +893,33 @@ def vector_index(redis_url, worker_id): class TestSQLQueryVectorSearch: - """Tests for SQL vector similarity search using cosine_distance().""" + """Tests for SQL vector similarity search using cosine_distance() and vector_distance().""" + + def test_vector_distance_function(self, vector_index): + """Test vector search with vector_distance() function.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, vector_distance(embedding, :vec) AS score + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + results = vector_index.query(sql_query) + + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert "title" in result + assert "score" in result + # Score should be a valid non-negative distance value + score = float(result["score"]) + assert score >= 0 def test_vector_cosine_similarity(self, vector_index): """Test vector search with cosine_distance() function - pgvector style.""" @@ -515,3 +952,61 @@ def test_vector_cosine_similarity(self, vector_index): for result in results: assert result["genre"] == "Science Fiction" assert float(result["price"]) <= 20 + # Verify vector_distance is returned (like VectorQuery with return_score=True) + assert "vector_distance" in result + # Distance should be a valid non-negative value + distance = float(result["vector_distance"]) + 
assert distance >= 0 + + def test_vector_redis_query_string(self, vector_index, redis_url): + """Test redis_query_string() returns correct KNN query for vector search.""" + import numpy as np + + # Query vector + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + # Get the Redis command string + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH with KNN syntax + assert redis_cmd.startswith("FT.SEARCH") + assert vector_index.name in redis_cmd + assert "KNN 3" in redis_cmd + assert "@embedding" in redis_cmd + assert "$vector" in redis_cmd + assert "vector_distance" in redis_cmd + + def test_vector_search_with_prefilter_redis_query_string( + self, vector_index, redis_url + ): + """Test redis_query_string() returns correct prefiltered KNN query.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify prefilter syntax: (filter)=>[KNN ...] + assert redis_cmd.startswith("FT.SEARCH") + assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd + assert "=>[KNN" in redis_cmd + assert "KNN 3" in redis_cmd diff --git a/uv.lock b/uv.lock index e8a3d86e..53315560 100644 --- a/uv.lock +++ b/uv.lock @@ -5277,18 +5277,14 @@ dependencies = [ { name = "sqlglot" }, ] wheels = [ - { filename = "sql_redis-0.1.0-py3-none-any.whl", hash = "sha256:a0a22c98a0fc72918c2647bf33a8a88d6067208be66a35366226e4e1e175d0eb" }, + { filename = "sql_redis-0.1.0-py3-none-any.whl", hash = "sha256:9e5be7a8d90c3e52f1cfe5abc8c7be8e9a42eee9b20a3ca53874bc1026c8c2b2" }, ] [package.metadata] requires-dist = [ - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "redis", specifier = ">=5.0.0" }, { name = "sqlglot", specifier = ">=26.0.0" }, - { name = "testcontainers", extras = ["redis"], marker = "extra == 'dev'", specifier = ">=4.0.0" }, ] -provides-extras = ["dev"] [[package]] name = "sqlalchemy" From d555f8cc741660530c12c5a068d393fe544f8822 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Wed, 28 Jan 2026 16:11:22 -0500 Subject: [PATCH 3/9] working primary reducers --- docs/user_guide/12_sql_to_redis_queries.ipynb | 169 +++++++++++------- tests/integration/test_sql_redis.py | 22 +-- 2 files changed, 121 insertions(+), 70 deletions(-) diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb index 6c9e77f9..d55505b0 100644 --- a/docs/user_guide/12_sql_to_redis_queries.ipynb +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -152,7 +152,7 @@ }, { "cell_type": "code", - 
"execution_count": 17, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -174,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -213,14 +213,14 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01KG0JR6VWCHVRCX78T96VT6GE', 'user_simple_docs:01KG0JR6VWCHVRCX78T96VT6GF', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR3917Y', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR3917Z', 'user_simple_docs:01KG0JR6VXSJEHX9P3ZMR39180']\n" + "['user_simple_docs:01KG36PSJRQC864FDJM7HDDE6V', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PJ', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PK', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PM', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PN']\n" ] } ], @@ -242,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -266,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -275,7 +275,7 @@ "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user credit_score job age'" ] }, - "execution_count": 22, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -293,19 +293,25 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 23, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -326,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -340,10 +346,14 @@ "data": { "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" ] }, - "execution_count": 24, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -366,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -380,12 
+390,18 @@ "data": { "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 25, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -406,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -420,10 +436,14 @@ "data": { "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" ] }, - "execution_count": 38, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -446,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -459,10 +479,12 @@ { "data": { "text/plain": [ - "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 39, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -487,46 +509,72 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Aggregations" + "### Aggregations\n", + "\n", + "See docs for redis supported reducer functions: [https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/aggregations/#supported-groupby-reducers](docs)." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE MIN 1 @age AS min_age\n" + "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age\n" ] }, { "data": { "text/plain": [ - "[{'credit_score': 'high', 'max_age': '61', 'avg_age': '47.5', 'min_age': '34'},\n", - " {'credit_score': 'medium', 'max_age': '17', 'avg_age': '17', 'min_age': '17'},\n", - " {'credit_score': 'low', 'max_age': '54', 'avg_age': '39', 'min_age': '24'}]" + "[{'credit_score': 'high',\n", + " 'count_age': '6',\n", + " 'count_distinct_age': '2',\n", + " 'min_age': '34',\n", + " 'max_age': '61',\n", + " 'avg_age': '47.5',\n", + " 'std_age': '14.7885090526',\n", + " 'fist_value_age': '34',\n", + " 'to_list_age': [b'34', b'61']},\n", + " {'credit_score': 'medium',\n", + " 'count_age': '3',\n", + " 'count_distinct_age': '1',\n", + " 'min_age': '17',\n", + " 'max_age': '17',\n", + " 'avg_age': '17',\n", + " 'std_age': '0',\n", + " 'fist_value_age': '17',\n", + " 'to_list_age': [b'17']},\n", + " {'credit_score': 'low',\n", + " 'count_age': '6',\n", + " 'count_distinct_age': '2',\n", + " 'min_age': '24',\n", + " 'max_age': '54',\n", + " 'avg_age': '39',\n", + " 'std_age': '16.4316767252',\n", + " 'fist_value_age': '54',\n", + " 'to_list_age': [b'24', b'54']}]" ] }, - "execution_count": 37, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# TODO: check all operations these aren't working currently\n", - "# STDEV(age) as std_age\n", - "# FIRSTVALUE(age) as first_value_age\n", - "# COUNT(age) as count_age\n", - "\n", "sql_str = \"\"\"\n", " SELECT\n", " user,\n", + " COUNT(age) as count_age,\n", + " COUNT_DISTINCT(age) as count_distinct_age,\n", + " MIN(age) as min_age,\n", " MAX(age) as max_age,\n", " AVG(age) as avg_age,\n", - " MIN(age) as min_age,\n", + " STDEV(age) as std_age,\n", + " FIRST_VALUE(age) as fist_value_age,\n", + " ARRAY_AGG(age) as to_list_age\n", " FROM user_simple\n", " GROUP BY credit_score\n", " \"\"\"\n", @@ -560,29 +608,29 @@ { "data": { "text/plain": [ - "[{'vector_distance': '0.14079028368', 'user': 'joe'},\n", - " {'vector_distance': '0.14079028368', 'user': 'stacy'},\n", - " {'vector_distance': '0.222222208977', 'user': 'john'},\n", - " {'vector_distance': '0.222222208977', 'user': 'bill'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary'}]" + "[{'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", + " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", + " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", + " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", + " {'vector_distance': '0.14407902956', 'user': 'stacy'}]" ] }, - 
"execution_count": 22, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# TODO: this also doesn't give me a means to specify what distance type I mean\n", - "# it should also support the pgvector type syntax\n", "sql_str = \"\"\"\n", " SELECT user, vector_distance(user_embedding, :vec) AS vector_distance\n", " FROM user_simple\n", " ORDER BY vector_distance ASC\n", " \"\"\"\n", - "\n", - "# pass vector as parameter\n", - "# TODO: I think this can function closer to the vector query\n", "vec = np.array([1, 1, 1], dtype=np.float32).tobytes()\n", "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", "\n", @@ -608,29 +656,30 @@ { "data": { "text/plain": [ - "[{'vector_distance': '0.352897465229', 'user': 'stacy'},\n", - " {'vector_distance': '0.352897465229', 'user': 'joe'},\n", + "[{'vector_distance': '0.352897465229', 'user': 'joe'},\n", " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", - " {'vector_distance': '0.164599537849', 'user': 'bill'},\n", - " {'vector_distance': '0.164599537849', 'user': 'john'}]" + " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", + " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", + " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", + " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", + " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", + " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", + " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", + " {'vector_distance': '0.00608772039413', 'user': 'john'}]" ] }, - "execution_count": 24, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# TODO: this also doesn't give me a means to specify what distance type I mean\n", - "# it should also support the pgvector type syntax\n", "sql_str = \"\"\"\n", " SELECT user, cosine_distance(user_embedding, :vec) AS vector_distance\n", " FROM user_simple\n", " ORDER BY vector_distance DESC\n", " \"\"\"\n", "\n", - "# pass vector as parameter\n", - "# TODO: I think this can function closer to the vector query\n", "vec = np.array([0.5, 0.1, 0.5], dtype=np.float32).tobytes()\n", "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", "\n", @@ -661,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -671,7 +720,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -681,7 +730,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis.py index edb6df4a..2739a792 100644 --- a/tests/integration/test_sql_redis.py +++ b/tests/integration/test_sql_redis.py @@ -712,14 +712,11 @@ def test_multiple_reducers(self, sql_index): assert "min_price" in result assert "max_price" in result - @pytest.mark.xfail( - reason="COUNT(DISTINCT) parsed but not correctly translated to COUNT_DISTINCTISH" - ) def test_count_distinct(self, sql_index): - """Test COUNT_DISTINCT reducer.""" + """Test COUNT_DISTINCT reducer using Redis-specific syntax.""" sql_query = SQLQuery( f""" - SELECT COUNT(DISTINCT category) as unique_categories + SELECT COUNT_DISTINCT(category) as unique_categories FROM {sql_index.name} """ ) @@ -730,7 +727,6 @@ def test_count_distinct(self, sql_index): # Should have 4 unique categories: electronics, books, 
accessories, stationery assert int(results[0]["unique_categories"]) == 4 - @pytest.mark.xfail(reason="STDDEV not yet supported in sql-redis parser") def test_stddev(self, sql_index): """Test STDDEV reducer.""" sql_query = SQLQuery( @@ -743,6 +739,9 @@ def test_stddev(self, sql_index): assert len(results) == 1 assert "price_stddev" in results[0] + # Verify it's a valid numeric value + stddev_value = float(results[0]["price_stddev"]) + assert stddev_value >= 0 # Standard deviation is always non-negative @pytest.mark.xfail(reason="QUANTILE not yet supported in sql-redis parser") def test_quantile(self, sql_index): @@ -758,12 +757,11 @@ def test_quantile(self, sql_index): assert len(results) == 1 assert "median_price" in results[0] - @pytest.mark.xfail(reason="TOLIST not yet supported in sql-redis parser") def test_tolist(self, sql_index): - """Test TOLIST reducer.""" + """Test TOLIST reducer via ARRAY_AGG SQL function.""" sql_query = SQLQuery( f""" - SELECT category, TOLIST(title) as titles + SELECT category, ARRAY_AGG(title) as titles FROM {sql_index.name} GROUP BY category """ @@ -773,8 +771,9 @@ def test_tolist(self, sql_index): assert len(results) > 0 for result in results: assert "titles" in result + # TOLIST returns a comma-separated string or list of values + assert result["titles"] is not None - @pytest.mark.xfail(reason="FIRST_VALUE not yet supported in sql-redis parser") def test_first_value(self, sql_index): """Test FIRST_VALUE reducer.""" sql_query = SQLQuery( @@ -789,6 +788,9 @@ def test_first_value(self, sql_index): assert len(results) > 0 for result in results: assert "first_title" in result + # Verify it's a non-empty string + assert isinstance(result["first_title"], str) + assert len(result["first_title"]) > 0 class TestSQLQueryIntegration: From 1c5dcdbe11d112f7704e0f1e753f697b7ec12ac2 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Wed, 28 Jan 2026 16:39:24 -0500 Subject: [PATCH 4/9] support quantile --- tests/integration/test_sql_redis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis.py index 2739a792..202a003d 100644 --- a/tests/integration/test_sql_redis.py +++ b/tests/integration/test_sql_redis.py @@ -743,7 +743,6 @@ def test_stddev(self, sql_index): stddev_value = float(results[0]["price_stddev"]) assert stddev_value >= 0 # Standard deviation is always non-negative - @pytest.mark.xfail(reason="QUANTILE not yet supported in sql-redis parser") def test_quantile(self, sql_index): """Test QUANTILE reducer.""" sql_query = SQLQuery( @@ -756,6 +755,9 @@ def test_quantile(self, sql_index): assert len(results) == 1 assert "median_price" in results[0] + # Verify it's a valid numeric value + median_value = float(results[0]["median_price"]) + assert median_value >= 0 def test_tolist(self, sql_index): """Test TOLIST reducer via ARRAY_AGG SQL function.""" From 3ee26e26b87da76881ec3b9d61052e7ab9d7b79b Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Wed, 28 Jan 2026 16:43:23 -0500 Subject: [PATCH 5/9] update example --- docs/user_guide/12_sql_to_redis_queries.ipynb | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb index d55505b0..31c14cb0 100644 --- a/docs/user_guide/12_sql_to_redis_queries.ipynb +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -220,7 +220,7 @@ "name": "stdout", "output_type": "stream", "text": [ - 
"['user_simple_docs:01KG36PSJRQC864FDJM7HDDE6V', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PJ', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PK', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PM', 'user_simple_docs:01KG36PSJTC8329D806J7KW0PN']\n" + "['user_simple_docs:01KG38752729EAJBSF2DFHPVY2', 'user_simple_docs:01KG3875289XV6SX004PQP2WHP', 'user_simple_docs:01KG3875289XV6SX004PQP2WHQ', 'user_simple_docs:01KG387529AZ7SSJ47NMM4XSPB', 'user_simple_docs:01KG387529AZ7SSJ47NMM4XSPC']\n" ] } ], @@ -301,14 +301,14 @@ "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" ] }, "execution_count": 10, @@ -347,6 +347,8 @@ "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", @@ -391,14 +393,14 @@ "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" ] }, "execution_count": 12, @@ -436,6 +438,8 @@ "data": { "text/plain": [ "[{'user': 'john', 'credit_score': 'high', 'job': 
'engineer', 'age': '34'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", @@ -481,6 +485,7 @@ "text/plain": [ "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, @@ -516,49 +521,52 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age\n" + "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age REDUCE QUANTILE 2 @age 0.99 AS quantile_age\n" ] }, { "data": { "text/plain": [ "[{'credit_score': 'high',\n", - " 'count_age': '6',\n", + " 'count_age': '8',\n", " 'count_distinct_age': '2',\n", " 'min_age': '34',\n", " 'max_age': '61',\n", " 'avg_age': '47.5',\n", - " 'std_age': '14.7885090526',\n", + " 'std_age': '14.4321070633',\n", " 'fist_value_age': '34',\n", - " 'to_list_age': [b'34', b'61']},\n", + " 'to_list_age': [b'34', b'61'],\n", + " 'quantile_age': '61'},\n", " {'credit_score': 'medium',\n", - " 'count_age': '3',\n", + " 'count_age': '4',\n", " 'count_distinct_age': '1',\n", " 'min_age': '17',\n", " 'max_age': '17',\n", " 'avg_age': '17',\n", " 'std_age': '0',\n", " 'fist_value_age': '17',\n", - " 'to_list_age': [b'17']},\n", + " 'to_list_age': [b'17'],\n", + " 'quantile_age': '17'},\n", " {'credit_score': 'low',\n", - " 'count_age': '6',\n", + " 'count_age': '8',\n", " 'count_distinct_age': '2',\n", " 'min_age': '24',\n", " 'max_age': '54',\n", " 'avg_age': '39',\n", - " 'std_age': '16.4316767252',\n", + " 'std_age': '16.0356745147',\n", " 'fist_value_age': '54',\n", - " 'to_list_age': [b'24', b'54']}]" + " 'to_list_age': [b'24', b'54'],\n", + " 'quantile_age': '54'}]" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +582,8 @@ " AVG(age) as avg_age,\n", " STDEV(age) as std_age,\n", " FIRST_VALUE(age) as fist_value_age,\n", - " ARRAY_AGG(age) as to_list_age\n", + " ARRAY_AGG(age) as to_list_age,\n", + " QUANTILE(age, 0.99) as quantile_age\n", " FROM user_simple\n", " GROUP BY credit_score\n", " \"\"\"\n", @@ -595,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -611,13 +620,13 @@ "[{'vector_distance': '0.10912925005', 'user': 'john'},\n", 
" {'vector_distance': '0.10912925005', 'user': 'john'},\n", " {'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", - " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", - " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", - " {'vector_distance': '0.14407902956', 'user': 'stacy'}]" + " {'vector_distance': '0.14079028368', 'user': 'joe'}]" ] }, "execution_count": 16, @@ -643,7 +652,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -656,13 +665,13 @@ { "data": { "text/plain": [ - "[{'vector_distance': '0.352897465229', 'user': 'joe'},\n", - " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", - " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", + "[{'vector_distance': '0.164599537849', 'user': 'mary'},\n", " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", + " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", + " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'}]" From b0dc8ab022fccb0277282de4156e82daa5abbb9a Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Tue, 3 Feb 2026 10:34:06 -0500 Subject: [PATCH 6/9] phrase wip --- docs/user_guide/12_sql_to_redis_queries.ipynb | 496 ++++++++++++++---- tests/integration/test_sql_redis.py | 88 ++++ 2 files changed, 493 insertions(+), 91 deletions(-) diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb index 31c14cb0..0d70fafe 100644 --- a/docs/user_guide/12_sql_to_redis_queries.ipynb +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -39,10 +39,29 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading weights: 100%|██████████| 199/199 [00:00<00:00, 1515.30it/s, Materializing param=pooler.dense.weight] \n", + "MPNetModel LOAD REPORT from: sentence-transformers/all-mpnet-base-v2\n", + "Key | Status | | \n", + "------------------------+------------+--+-\n", + "embeddings.position_ids | UNEXPECTED | | \n", + "\n", + "Notes:\n", + "- UNEXPECTED\t:can be ignored when loading from different task/architecture; not ok if you expect identical arch.\n" + ] + } + ], "source": [ + "from redisvl.utils.vectorize import HFTextVectorizer\n", + "\n", + "hf = HFTextVectorizer()\n", + "\n", "schema = {\n", " \"index\": {\n", " \"name\": \"user_simple\",\n", @@ -51,20 +70,22 @@ " \"fields\": [\n", " {\"name\": \"user\", \"type\": \"tag\"},\n", " {\"name\": \"credit_score\", \"type\": \"tag\"},\n", - " {\"name\": \"job\", \"type\": \"text\"},\n", + " {\"name\": \"job\", \"type\": \"tag\"},\n", 
+ " {\"name\": \"job_description\", \"type\": \"text\"},\n", " {\"name\": \"age\", \"type\": \"numeric\"},\n", + " ]\n", + "}\n", + "\n", " {\n", - " \"name\": \"user_embedding\",\n", + " \"name\": \"job_embedding\",\n", " \"type\": \"vector\",\n", " \"attrs\": {\n", - " \"dims\": 3,\n", + " \"dims\": len(hf.embed(\"get embed length\")),\n", " \"distance_metric\": \"cosine\",\n", " \"algorithm\": \"flat\",\n", " \"datatype\": \"float32\"\n", " }\n", - " }\n", - " ]\n", - "}" + " }" ] }, { @@ -84,50 +105,54 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", - "\n", "data = [\n", " {\n", " 'user': 'john',\n", " 'age': 34,\n", - " 'job': 'engineer',\n", + " 'job': 'software engineer',\n", " 'credit_score': 'high',\n", - " 'user_embedding': np.array([0.4, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " 'job_description': 'Designs, develops, and maintains software applications and systems.'\n", " },\n", " {\n", " 'user': 'bill',\n", " 'age': 54,\n", " 'job': 'engineer',\n", " 'credit_score': 'low',\n", - " 'user_embedding': np.array([0.3, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'\n", " },\n", " {\n", " 'user': 'mary',\n", " 'age': 24,\n", " 'job': 'doctor',\n", " 'credit_score': 'low',\n", - " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'\n", " },\n", " {\n", " 'user': 'joe',\n", " 'age': 17,\n", " 'job': 'dentist',\n", " 'credit_score': 'medium',\n", - " 'user_embedding': np.array([0.9, 0.9, 0.1], dtype=np.float32).tobytes()\n", - " }\n", - " ,\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'\n", + " },\n", " {\n", " 'user': 'stacy',\n", " 'age': 61,\n", - " 'job': 'dentist',\n", + " 'job': 'project manager',\n", " 'credit_score': 'high',\n", - " 'user_embedding': np.array([0.9, 1.0, 0.1], dtype=np.float32).tobytes()\n", + " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'\n", " }\n", + "]\n", + "\n", + "data = [\n", + " { \n", + " **d,\n", + " \"user_embedding\": hf.embed(f\"{d['job_description']=} {d['job']=}\", as_buffer=True),\n", + " } \n", + " for d in data\n", "]" ] }, @@ -152,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -174,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -192,11 +217,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "index.create(overwrite=True)" + "index.create(overwrite=True, drop=True)" ] }, { @@ -213,14 +238,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01KG38752729EAJBSF2DFHPVY2', 'user_simple_docs:01KG3875289XV6SX004PQP2WHP', 'user_simple_docs:01KG3875289XV6SX004PQP2WHQ', 'user_simple_docs:01KG387529AZ7SSJ47NMM4XSPB', 'user_simple_docs:01KG387529AZ7SSJ47NMM4XSPC']\n" + "['user_simple_docs:01KG8A5H6C8KQEZK0H56NEXEVG', 'user_simple_docs:01KG8A5H6GCPS1HD6S2FKWMBRS', 'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV5', 
'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV6', 'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV7']\n" ] } ], @@ -242,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -266,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -275,7 +300,7 @@ "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user credit_score job age'" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -293,25 +318,25 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + "[{'user': 'john',\n", + " 'credit_score': 'high',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" + " {'user': 'stacy',\n", + " 'credit_score': 'high',\n", + " 'job': 'project manager',\n", + " 'age': '61'}]" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -332,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -345,17 +370,17 @@ { "data": { "text/plain": [ - "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'}]" + "[{'user': 'john',\n", + " 'credit_score': 'high',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'stacy',\n", + " 'credit_score': 'high',\n", + " 'job': 'project manager',\n", + " 'age': '61'}]" ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -365,7 +390,7 @@ " SELECT user, credit_score, job, age\n", " FROM user_simple\n", " WHERE age > 17 and credit_score = 'high'\n", - " \"\"\"\n", + "\"\"\"\n", "\n", "# could maybe be nice to set a connection string at the class level\n", "# this would deviate from our other query like classes though so thinking on it\n", @@ -378,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -393,17 +418,17 @@ "text/plain": [ "[{'user': 'john', 'credit_score': 
'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", + " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", + " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" ] }, - "execution_count": 12, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -424,39 +449,37 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@user:{mary|john}\" RETURN 4 user credit_score job age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job:{software engineer|engineer|pancake tester}\" RETURN 4 user credit_score job age\n" ] }, { "data": { "text/plain": [ - "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" + "[{'user': 'john',\n", + " 'credit_score': 'high',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 13, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# job is a tag field therefore this syntax works\n", "sql_str = \"\"\"\n", " SELECT user, credit_score, job, age\n", " FROM user_simple\n", - " WHERE user IN ('mary', 'john')\n", + " WHERE job IN ('software engineer', 'engineer', 'pancake tester')\n", " \"\"\"\n", "\n", "# could maybe be nice to set a connection string at the class level\n", @@ -468,9 +491,247 @@ "results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text based searches\n", + "\n", + "See [the docs](https://redis.io/docs/latest/develop/ai/search-and-query/query/full-text/) for available text queries in Redis.\n", + "\n", + "For more on exact matching see [here](https://redis.io/docs/latest/develop/ai/search-and-query/query/exact-match/)" + ] + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis 
query: FT.SEARCH user_simple \"@job_description:sci*\" RETURN 5 user credit_score job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'bill',\n", + " 'credit_score': 'low',\n", + " 'job': 'engineer',\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.',\n", + " 'age': '54'}]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Prefix\n", + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'sci*'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:*care\" RETURN 5 user credit_score job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'joe',\n", + " 'credit_score': 'medium',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '17'}]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Suffix\n", + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = '*care'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:%diagnose%\" RETURN 5 user credit_score job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'mary',\n", + " 'credit_score': 'low',\n", + " 'job': 'doctor',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions.',\n", + " 'age': '24'}]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fuzzy\n", + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = '%diagnose%'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"healthcare including\"\" RETURN 5 user credit_score job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'joe',\n", + " 'credit_score': 'medium',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '17'}]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Phrase no stop words\n", + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'healthcare including'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"diagnosing $and treating\"\" RETURN 5 user credit_score job job_description age\n" + ] + }, + { + "ename": "ResponseError", + "evalue": "Syntax error at offset 29 near and", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mResponseError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[32]\u001b[39m\u001b[32m, line 14\u001b[39m\n\u001b[32m 12\u001b[39m redis_query = sql_query.redis_query_string(redis_url=\u001b[33m\"\u001b[39m\u001b[33mredis://localhost:6379\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 13\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mResulting redis query: \u001b[39m\u001b[33m\"\u001b[39m, redis_query)\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m results = \u001b[43mindex\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 15\u001b[39m results\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:1171\u001b[39m, in \u001b[36mSearchIndex.query\u001b[39m\u001b[34m(self, query)\u001b[39m\n\u001b[32m 1169\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._aggregate(query)\n\u001b[32m 1170\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, SQLQuery):\n\u001b[32m-> \u001b[39m\u001b[32m1171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, HybridQuery):\n\u001b[32m 1173\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._hybrid_search(query)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:925\u001b[39m, in 
\u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 922\u001b[39m executor = Executor(\u001b[38;5;28mself\u001b[39m._redis_client, registry)\n\u001b[32m 924\u001b[39m \u001b[38;5;66;03m# Execute the query with any params\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m925\u001b[39m result = \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m=\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 927\u001b[39m \u001b[38;5;66;03m# Decode bytes to strings in the results (Redis may return bytes)\u001b[39;00m\n\u001b[32m 928\u001b[39m decoded_rows = []\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/sql-redis/sql_redis/executor.py:64\u001b[39m, in \u001b[36mExecutor.execute\u001b[39m\u001b[34m(self, sql, params)\u001b[39m\n\u001b[32m 61\u001b[39m cmd[i] = vector_param\n\u001b[32m 63\u001b[39m \u001b[38;5;66;03m# Execute command\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m64\u001b[39m raw_result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute_command\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcmd\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 66\u001b[39m \u001b[38;5;66;03m# Parse result based on command type\u001b[39;00m\n\u001b[32m 67\u001b[39m count = raw_result[\u001b[32m0\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m raw_result \u001b[38;5;28;01melse\u001b[39;00m \u001b[32m0\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:657\u001b[39m, in \u001b[36mRedis.execute_command\u001b[39m\u001b[34m(self, *args, **options)\u001b[39m\n\u001b[32m 656\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mexecute_command\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **options):\n\u001b[32m--> \u001b[39m\u001b[32m657\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_command\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:668\u001b[39m, in \u001b[36mRedis._execute_command\u001b[39m\u001b[34m(self, *args, **options)\u001b[39m\n\u001b[32m 666\u001b[39m \u001b[38;5;28mself\u001b[39m.single_connection_lock.acquire()\n\u001b[32m 667\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m668\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcall_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 669\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_send_command_parse_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 670\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\n\u001b[32m 671\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 672\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_close_connection\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 673\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 675\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 676\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m conn \u001b[38;5;129;01mand\u001b[39;00m conn.should_reconnect():\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/retry.py:116\u001b[39m, in \u001b[36mRetry.call_with_retry\u001b[39m\u001b[34m(self, do, fail, is_retryable)\u001b[39m\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 115\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m116\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdo\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 117\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;28mself\u001b[39m._supported_errors \u001b[38;5;28;01mas\u001b[39;00m error:\n\u001b[32m 118\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_retryable \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_retryable(error):\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:669\u001b[39m, in \u001b[36mRedis._execute_command..\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 666\u001b[39m \u001b[38;5;28mself\u001b[39m.single_connection_lock.acquire()\n\u001b[32m 667\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 668\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m conn.retry.call_with_retry(\n\u001b[32m--> \u001b[39m\u001b[32m669\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m: \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_send_command_parse_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 670\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\n\u001b[32m 671\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[32m 672\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m _: \u001b[38;5;28mself\u001b[39m._close_connection(conn),\n\u001b[32m 673\u001b[39m )\n\u001b[32m 675\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 676\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m conn \u001b[38;5;129;01mand\u001b[39;00m conn.should_reconnect():\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:640\u001b[39m, in \u001b[36mRedis._send_command_parse_response\u001b[39m\u001b[34m(self, conn, command_name, *args, **options)\u001b[39m\n\u001b[32m 636\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 637\u001b[39m \u001b[33;03mSend a command 
and parse the response\u001b[39;00m\n\u001b[32m 638\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 639\u001b[39m conn.send_command(*args, **options)\n\u001b[32m--> \u001b[39m\u001b[32m640\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:691\u001b[39m, in \u001b[36mRedis.parse_response\u001b[39m\u001b[34m(self, connection, command_name, **options)\u001b[39m\n\u001b[32m 689\u001b[39m options.pop(NEVER_DECODE)\n\u001b[32m 690\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m691\u001b[39m response = \u001b[43mconnection\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 692\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ResponseError:\n\u001b[32m 693\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m EMPTY_RESPONSE \u001b[38;5;129;01min\u001b[39;00m options:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/connection.py:1155\u001b[39m, in \u001b[36mAbstractConnection.read_response\u001b[39m\u001b[34m(self, disable_decoding, disconnect_on_error, push_request)\u001b[39m\n\u001b[32m 1153\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, ResponseError):\n\u001b[32m 1154\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1155\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m response\n\u001b[32m 1156\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 1157\u001b[39m \u001b[38;5;28;01mdel\u001b[39;00m response \u001b[38;5;66;03m# avoid creating ref cycles\u001b[39;00m\n", + "\u001b[31mResponseError\u001b[39m: Syntax error at offset 29 near and" + ] + } + ], + "source": [ + "# Phrase with stop words currently limitation of core Redis\n", + "sql_str = \"\"\"\n", + " SELECT user, credit_score, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'diagnosing and treating'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -486,10 +747,11 @@ "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 14, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -521,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 44, 
"metadata": {}, "outputs": [ { @@ -535,17 +797,17 @@ "data": { "text/plain": [ "[{'credit_score': 'high',\n", - " 'count_age': '8',\n", + " 'count_age': '10',\n", " 'count_distinct_age': '2',\n", " 'min_age': '34',\n", " 'max_age': '61',\n", " 'avg_age': '47.5',\n", - " 'std_age': '14.4321070633',\n", + " 'std_age': '14.2302494708',\n", " 'fist_value_age': '34',\n", " 'to_list_age': [b'34', b'61'],\n", " 'quantile_age': '61'},\n", " {'credit_score': 'medium',\n", - " 'count_age': '4',\n", + " 'count_age': '5',\n", " 'count_distinct_age': '1',\n", " 'min_age': '17',\n", " 'max_age': '17',\n", @@ -555,18 +817,18 @@ " 'to_list_age': [b'17'],\n", " 'quantile_age': '17'},\n", " {'credit_score': 'low',\n", - " 'count_age': '8',\n", + " 'count_age': '10',\n", " 'count_distinct_age': '2',\n", " 'min_age': '24',\n", " 'max_age': '54',\n", " 'avg_age': '39',\n", - " 'std_age': '16.0356745147',\n", - " 'fist_value_age': '54',\n", + " 'std_age': '15.8113883008',\n", + " 'fist_value_age': '24',\n", " 'to_list_age': [b'24', b'54'],\n", " 'quantile_age': '54'}]" ] }, - "execution_count": 21, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -604,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -621,15 +883,15 @@ " {'vector_distance': '0.10912925005', 'user': 'john'},\n", " {'vector_distance': '0.10912925005', 'user': 'john'},\n", " {'vector_distance': '0.10912925005', 'user': 'john'},\n", + " {'vector_distance': '0.10912925005', 'user': 'john'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", - " {'vector_distance': '0.14079028368', 'user': 'joe'},\n", - " {'vector_distance': '0.14079028368', 'user': 'joe'}]" + " {'vector_distance': '0.121690034866', 'user': 'bill'}]" ] }, - "execution_count": 16, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -652,7 +914,59 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"(@credit_score:{low})=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 3 user credit_score vector_distance SORTBY vector_distance ASC\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", + " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", + " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'}]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " 
SELECT user, credit_score, vector_distance(user_embedding, :vec) AS vector_distance\n", + " FROM user_simple\n", + " WHERE credit_score = 'low'\n", + " ORDER BY vector_distance ASC\n", + " \"\"\"\n", + "vec = np.array([1, 1, 1], dtype=np.float32).tobytes()\n", + "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", + "\n", + "# they want full string\n", + "# i.e. \"bank of america\" \n", + "# \"bank\" | \"of\" | \"america\"\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [ { @@ -665,8 +979,7 @@ { "data": { "text/plain": [ - "[{'vector_distance': '0.164599537849', 'user': 'mary'},\n", - " {'vector_distance': '0.164599537849', 'user': 'mary'},\n", + "[{'vector_distance': '0.0295688509941', 'user': 'bill'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", @@ -674,10 +987,11 @@ " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", + " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", " {'vector_distance': '0.00608772039413', 'user': 'john'}]" ] }, - "execution_count": 17, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -719,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -729,7 +1043,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -739,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis.py index 202a003d..4cf97f6e 100644 --- a/tests/integration/test_sql_redis.py +++ b/tests/integration/test_sql_redis.py @@ -456,6 +456,94 @@ def test_text_not_equals(self, sql_index): # Results should not contain 'laptop' as a primary match assert "laptop" not in result["title"].lower() + def test_text_prefix(self, sql_index): + """Test text prefix search with wildcard (term*).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'lap*' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match titles starting with "lap" (e.g., "laptop") + assert "lap" in result["title"].lower() + + def test_text_suffix(self, sql_index): + """Test text suffix search with wildcard (*term).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE name = '*book' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match names ending with "book" (e.g., "Python Book") + assert "book" in result["name"].lower() + + def test_text_fuzzy(self, sql_index): + """Test text fuzzy search with Levenshtein distance (%term%).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = '%laptap%' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should fuzzy match "laptop" even with 
typo "laptap" + assert "laptop" in result["title"].lower() + + def test_text_phrase(self, sql_index): + """Test text phrase search (multi-word exact phrase).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'gaming laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match exact phrase "gaming laptop" + title_lower = result["title"].lower() + assert "gaming" in title_lower and "laptop" in title_lower + + @pytest.mark.skip( + reason="Phrase search with stop words is a Redis limitation - " + "stop words like 'and' are stripped during query parsing" + ) + def test_text_phrase_with_stopword(self, sql_index): + """Test text phrase search containing stop words. + + This test is skipped because Redis strips stop words (like 'and', 'the', 'is') + during query parsing, which causes phrase searches containing them to fail. + See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/ + """ + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop and keyboard' + """ + ) + results = sql_index.query(sql_query) + + # This would fail due to Redis stop word handling + assert len(results) >= 0 + @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis") def test_text_in(self, sql_index): """Test text IN operator (multiple term search).""" From 41e14a15d663d683047998294e654ee75427d321 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Tue, 3 Feb 2026 14:33:13 -0500 Subject: [PATCH 7/9] cut for 0.1.1 --- docs/api/query.rst | 20 + docs/user_guide/12_sql_to_redis_queries.ipynb | 472 +++---- docs/user_guide/index.md | 1 + pyproject.toml | 6 +- redisvl/index/index.py | 2 +- .../integration/test_redis_cluster_support.py | 2 + tests/integration/test_search_index.py | 1 + ...st_sql_redis.py => test_sql_redis_hash.py} | 64 +- tests/integration/test_sql_redis_json.py | 1134 +++++++++++++++++ uv.lock | 21 +- 10 files changed, 1414 insertions(+), 309 deletions(-) rename tests/integration/{test_sql_redis.py => test_sql_redis_hash.py} (94%) create mode 100644 tests/integration/test_sql_redis_json.py diff --git a/docs/api/query.rst b/docs/api/query.rst index 22616007..70a29bf3 100644 --- a/docs/api/query.rst +++ b/docs/api/query.rst @@ -231,3 +231,23 @@ MultiVectorQuery :inherited-members: :show-inheritance: :exclude-members: add_filter,get_args,highlight,return_field,summarize + + +SQLQuery +======== + +.. currentmodule:: redisvl.query + + +.. autoclass:: SQLQuery + :members: + :show-inheritance: + +.. note:: + SQLQuery requires the optional ``sql-redis`` package. Install with: + ``pip install redisvl[sql-redis]`` + +.. note:: + SQLQuery translates SQL SELECT statements into Redis FT.SEARCH or FT.AGGREGATE commands. + The SQL syntax supports WHERE clauses, field selection, ordering, and parameterized queries + for vector similarity searches. diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb index 0d70fafe..7f26d08d 100644 --- a/docs/user_guide/12_sql_to_redis_queries.ipynb +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -9,25 +9,9 @@ "\n", "It may arise that you want to use SQL-like queries to interact with your Redis vector database. 
While Redis does not natively support SQL, the `redisvl` library provides a `SQLQuery` class that allows you to write SQL-like queries that are automatically translated into Redis queries.\n", "\n", - "The `SQLQuery` class is a wrapper around the `sql-redis` package, which provides a SQL-to-Redis query translator. The `sql-redis` package is not installed by default with `redisvl`, so you will need to install with the optional syntax:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "zsh:1: no matches found: redisvl[sql]\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install redisvl[sql]" + "The `SQLQuery` class is a wrapper around the [`sql-redis`](https://pypi.org/project/sql-redis/) package, which provides a SQL-to-Redis query translator. The `sql-redis` package is not installed by default with `redisvl`, so you will need to install with the optional syntax:\n", + "\n", + "`pip install redisvl[sql-redis]` or, if running locally, you can `uv sync --all-extras --all-groups`" ] }, { @@ -39,24 +23,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading weights: 100%|██████████| 199/199 [00:00<00:00, 1515.30it/s, Materializing param=pooler.dense.weight] \n", - "MPNetModel LOAD REPORT from: sentence-transformers/all-mpnet-base-v2\n", - "Key | Status | | \n", - "------------------------+------------+--+-\n", - "embeddings.position_ids | UNEXPECTED | | \n", - "\n", - "Notes:\n", - "- UNEXPECTED\t:can be ignored when loading from different task/architecture; not ok if you expect identical arch.\n" - ] - } - ], + "outputs": [], "source": [ "from redisvl.utils.vectorize import HFTextVectorizer\n", "\n", @@ -66,16 +35,14 @@ " \"index\": {\n", " \"name\": \"user_simple\",\n", " \"prefix\": \"user_simple_docs\",\n", + " \"storage_type\": \"json\",\n", " },\n", " \"fields\": [\n", " {\"name\": \"user\", \"type\": \"tag\"},\n", - " {\"name\": \"credit_score\", \"type\": \"tag\"},\n", + " {\"name\": \"region\", \"type\": \"tag\"},\n", " {\"name\": \"job\", \"type\": \"tag\"},\n", " {\"name\": \"job_description\", \"type\": \"text\"},\n", " {\"name\": \"age\", \"type\": \"numeric\"},\n", - " ]\n", - "}\n", - "\n", " {\n", " \"name\": \"job_embedding\",\n", " \"type\": \"vector\",\n", @@ -85,7 +52,9 @@ " \"algorithm\": \"flat\",\n", " \"datatype\": \"float32\"\n", " }\n", - " }" + " }\n", + " ]\n", + "}" ] }, { @@ -93,19 +62,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Sample Dataset Preparation\n", - "\n", - "Below, create a mock dataset with `user`, `job`, `age`, `credit_score`, and\n", - "`user_embedding` fields. The `user_embedding` vectors are synthetic examples\n", - "for demonstration purposes.\n", - "\n", - "For more information on creating real-world embeddings, refer to this\n", - "[article](https://mlops.community/vector-similarity-search-from-basics-to-production/)." 
+ "## Create sample dataset" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ @@ -114,35 +76,35 @@ " 'user': 'john',\n", " 'age': 34,\n", " 'job': 'software engineer',\n", - " 'credit_score': 'high',\n", + " 'region': 'us-west',\n", " 'job_description': 'Designs, develops, and maintains software applications and systems.'\n", " },\n", " {\n", " 'user': 'bill',\n", " 'age': 54,\n", " 'job': 'engineer',\n", - " 'credit_score': 'low',\n", + " 'region': 'us-central',\n", " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'\n", " },\n", " {\n", " 'user': 'mary',\n", " 'age': 24,\n", " 'job': 'doctor',\n", - " 'credit_score': 'low',\n", + " 'region': 'us-central',\n", " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'\n", " },\n", " {\n", " 'user': 'joe',\n", - " 'age': 17,\n", + " 'age': 27,\n", " 'job': 'dentist',\n", - " 'credit_score': 'medium',\n", + " 'region': 'us-east',\n", " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'\n", " },\n", " {\n", " 'user': 'stacy',\n", " 'age': 61,\n", " 'job': 'project manager',\n", - " 'credit_score': 'high',\n", + " 'region': 'us-west',\n", " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'\n", " }\n", "]\n", @@ -150,7 +112,7 @@ "data = [\n", " { \n", " **d,\n", - " \"user_embedding\": hf.embed(f\"{d['job_description']=} {d['job']=}\", as_buffer=True),\n", + " \"job_embedding\": hf.embed(f\"{d['job_description']=} {d['job']=}\"),\n", " } \n", " for d in data\n", "]" @@ -177,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 85, "metadata": {}, "outputs": [], "source": [ @@ -199,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ @@ -217,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -238,14 +200,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 88, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01KG8A5H6C8KQEZK0H56NEXEVG', 'user_simple_docs:01KG8A5H6GCPS1HD6S2FKWMBRS', 'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV5', 'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV6', 'user_simple_docs:01KG8A5H6H51C0X27K7RP5YKV7']\n" + "['user_simple_docs:01KGJDNVDAZ9A6XY69Q6BCRMX5', 'user_simple_docs:01KGJDNVE024SKZB3804R8B6C3', 'user_simple_docs:01KGJDNVEKW1BV836X4926K7S3', 'user_simple_docs:01KGJDNVF62TBDR9Y9V4WZ59ZG', 'user_simple_docs:01KGJDNVFS0BXM7GF6DA66JHSN']\n" ] } ], @@ -267,14 +229,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "from redisvl.query import SQLQuery\n", "\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, age\n", + " SELECT user, region, job, age\n", " FROM user_simple\n", " WHERE age > 17\n", " \"\"\"\n", @@ -291,16 +253,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user credit_score job age'" + "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user region job age'" ] }, - "execution_count": 14, + "execution_count": 90, "metadata": {}, "output_type": 
"execute_result" } @@ -318,25 +280,23 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'user': 'john',\n", - " 'credit_score': 'high',\n", + " 'region': 'us-west',\n", " 'job': 'software engineer',\n", " 'age': '34'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'stacy',\n", - " 'credit_score': 'high',\n", - " 'job': 'project manager',\n", - " 'age': '61'}]" + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'joe', 'region': 'us-east', 'job': 'dentist', 'age': '27'},\n", + " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]" ] }, - "execution_count": 15, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -357,39 +317,36 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@age:[(17 +inf] @credit_score:{high}\" RETURN 4 user credit_score job age\n" + "Resulting redis query: FT.SEARCH user_simple \"@age:[(17 +inf] @region:{us\\-west}\" RETURN 4 user region job age\n" ] }, { "data": { "text/plain": [ "[{'user': 'john',\n", - " 'credit_score': 'high',\n", + " 'region': 'us-west',\n", " 'job': 'software engineer',\n", " 'age': '34'},\n", - " {'user': 'stacy',\n", - " 'credit_score': 'high',\n", - " 'job': 'project manager',\n", - " 'age': '61'}]" + " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]" ] }, - "execution_count": 16, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, age\n", + " SELECT user, region, job, age\n", " FROM user_simple\n", - " WHERE age > 17 and credit_score = 'high'\n", + " WHERE age > 17 and region = 'us-west'\n", "\"\"\"\n", "\n", "# could maybe be nice to set a connection string at the class level\n", @@ -403,41 +360,38 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"((@credit_score:{high})|(@credit_score:{low}))\" RETURN 4 user credit_score job age\n" + "Resulting redis query: FT.SEARCH user_simple \"((@region:{us\\-west})|(@region:{us\\-central}))\" RETURN 4 user region job age\n" ] }, { "data": { "text/plain": [ - "[{'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'stacy', 'credit_score': 'high', 'job': 'dentist', 'age': '61'},\n", - " {'user': 'john', 'credit_score': 'high', 'job': 'engineer', 'age': '34'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'},\n", - " {'user': 'mary', 'credit_score': 'low', 'job': 'doctor', 'age': '24'}]" + "[{'user': 'john',\n", 
+ " 'region': 'us-west',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'},\n", + " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'}]" ] }, - "execution_count": 33, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, age\n", + " SELECT user, region, job, age\n", " FROM user_simple\n", - " WHERE credit_score = 'high' or credit_score = 'low'\n", + " WHERE region = 'us-west' or region = 'us-central'\n", " \"\"\"\n", "\n", "sql_query = SQLQuery(sql_str)\n", @@ -449,27 +403,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job:{software engineer|engineer|pancake tester}\" RETURN 4 user credit_score job age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job:{software engineer|engineer|pancake tester}\" RETURN 4 user region job age\n" ] }, { "data": { "text/plain": [ "[{'user': 'john',\n", - " 'credit_score': 'high',\n", + " 'region': 'us-west',\n", " 'job': 'software engineer',\n", " 'age': '34'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 21, + "execution_count": 94, "metadata": {}, "output_type": "execute_result" } @@ -477,7 +431,7 @@ "source": [ "# job is a tag field therefore this syntax works\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, age\n", + " SELECT user, region, job, age\n", " FROM user_simple\n", " WHERE job IN ('software engineer', 'engineer', 'pancake tester')\n", " \"\"\"\n", @@ -504,27 +458,27 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job_description:sci*\" RETURN 5 user credit_score job job_description age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job_description:sci*\" RETURN 5 user region job job_description age\n" ] }, { "data": { "text/plain": [ "[{'user': 'bill',\n", - " 'credit_score': 'low',\n", + " 'region': 'us-central',\n", " 'job': 'engineer',\n", " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.',\n", " 'age': '54'}]" ] }, - "execution_count": 22, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -532,7 +486,7 @@ "source": [ "# Prefix\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, job_description, age\n", + " SELECT user, region, job, job_description, age\n", " FROM user_simple\n", " WHERE job_description = 'sci*'\n", "\"\"\"\n", @@ -548,27 +502,32 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 96, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job_description:*care\" RETURN 5 user credit_score job job_description age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job_description:*care\" RETURN 5 user region job job_description age\n" ] }, { "data": { "text/plain": [ - "[{'user': 'joe',\n", - " 'credit_score': 'medium',\n", + "[{'user': 'mary',\n", + " 
'region': 'us-central',\n", + " 'job': 'doctor',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n", + " 'age': '24'},\n", + " {'user': 'joe',\n", + " 'region': 'us-east',\n", " 'job': 'dentist',\n", " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", - " 'age': '17'}]" + " 'age': '27'}]" ] }, - "execution_count": 23, + "execution_count": 96, "metadata": {}, "output_type": "execute_result" } @@ -576,7 +535,7 @@ "source": [ "# Suffix\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, job_description, age\n", + " SELECT user, region, job, job_description, age\n", " FROM user_simple\n", " WHERE job_description = '*care'\n", "\"\"\"\n", @@ -592,27 +551,27 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job_description:%diagnose%\" RETURN 5 user credit_score job job_description age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job_description:%diagnose%\" RETURN 5 user region job job_description age\n" ] }, { "data": { "text/plain": [ "[{'user': 'mary',\n", - " 'credit_score': 'low',\n", + " 'region': 'us-central',\n", " 'job': 'doctor',\n", - " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions.',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n", " 'age': '24'}]" ] }, - "execution_count": 26, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -620,7 +579,7 @@ "source": [ "# Fuzzy\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, job_description, age\n", + " SELECT user, region, job, job_description, age\n", " FROM user_simple\n", " WHERE job_description = '%diagnose%'\n", "\"\"\"\n", @@ -636,27 +595,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 98, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"healthcare including\"\" RETURN 5 user credit_score job job_description age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"healthcare including\"\" RETURN 5 user region job job_description age\n" ] }, { "data": { "text/plain": [ "[{'user': 'joe',\n", - " 'credit_score': 'medium',\n", + " 'region': 'us-east',\n", " 'job': 'dentist',\n", " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", - " 'age': '17'}]" + " 'age': '27'}]" ] }, - "execution_count": 28, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -664,7 +623,7 @@ "source": [ "# Phrase no stop words\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, job_description, age\n", + " SELECT user, region, job, job_description, age\n", " FROM user_simple\n", " WHERE job_description = 'healthcare including'\n", "\"\"\"\n", @@ -680,42 +639,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 99, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"diagnosing $and treating\"\" RETURN 5 user credit_score job job_description age\n" + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"diagnosing 
treating\"\" RETURN 5 user region job job_description age\n" ] }, { - "ename": "ResponseError", - "evalue": "Syntax error at offset 29 near and", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mResponseError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[32]\u001b[39m\u001b[32m, line 14\u001b[39m\n\u001b[32m 12\u001b[39m redis_query = sql_query.redis_query_string(redis_url=\u001b[33m\"\u001b[39m\u001b[33mredis://localhost:6379\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 13\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mResulting redis query: \u001b[39m\u001b[33m\"\u001b[39m, redis_query)\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m results = \u001b[43mindex\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 15\u001b[39m results\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:1171\u001b[39m, in \u001b[36mSearchIndex.query\u001b[39m\u001b[34m(self, query)\u001b[39m\n\u001b[32m 1169\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._aggregate(query)\n\u001b[32m 1170\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, SQLQuery):\n\u001b[32m-> \u001b[39m\u001b[32m1171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(query, HybridQuery):\n\u001b[32m 1173\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._hybrid_search(query)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/redisvl/index/index.py:925\u001b[39m, in \u001b[36mSearchIndex._sql_query\u001b[39m\u001b[34m(self, sql_query)\u001b[39m\n\u001b[32m 922\u001b[39m executor = Executor(\u001b[38;5;28mself\u001b[39m._redis_client, registry)\n\u001b[32m 924\u001b[39m \u001b[38;5;66;03m# Execute the query with any params\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m925\u001b[39m result = \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m=\u001b[49m\u001b[43msql_query\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 927\u001b[39m \u001b[38;5;66;03m# Decode bytes to strings in the results (Redis may return bytes)\u001b[39;00m\n\u001b[32m 928\u001b[39m decoded_rows = []\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/sql-redis/sql_redis/executor.py:64\u001b[39m, in \u001b[36mExecutor.execute\u001b[39m\u001b[34m(self, sql, params)\u001b[39m\n\u001b[32m 61\u001b[39m cmd[i] = vector_param\n\u001b[32m 63\u001b[39m \u001b[38;5;66;03m# Execute command\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m64\u001b[39m raw_result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute_command\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcmd\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 66\u001b[39m \u001b[38;5;66;03m# Parse result based on command 
type\u001b[39;00m\n\u001b[32m 67\u001b[39m count = raw_result[\u001b[32m0\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m raw_result \u001b[38;5;28;01melse\u001b[39;00m \u001b[32m0\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:657\u001b[39m, in \u001b[36mRedis.execute_command\u001b[39m\u001b[34m(self, *args, **options)\u001b[39m\n\u001b[32m 656\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mexecute_command\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **options):\n\u001b[32m--> \u001b[39m\u001b[32m657\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_command\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:668\u001b[39m, in \u001b[36mRedis._execute_command\u001b[39m\u001b[34m(self, *args, **options)\u001b[39m\n\u001b[32m 666\u001b[39m \u001b[38;5;28mself\u001b[39m.single_connection_lock.acquire()\n\u001b[32m 667\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m668\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcall_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 669\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_send_command_parse_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 670\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\n\u001b[32m 671\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 672\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_close_connection\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 673\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 675\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 676\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m conn \u001b[38;5;129;01mand\u001b[39;00m conn.should_reconnect():\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/retry.py:116\u001b[39m, in \u001b[36mRetry.call_with_retry\u001b[39m\u001b[34m(self, do, fail, is_retryable)\u001b[39m\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 115\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m116\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdo\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 117\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;28mself\u001b[39m._supported_errors 
\u001b[38;5;28;01mas\u001b[39;00m error:\n\u001b[32m 118\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_retryable \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_retryable(error):\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:669\u001b[39m, in \u001b[36mRedis._execute_command..\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 666\u001b[39m \u001b[38;5;28mself\u001b[39m.single_connection_lock.acquire()\n\u001b[32m 667\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 668\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m conn.retry.call_with_retry(\n\u001b[32m--> \u001b[39m\u001b[32m669\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m: \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_send_command_parse_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 670\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\n\u001b[32m 671\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[32m 672\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m _: \u001b[38;5;28mself\u001b[39m._close_connection(conn),\n\u001b[32m 673\u001b[39m )\n\u001b[32m 675\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 676\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m conn \u001b[38;5;129;01mand\u001b[39;00m conn.should_reconnect():\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:640\u001b[39m, in \u001b[36mRedis._send_command_parse_response\u001b[39m\u001b[34m(self, conn, command_name, *args, **options)\u001b[39m\n\u001b[32m 636\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 637\u001b[39m \u001b[33;03mSend a command and parse the response\u001b[39;00m\n\u001b[32m 638\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 639\u001b[39m conn.send_command(*args, **options)\n\u001b[32m--> \u001b[39m\u001b[32m640\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcommand_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/client.py:691\u001b[39m, in \u001b[36mRedis.parse_response\u001b[39m\u001b[34m(self, connection, command_name, **options)\u001b[39m\n\u001b[32m 689\u001b[39m options.pop(NEVER_DECODE)\n\u001b[32m 690\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m691\u001b[39m response = \u001b[43mconnection\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 692\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ResponseError:\n\u001b[32m 693\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m EMPTY_RESPONSE \u001b[38;5;129;01min\u001b[39;00m options:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/redis-vl-python/.venv/lib/python3.11/site-packages/redis/connection.py:1155\u001b[39m, in 
\u001b[36mAbstractConnection.read_response\u001b[39m\u001b[34m(self, disable_decoding, disconnect_on_error, push_request)\u001b[39m\n\u001b[32m 1153\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, ResponseError):\n\u001b[32m 1154\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1155\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m response\n\u001b[32m 1156\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 1157\u001b[39m \u001b[38;5;28;01mdel\u001b[39;00m response \u001b[38;5;66;03m# avoid creating ref cycles\u001b[39;00m\n", - "\u001b[31mResponseError\u001b[39m: Syntax error at offset 29 near and" - ] + "data": { + "text/plain": [ + "[{'user': 'joe',\n", + " 'region': 'us-east',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '27'}]" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# Phrase with stop words currently limitation of core Redis\n", "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, job_description, age\n", + " SELECT user, region, job, job_description, age\n", " FROM user_simple\n", " WHERE job_description = 'diagnosing and treating'\n", "\"\"\"\n", @@ -731,34 +683,30 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 100, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"@age:[40 60]\" RETURN 4 user credit_score job age\n" + "Resulting redis query: FT.SEARCH user_simple \"@age:[40 60]\" RETURN 4 user region job age\n" ] }, { "data": { "text/plain": [ - "[{'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'},\n", - " {'user': 'bill', 'credit_score': 'low', 'job': 'engineer', 'age': '54'}]" + "[{'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]" ] }, - "execution_count": 35, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql_str = \"\"\"\n", - " SELECT user, credit_score, job, age\n", + " SELECT user, region, job, age\n", " FROM user_simple\n", " WHERE age BETWEEN 40 and 60\n", " \"\"\"\n", @@ -783,52 +731,52 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age credit_score GROUPBY 1 @credit_score REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age REDUCE QUANTILE 2 @age 0.99 AS quantile_age\n" + "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age region GROUPBY 1 @region REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age REDUCE QUANTILE 2 @age 0.99 AS quantile_age\n" ] 
}, { "data": { "text/plain": [ - "[{'credit_score': 'high',\n", - " 'count_age': '10',\n", + "[{'region': 'us-west',\n", + " 'count_age': '2',\n", " 'count_distinct_age': '2',\n", " 'min_age': '34',\n", " 'max_age': '61',\n", " 'avg_age': '47.5',\n", - " 'std_age': '14.2302494708',\n", + " 'std_age': '19.091883092',\n", " 'fist_value_age': '34',\n", " 'to_list_age': [b'34', b'61'],\n", " 'quantile_age': '61'},\n", - " {'credit_score': 'medium',\n", - " 'count_age': '5',\n", - " 'count_distinct_age': '1',\n", - " 'min_age': '17',\n", - " 'max_age': '17',\n", - " 'avg_age': '17',\n", - " 'std_age': '0',\n", - " 'fist_value_age': '17',\n", - " 'to_list_age': [b'17'],\n", - " 'quantile_age': '17'},\n", - " {'credit_score': 'low',\n", - " 'count_age': '10',\n", + " {'region': 'us-central',\n", + " 'count_age': '2',\n", " 'count_distinct_age': '2',\n", " 'min_age': '24',\n", " 'max_age': '54',\n", " 'avg_age': '39',\n", - " 'std_age': '15.8113883008',\n", - " 'fist_value_age': '24',\n", + " 'std_age': '21.2132034356',\n", + " 'fist_value_age': '54',\n", " 'to_list_age': [b'24', b'54'],\n", - " 'quantile_age': '54'}]" + " 'quantile_age': '54'},\n", + " {'region': 'us-east',\n", + " 'count_age': '1',\n", + " 'count_distinct_age': '1',\n", + " 'min_age': '27',\n", + " 'max_age': '27',\n", + " 'avg_age': '27',\n", + " 'std_age': '0',\n", + " 'fist_value_age': '27',\n", + " 'to_list_age': [b'27'],\n", + " 'quantile_age': '27'}]" ] }, - "execution_count": 44, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } @@ -847,7 +795,7 @@ " ARRAY_AGG(age) as to_list_age,\n", " QUANTILE(age, 0.99) as quantile_age\n", " FROM user_simple\n", - " GROUP BY credit_score\n", + " GROUP BY region\n", " \"\"\"\n", "\n", "sql_query = SQLQuery(sql_str)\n", @@ -866,97 +814,56 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 102, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 2 user vector_distance SORTBY vector_distance ASC\n" + "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 4 user job job_description vector_distance SORTBY vector_distance ASC\n" ] }, { "data": { "text/plain": [ - "[{'vector_distance': '0.10912925005', 'user': 'john'},\n", - " {'vector_distance': '0.10912925005', 'user': 'john'},\n", - " {'vector_distance': '0.10912925005', 'user': 'john'},\n", - " {'vector_distance': '0.10912925005', 'user': 'john'},\n", - " {'vector_distance': '0.10912925005', 'user': 'john'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill'}]" + "[{'vector_distance': '0.823510587215',\n", + " 'user': 'bill',\n", + " 'job': 'engineer',\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'},\n", + " {'vector_distance': '0.965160429478',\n", + " 'user': 'john',\n", + " 'job': 'software engineer',\n", + " 'job_description': 'Designs, develops, and maintains software applications and systems.'},\n", + " {'vector_distance': '1.00401365757',\n", + " 'user': 'mary',\n", + " 'job': 'doctor',\n", + " 
'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'},\n", + " {'vector_distance': '1.0062687397',\n", + " 'user': 'stacy',\n", + " 'job': 'project manager',\n", + " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'},\n", + " {'vector_distance': '1.01110625267',\n", + " 'user': 'joe',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'}]" ] }, - "execution_count": 37, + "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql_str = \"\"\"\n", - " SELECT user, vector_distance(user_embedding, :vec) AS vector_distance\n", + " SELECT user, job, job_description, cosine_distance(job_embedding, :vec) AS vector_distance\n", " FROM user_simple\n", " ORDER BY vector_distance ASC\n", " \"\"\"\n", - "vec = np.array([1, 1, 1], dtype=np.float32).tobytes()\n", - "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", "\n", - "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", - "print(\"Resulting redis query: \", redis_query)\n", - "results = index.query(sql_query)\n", - "\n", - "results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Resulting redis query: FT.SEARCH user_simple \"(@credit_score:{low})=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 3 user credit_score vector_distance SORTBY vector_distance ASC\n" - ] - }, - { - "data": { - "text/plain": [ - "[{'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", - " {'vector_distance': '0.121690034866', 'user': 'bill', 'credit_score': 'low'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'},\n", - " {'vector_distance': '0.222222208977', 'user': 'mary', 'credit_score': 'low'}]" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sql_str = \"\"\"\n", - " SELECT user, credit_score, vector_distance(user_embedding, :vec) AS vector_distance\n", - " FROM user_simple\n", - " WHERE credit_score = 'low'\n", - " ORDER BY vector_distance ASC\n", - " \"\"\"\n", - "vec = np.array([1, 1, 1], dtype=np.float32).tobytes()\n", + "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n", "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", "\n", - "# they want full string\n", - "# i.e. 
\"bank of america\" \n", - "# \"bank\" | \"of\" | \"america\"\n", "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", "print(\"Resulting redis query: \", redis_query)\n", "results = index.query(sql_query)\n", @@ -966,44 +873,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 103, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @user_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 2 user vector_distance SORTBY vector_distance DESC\n" + "Resulting redis query: FT.SEARCH user_simple \"(@region:{us\\-central})=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 3 user region vector_distance SORTBY vector_distance ASC\n" ] }, { "data": { "text/plain": [ - "[{'vector_distance': '0.0295688509941', 'user': 'bill'},\n", - " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", - " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", - " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", - " {'vector_distance': '0.0295688509941', 'user': 'bill'},\n", - " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", - " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", - " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", - " {'vector_distance': '0.00608772039413', 'user': 'john'},\n", - " {'vector_distance': '0.00608772039413', 'user': 'john'}]" + "[{'vector_distance': '0.823510587215', 'user': 'bill', 'region': 'us-central'},\n", + " {'vector_distance': '1.00401365757', 'user': 'mary', 'region': 'us-central'}]" ] }, - "execution_count": 38, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sql_str = \"\"\"\n", - " SELECT user, cosine_distance(user_embedding, :vec) AS vector_distance\n", + " SELECT user, region, cosine_distance(job_embedding, :vec) AS vector_distance\n", " FROM user_simple\n", - " ORDER BY vector_distance DESC\n", + " WHERE region = 'us-central'\n", + " ORDER BY vector_distance ASC\n", " \"\"\"\n", "\n", - "vec = np.array([0.5, 0.1, 0.5], dtype=np.float32).tobytes()\n", + "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n", "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", "\n", "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", @@ -1033,32 +933,54 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 104, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Clear all data from Redis associated with the index\n", - "# await index.clear()" + "index.clear()" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 105, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Butm the index is still in place\n", - "# await index.exists()" + "# But the index is still in place\n", + "index.exists()" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "# Remove / delete the index in its entirety\n", - "# await index.delete()" + "index.delete()" ] } ], diff --git a/docs/user_guide/index.md 
b/docs/user_guide/index.md index 602983b5..f89fe51e 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -23,4 +23,5 @@ User guides provide helpful resources for using RedisVL and its different compon 09_svs_vamana 10_embeddings_cache 11_advanced_queries +12_sql_to_redis_queries ``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6eb3c14a..4469c9f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "redisvl" -version = "0.13.2" +version = "0.14.0" description = "Python client library and CLI for using Redis as a vector database" authors = [{ name = "Redis Inc.", email = "applied.ai@redis.com" }] requires-python = ">=3.9.2,<3.14" @@ -51,8 +51,8 @@ bedrock = [ pillow = [ "pillow>=11.3.0", ] -sql = [ - "sql-redis @ file:///Users/robert.shelton/Documents/sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl", +sql-redis = [ + "sql-redis>=0.1.1", ] [project.urls] diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 2ed8a42a..40ce8ae0 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -936,7 +936,7 @@ def _sql_query(self, sql_query: SQLQuery) -> List[Dict[str, Any]]: except ImportError: raise ImportError( "sql-redis is required for SQL query support. " - "Install it with: pip install redisvl[sql]" + "Install it with: pip install redisvl[sql-redis]" ) registry = SchemaRegistry(self._redis_client) diff --git a/tests/integration/test_redis_cluster_support.py b/tests/integration/test_redis_cluster_support.py index 80b82420..0d18dea3 100644 --- a/tests/integration/test_redis_cluster_support.py +++ b/tests/integration/test_redis_cluster_support.py @@ -89,6 +89,7 @@ def test_search_index_cluster_info(redis_cluster_url): finally: index.delete(drop=True) + @pytest.mark.requires_cluster @pytest.mark.asyncio async def test_async_search_index_cluster_info(redis_cluster_url): @@ -110,6 +111,7 @@ async def test_async_search_index_cluster_info(redis_cluster_url): await index.delete(drop=True) await client.aclose() + @pytest.mark.requires_cluster @pytest.mark.asyncio async def test_async_search_index_client(redis_cluster_url): diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index ae64a229..ebfedbe7 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -304,6 +304,7 @@ def test_search_index_delete(index): assert not index.exists() assert index.name not in convert_bytes(index.client.execute_command("FT._LIST")) + @pytest.mark.parametrize("num_docs", [0, 1, 5, 10, 2042]) def test_search_index_clear(index, num_docs): index.create(overwrite=True, drop=True) diff --git a/tests/integration/test_sql_redis.py b/tests/integration/test_sql_redis_hash.py similarity index 94% rename from tests/integration/test_sql_redis.py rename to tests/integration/test_sql_redis_hash.py index 4cf97f6e..b15b2d6a 100644 --- a/tests/integration/test_sql_redis.py +++ b/tests/integration/test_sql_redis_hash.py @@ -150,6 +150,15 @@ def sql_index(redis_url, worker_id): "category": "stationery", "tags": "bestseller", }, + { + "title": "Laptop and Keyboard Bundle", + "name": "Bundle Pack", + "price": 999, + "stock": 15, + "rating": 4.7, + "category": "electronics", + "tags": "featured,sale", + }, ] index.load(products) @@ -521,28 +530,49 @@ def test_text_phrase(self, sql_index): title_lower = result["title"].lower() assert "gaming" in title_lower and "laptop" in title_lower - @pytest.mark.skip( - reason="Phrase search with stop words is a Redis 
limitation - " - "stop words like 'and' are stripped during query parsing" - ) def test_text_phrase_with_stopword(self, sql_index): """Test text phrase search containing stop words. - This test is skipped because Redis strips stop words (like 'and', 'the', 'is') - during query parsing, which causes phrase searches containing them to fail. + Redis does not index stop words (like 'and', 'the', 'is') by default. + The sql-redis library works around this by automatically stripping + stop words from phrase searches and emitting a warning. See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/ """ - sql_query = SQLQuery( - f""" - SELECT title, name - FROM {sql_index.name} - WHERE title = 'laptop and keyboard' - """ - ) - results = sql_index.query(sql_query) - - # This would fail due to Redis stop word handling - assert len(results) >= 0 + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop and keyboard' + """ + ) + results = sql_index.query(sql_query) + + # Should find the "Laptop and Keyboard Bundle" product + assert len(results) >= 1 + # Verify at least one result contains both "laptop" and "keyboard" + found_match = False + for result in results: + title_lower = result["title"].lower() + if "laptop" in title_lower and "keyboard" in title_lower: + found_match = True + break + assert found_match, "Expected to find a result with 'laptop' and 'keyboard'" + + # Verify a warning was emitted about stopword removal + stopword_warnings = [ + warning + for warning in w + if "Stopwords" in str(warning.message) + and "and" in str(warning.message).lower() + ] + assert ( + len(stopword_warnings) >= 1 + ), "Expected a warning about stopword removal" @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis") def test_text_in(self, sql_index): diff --git a/tests/integration/test_sql_redis_json.py b/tests/integration/test_sql_redis_json.py new file mode 100644 index 00000000..d96c1ab8 --- /dev/null +++ b/tests/integration/test_sql_redis_json.py @@ -0,0 +1,1134 @@ +"""Integration tests for SQLQuery class. + +These tests verify that SQLQuery can translate SQL-like syntax +into proper Redis queries and return expected results. 
+""" + +import uuid + +import pytest + +from redisvl.index import SearchIndex +from redisvl.query import SQLQuery + + +@pytest.fixture +def sql_index(redis_url, worker_id): + """Create a products index for SQL query testing.""" + unique_id = str(uuid.uuid4())[:8] + index_name = f"sql_products_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"product_{worker_id}_{unique_id}", + "storage_type": "json", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "name", "type": "text", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "stock", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "rating", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "category", "type": "tag", "attrs": {"sortable": True}}, + {"name": "tags", "type": "tag"}, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Load test data + products = [ + { + "title": "Gaming laptop Pro", + "name": "Gaming Laptop", + "price": 899, + "stock": 10, + "rating": 4.5, + "category": "electronics", + "tags": "sale,featured", + }, + { + "title": "Budget laptop Basic", + "name": "Budget Laptop", + "price": 499, + "stock": 25, + "rating": 3.8, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Premium laptop Ultra", + "name": "Premium Laptop", + "price": 1299, + "stock": 5, + "rating": 4.9, + "category": "electronics", + "tags": "featured", + }, + { + "title": "Python Programming", + "name": "Python Book", + "price": 45, + "stock": 100, + "rating": 4.7, + "category": "books", + "tags": "bestseller", + }, + { + "title": "Redis in Action", + "name": "Redis Book", + "price": 55, + "stock": 50, + "rating": 4.6, + "category": "books", + "tags": "featured", + }, + { + "title": "Data Science Guide", + "name": "DS Book", + "price": 65, + "stock": 30, + "rating": 4.4, + "category": "books", + "tags": "sale", + }, + { + "title": "Wireless Mouse", + "name": "Mouse", + "price": 29, + "stock": 200, + "rating": 4.2, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Mechanical Keyboard", + "name": "Keyboard", + "price": 149, + "stock": 75, + "rating": 4.6, + "category": "electronics", + "tags": "featured", + }, + { + "title": "USB Hub", + "name": "Hub", + "price": 25, + "stock": 150, + "rating": 3.9, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Monitor Stand", + "name": "Stand", + "price": 89, + "stock": 40, + "rating": 4.1, + "category": "accessories", + "tags": "sale,featured", + }, + { + "title": "Desk Lamp", + "name": "Lamp", + "price": 35, + "stock": 80, + "rating": 4.0, + "category": "accessories", + "tags": "sale", + }, + { + "title": "Notebook Set", + "name": "Notebooks", + "price": 15, + "stock": 300, + "rating": 4.3, + "category": "stationery", + "tags": "bestseller", + }, + { + "title": "Laptop and Keyboard Bundle", + "name": "Bundle Pack", + "price": 999, + "stock": 15, + "rating": 4.7, + "category": "electronics", + "tags": "featured,sale", + }, + ] + + index.load(products) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryBasic: + """Tests for basic SQL SELECT queries.""" + + def test_import_sql_query(self): + """Verify SQLQuery can be imported from redisvl.query.""" + from redisvl.query import SQLQuery + + assert SQLQuery is not None + + def test_select_all_fields(self, sql_index): + """Test SELECT * returns all fields.""" + sql_query = 
SQLQuery(f"SELECT * FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Verify results contain expected fields + assert "title" in results[0] + assert "price" in results[0] + + def test_select_specific_fields(self, sql_index): + """Test SELECT with specific field list.""" + sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Results should contain requested fields + assert "title" in results[0] + assert "price" in results[0] + + def test_redis_query_string_with_client(self, sql_index): + """Test redis_query_string() with redis_client returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_client + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_with_url(self, sql_index, redis_url): + """Test redis_query_string() with redis_url returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_url + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_aggregate(self, sql_index): + """Test redis_query_string() returns FT.AGGREGATE for aggregation queries.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.AGGREGATE command + assert redis_cmd.startswith("FT.AGGREGATE") + assert sql_index.name in redis_cmd + assert "GROUPBY" in redis_cmd + + +class TestSQLQueryWhere: + """Tests for SQL WHERE clause filtering.""" + + def test_where_tag_equals(self, sql_index): + """Test WHERE with tag field equality.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "electronics" + + def test_where_numeric_comparison(self, sql_index): + """Test WHERE with numeric field comparison.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price < 50 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) < 50 + + def test_where_combined_and(self, sql_index): + """Test WHERE with AND combining multiple conditions.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' AND price < 100 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + assert result["category"] == "electronics" + assert float(result["price"]) < 100 + + def test_where_numeric_range(self, sql_index): + """Test WHERE with numeric range (BETWEEN equivalent).""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE 
price >= 25 AND price <= 50 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + price = float(result["price"]) + assert 25 <= price <= 50 + + +class TestSQLQueryTagOperators: + """Tests for SQL tag field operators.""" + + def test_tag_not_equals(self, sql_index): + """Test tag != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category != 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] != "electronics" + + def test_tag_in(self, sql_index): + """Test tag IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category IN ('books', 'accessories') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] in ("books", "accessories") + + +class TestSQLQueryNumericOperators: + """Tests for SQL numeric field operators.""" + + def test_numeric_greater_than(self, sql_index): + """Test numeric > operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price > 100 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) > 100 + + def test_numeric_equals(self, sql_index): + """Test numeric = operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price = 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) == 45 + + def test_numeric_not_equals(self, sql_index): + """Test numeric != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price != 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) != 45 + + @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis") + def test_numeric_in(self, sql_index): + """Test numeric IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price IN (45, 55, 65) + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) in (45, 55, 65) + + def test_numeric_between(self, sql_index): + """Test numeric BETWEEN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price BETWEEN 40 AND 60 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + price = float(result["price"]) + assert 40 <= price <= 60 + + +class TestSQLQueryTextOperators: + """Tests for SQL text field operators.""" + + def test_text_equals(self, sql_index): + """Test text = operator (full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert "laptop" in result["title"].lower() + + def test_text_not_equals(self, sql_index): + """Test text != operator (negated full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title != 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + # Results should not contain 'laptop' as a primary match + assert 
"laptop" not in result["title"].lower() + + def test_text_prefix(self, sql_index): + """Test text prefix search with wildcard (term*).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'lap*' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match titles starting with "lap" (e.g., "laptop") + assert "lap" in result["title"].lower() + + def test_text_suffix(self, sql_index): + """Test text suffix search with wildcard (*term).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE name = '*book' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match names ending with "book" (e.g., "Python Book") + assert "book" in result["name"].lower() + + def test_text_fuzzy(self, sql_index): + """Test text fuzzy search with Levenshtein distance (%term%).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = '%laptap%' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should fuzzy match "laptop" even with typo "laptap" + assert "laptop" in result["title"].lower() + + def test_text_phrase(self, sql_index): + """Test text phrase search (multi-word exact phrase).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'gaming laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match exact phrase "gaming laptop" + title_lower = result["title"].lower() + assert "gaming" in title_lower and "laptop" in title_lower + + def test_text_phrase_with_stopword(self, sql_index): + """Test text phrase search containing stop words. + + Redis does not index stop words (like 'and', 'the', 'is') by default. + The sql-redis library works around this by automatically stripping + stop words from phrase searches and emitting a warning. 
+ See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/ + """ + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop and keyboard' + """ + ) + results = sql_index.query(sql_query) + + # Should find the "Laptop and Keyboard Bundle" product + assert len(results) >= 1 + # Verify at least one result contains both "laptop" and "keyboard" + found_match = False + for result in results: + title_lower = result["title"].lower() + if "laptop" in title_lower and "keyboard" in title_lower: + found_match = True + break + assert found_match, "Expected to find a result with 'laptop' and 'keyboard'" + + # Verify a warning was emitted about stopword removal + stopword_warnings = [ + warning + for warning in w + if "Stopwords" in str(warning.message) + and "and" in str(warning.message).lower() + ] + assert ( + len(stopword_warnings) >= 1 + ), "Expected a warning about stopword removal" + + @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis") + def test_text_in(self, sql_index): + """Test text IN operator (multiple term search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title IN ('Python', 'Redis') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + title_lower = result["title"].lower() + assert "python" in title_lower or "redis" in title_lower + + +class TestSQLQueryOrderBy: + """Tests for SQL ORDER BY clause.""" + + def test_order_by_asc(self, sql_index): + """Test ORDER BY ascending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price ASC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices) + + def test_order_by_desc(self, sql_index): + """Test ORDER BY descending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price DESC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices, reverse=True) + + +class TestSQLQueryLimit: + """Tests for SQL LIMIT and OFFSET clauses.""" + + def test_limit(self, sql_index): + """Test LIMIT clause.""" + sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3") + results = sql_index.query(sql_query) + + assert len(results) == 3 + + def test_limit_with_offset(self, sql_index): + """Test LIMIT with OFFSET for pagination.""" + # First page + sql_query1 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0" + ) + results1 = sql_index.query(sql_query1) + + # Second page + sql_query2 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3" + ) + results2 = sql_index.query(sql_query2) + + assert len(results1) == 3 + assert len(results2) == 3 + # Pages should have different results + titles1 = {r["title"] for r in results1} + titles2 = {r["title"] for r in results2} + assert titles1.isdisjoint(titles2) + + +class TestSQLQueryAggregation: + """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.).""" + + def test_count_all(self, sql_index): + """Test COUNT(*) aggregation.""" + sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert int(results[0]["total"]) == 12 # 12 products 
in test data + + def test_group_by_with_count(self, sql_index): + """Test GROUP BY with COUNT.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # Should have groups for electronics, books, accessories, stationery + categories = {r["category"] for r in results} + assert "electronics" in categories + assert "books" in categories + + def test_group_by_with_avg(self, sql_index): + """Test GROUP BY with AVG.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # All results should have category and avg_price + for result in results: + assert "category" in result + assert "avg_price" in result + assert float(result["avg_price"]) > 0 + + def test_group_by_with_filter(self, sql_index): + """Test GROUP BY with WHERE filter.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + WHERE stock > 50 + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "avg_price" in result + + def test_group_by_with_sum(self, sql_index): + """Test GROUP BY with SUM reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, SUM(price) as total_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "total_price" in result + assert float(result["total_price"]) > 0 + + def test_group_by_with_min(self, sql_index): + """Test GROUP BY with MIN reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MIN(price) as min_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "min_price" in result + assert float(result["min_price"]) > 0 + + def test_group_by_with_max(self, sql_index): + """Test GROUP BY with MAX reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "max_price" in result + assert float(result["max_price"]) > 0 + + def test_global_sum(self, sql_index): + """Test global SUM aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT SUM(price) as total + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "total" in results[0] + assert float(results[0]["total"]) > 0 + + def test_global_min(self, sql_index): + """Test global MIN aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MIN(price) as min_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "min_price" in results[0] + assert float(results[0]["min_price"]) > 0 + + def test_global_max(self, sql_index): + """Test global MAX aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MAX(price) as max_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "max_price" in results[0] + assert float(results[0]["max_price"]) > 0 + + def test_multiple_reducers(self, sql_index): 
+ """Test multiple reducers in a single query.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "count" in result + assert "total" in result + assert "avg_price" in result + assert "min_price" in result + assert "max_price" in result + + def test_count_distinct(self, sql_index): + """Test COUNT_DISTINCT reducer using Redis-specific syntax.""" + sql_query = SQLQuery( + f""" + SELECT COUNT_DISTINCT(category) as unique_categories + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "unique_categories" in results[0] + # Should have 4 unique categories: electronics, books, accessories, stationery + assert int(results[0]["unique_categories"]) == 4 + + def test_stddev(self, sql_index): + """Test STDDEV reducer.""" + sql_query = SQLQuery( + f""" + SELECT STDDEV(price) as price_stddev + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "price_stddev" in results[0] + # Verify it's a valid numeric value + stddev_value = float(results[0]["price_stddev"]) + assert stddev_value >= 0 # Standard deviation is always non-negative + + def test_quantile(self, sql_index): + """Test QUANTILE reducer.""" + sql_query = SQLQuery( + f""" + SELECT QUANTILE(price, 0.5) as median_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "median_price" in results[0] + # Verify it's a valid numeric value + median_value = float(results[0]["median_price"]) + assert median_value >= 0 + + def test_tolist(self, sql_index): + """Test TOLIST reducer via ARRAY_AGG SQL function.""" + sql_query = SQLQuery( + f""" + SELECT category, ARRAY_AGG(title) as titles + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "titles" in result + # TOLIST returns a comma-separated string or list of values + assert result["titles"] is not None + + def test_first_value(self, sql_index): + """Test FIRST_VALUE reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, FIRST_VALUE(title) as first_title + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "first_title" in result + # Verify it's a non-empty string + assert isinstance(result["first_title"], str) + assert len(result["first_title"]) > 0 + + +class TestSQLQueryIntegration: + """End-to-end integration tests matching proposal examples.""" + + def test_proposal_example_basic(self, sql_index): + """Test the basic example from the MLP proposal.""" + # Example from proposal doc (adapted for our test data) + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'books' + """ + ) + + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "books" + assert "title" in result + assert "price" in result + + +@pytest.fixture +def vector_index(redis_url, worker_id): + """Create a books index with vector embeddings for SQL query testing.""" + import numpy as np + + unique_id = str(uuid.uuid4())[:8] + index_name = 
f"sql_books_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"book_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "genre", "type": "tag", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "dims": 4, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Create test books with embeddings + books = [ + { + "title": "Dune", + "genre": "Science Fiction", + "price": 15, + "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(), + }, + { + "title": "Foundation", + "genre": "Science Fiction", + "price": 18, + "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(), + }, + { + "title": "Neuromancer", + "genre": "Science Fiction", + "price": 12, + "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(), + }, + { + "title": "The Hobbit", + "genre": "Fantasy", + "price": 14, + "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(), + }, + { + "title": "1984", + "genre": "Dystopian", + "price": 25, + "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(), + }, + ] + + index.load(books) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryVectorSearch: + """Tests for SQL vector similarity search using cosine_distance() and vector_distance().""" + + def test_vector_distance_function(self, vector_index): + """Test vector search with vector_distance() function.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, vector_distance(embedding, :vec) AS score + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + results = vector_index.query(sql_query) + + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert "title" in result + assert "score" in result + # Score should be a valid non-negative distance value + score = float(result["score"]) + assert score >= 0 + + def test_vector_cosine_similarity(self, vector_index): + """Test vector search with cosine_distance() function - pgvector style.""" + import numpy as np + + # Query vector similar to Science Fiction books + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT + title, + genre, + price, + cosine_distance(embedding, :query_vector) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + AND price <= 20 + ORDER BY cosine_distance(embedding, :query_vector) + LIMIT 3 + """, + params={"query_vector": query_vector}, + ) + + results = vector_index.query(sql_query) + + # Should return Science Fiction books under $20 + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert result["genre"] == "Science Fiction" + assert float(result["price"]) <= 20 + # Verify vector_distance is returned (like VectorQuery with return_score=True) + assert "vector_distance" in result + # Distance should be a valid non-negative value + distance = float(result["vector_distance"]) + assert distance >= 0 + + def test_vector_redis_query_string(self, vector_index, redis_url): + """Test redis_query_string() returns correct KNN 
query for vector search.""" + import numpy as np + + # Query vector + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + # Get the Redis command string + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH with KNN syntax + assert redis_cmd.startswith("FT.SEARCH") + assert vector_index.name in redis_cmd + assert "KNN 3" in redis_cmd + assert "@embedding" in redis_cmd + assert "$vector" in redis_cmd + assert "vector_distance" in redis_cmd + + def test_vector_search_with_prefilter_redis_query_string( + self, vector_index, redis_url + ): + """Test redis_query_string() returns correct prefiltered KNN query.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify prefilter syntax: (filter)=>[KNN ...] + assert redis_cmd.startswith("FT.SEARCH") + assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd + assert "=>[KNN" in redis_cmd + assert "KNN 3" in redis_cmd diff --git a/uv.lock b/uv.lock index 53315560..44e652aa 100644 --- a/uv.lock +++ b/uv.lock @@ -4255,7 +4255,7 @@ wheels = [ [[package]] name = "redisvl" -version = "0.13.2" +version = "0.14.0" source = { editable = "." } dependencies = [ { name = "jsonpath-ng" }, @@ -4299,7 +4299,7 @@ pillow = [ sentence-transformers = [ { name = "sentence-transformers" }, ] -sql = [ +sql-redis = [ { name = "sql-redis" }, ] vertexai = [ @@ -4358,12 +4358,12 @@ requires-dist = [ { name = "pyyaml", specifier = ">=5.4,<7.0" }, { name = "redis", specifier = ">=5.0,<7.2" }, { name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.4.0,<4" }, - { name = "sql-redis", marker = "extra == 'sql'", path = "../sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl" }, + { name = "sql-redis", marker = "extra == 'sql-redis'", specifier = ">=0.1.1" }, { name = "tenacity", specifier = ">=8.2.2" }, { name = "urllib3", marker = "extra == 'bedrock'", specifier = "<2.2.0" }, { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.2.2" }, ] -provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow", "sql"] +provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow", "sql-redis"] [package.metadata.requires-dev] dev = [ @@ -5269,21 +5269,16 @@ wheels = [ [[package]] name = "sql-redis" -version = "0.1.0" -source = { path = "../sql-redis/dist/sql_redis-0.1.0-py3-none-any.whl" } +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "redis", version = "7.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "redis", version = "7.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sqlglot" }, ] +sdist = { url = 
"https://files.pythonhosted.org/packages/ed/ef/9ef69125be3b8a9906010f4bfd84d3b12fce86d9ecc9ed18443ff5fa9af6/sql_redis-0.1.1.tar.gz", hash = "sha256:1b763bd33e8963811a8c3d191506d5572f6584bfa5bbfa9c8af09a51f07baf02", size = 103713, upload-time = "2026-02-03T19:29:47.878Z" } wheels = [ - { filename = "sql_redis-0.1.0-py3-none-any.whl", hash = "sha256:9e5be7a8d90c3e52f1cfe5abc8c7be8e9a42eee9b20a3ca53874bc1026c8c2b2" }, -] - -[package.metadata] -requires-dist = [ - { name = "redis", specifier = ">=5.0.0" }, - { name = "sqlglot", specifier = ">=26.0.0" }, + { url = "https://files.pythonhosted.org/packages/04/cf/c9e13d253acb3c08dc9113dc3e75962ebb69584d6286b931f364dfb9225d/sql_redis-0.1.1-py3-none-any.whl", hash = "sha256:8369e8c61990b0f9aa5ad1a9d4b03060f770af5a7b856b84e88e819efcacb1ed", size = 18716, upload-time = "2026-02-03T19:29:46.899Z" }, ] [[package]] From 7f3f5ff946bfccb23c7a838bcbc0633901f070d8 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Tue, 3 Feb 2026 14:42:30 -0500 Subject: [PATCH 8/9] fix tests --- tests/integration/test_sql_redis_hash.py | 2 +- tests/integration/test_sql_redis_json.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_sql_redis_hash.py b/tests/integration/test_sql_redis_hash.py index b15b2d6a..33560c35 100644 --- a/tests/integration/test_sql_redis_hash.py +++ b/tests/integration/test_sql_redis_hash.py @@ -665,7 +665,7 @@ def test_count_all(self, sql_index): results = sql_index.query(sql_query) assert len(results) == 1 - assert int(results[0]["total"]) == 12 # 12 products in test data + assert int(results[0]["total"]) == 13 # 13 products in test data def test_group_by_with_count(self, sql_index): """Test GROUP BY with COUNT.""" diff --git a/tests/integration/test_sql_redis_json.py b/tests/integration/test_sql_redis_json.py index d96c1ab8..76191af0 100644 --- a/tests/integration/test_sql_redis_json.py +++ b/tests/integration/test_sql_redis_json.py @@ -184,9 +184,18 @@ def test_select_all_fields(self, sql_index): results = sql_index.query(sql_query) assert len(results) > 0 - # Verify results contain expected fields - assert "title" in results[0] - assert "price" in results[0] + # For JSON storage, results may contain '$' key with JSON string or parsed fields + first_result = results[0] + if "$" in first_result: + # JSON storage returns data under '$' key + import json + + data = json.loads(first_result["$"]) + assert "title" in data + assert "price" in data + else: + assert "title" in first_result + assert "price" in first_result def test_select_specific_fields(self, sql_index): """Test SELECT with specific field list.""" @@ -665,7 +674,7 @@ def test_count_all(self, sql_index): results = sql_index.query(sql_query) assert len(results) == 1 - assert int(results[0]["total"]) == 12 # 12 products in test data + assert int(results[0]["total"]) == 13 # 13 products in test data def test_group_by_with_count(self, sql_index): """Test GROUP BY with COUNT.""" From 2f118f7f1ee76fb47ee62e948ebd8a2becf79840 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 5 Feb 2026 19:37:42 -0500 Subject: [PATCH 9/9] replace str.replace() with token-based parameter substitution The previous implementation used str.replace() which had two bugs: 1. Partial matching: :id would incorrectly replace inside :product_id 2. 
No quote escaping: values like O'Brien produced invalid SQL

The new token-based approach:
- Splits SQL on :param patterns, then rebuilds with substituted values
- Prevents partial matching by design (tokens are complete identifiers)
- Escapes single quotes using SQL standard (' -> '')
- 2-7x faster than regex alternative, scales better with more parameters

Added unit tests covering partial matching, quote escaping, and edge cases
(unicode, backslashes, empty strings, bytes).
---
 redisvl/query/sql.py                          |  59 ++++-
 tests/unit/test_sql_parameter_substitution.py | 224 ++++++++++++++++++
 2 files changed, 275 insertions(+), 8 deletions(-)
 create mode 100644 tests/unit/test_sql_parameter_substitution.py

diff --git a/redisvl/query/sql.py b/redisvl/query/sql.py
index 5f21c166..06dd2369 100644
--- a/redisvl/query/sql.py
+++ b/redisvl/query/sql.py
@@ -1,5 +1,6 @@
 """SQL Query class for executing SQL-like queries against Redis."""
 
+import re
 from typing import Any, Dict, Optional
 
 
@@ -40,6 +41,55 @@ def __init__(self, sql: str, params: Optional[Dict[str, Any]] = None):
         self.sql = sql
         self.params = params or {}
 
+    def _substitute_params(self, sql: str, params: Dict[str, Any]) -> str:
+        """Substitute parameter placeholders in SQL with actual values.
+
+        Uses token-based approach: splits SQL on :param patterns, then rebuilds
+        with substituted values. This prevents partial matching (e.g., :id
+        won't match inside :product_id) and is faster than regex at scale.
+
+        Args:
+            sql: The SQL string with :param placeholders.
+            params: Dictionary mapping parameter names to values.
+
+        Returns:
+            SQL string with parameters substituted.
+
+        Note:
+            - String values are wrapped in single quotes with proper escaping
+            - Numeric values are converted to strings
+            - Bytes values (e.g., vectors) are NOT substituted here
+        """
+        if not params:
+            return sql
+
+        # Split SQL on :param patterns, keeping the delimiters
+        # Pattern matches : followed by valid identifier (letter/underscore, then alphanumeric/underscore)
+        tokens = re.split(r"(:[a-zA-Z_][a-zA-Z0-9_]*)", sql)
+
+        result = []
+        for token in tokens:
+            if token.startswith(":"):
+                key = token[1:]  # Remove leading :
+                if key in params:
+                    value = params[key]
+                    if isinstance(value, (int, float)):
+                        result.append(str(value))
+                    elif isinstance(value, str):
+                        # Escape single quotes using SQL standard: ' -> ''
+                        escaped = value.replace("'", "''")
+                        result.append(f"'{escaped}'")
+                    else:
+                        # Keep placeholder for bytes (vectors handled by Executor)
+                        result.append(token)
+                else:
+                    # Keep unmatched placeholders as-is
+                    result.append(token)
+            else:
+                result.append(token)
+
+        return "".join(result)
+
     def redis_query_string(
         self,
         redis_client: Optional[Any] = None,
@@ -103,14 +153,7 @@ def redis_query_string(
         translator = Translator(registry)
 
         # Substitute non-bytes params in SQL before translation
-        sql = self.sql
-        for key, value in self.params.items():
-            placeholder = f":{key}"
-            if isinstance(value, (int, float)):
-                sql = sql.replace(placeholder, str(value))
-            elif isinstance(value, str):
-                sql = sql.replace(placeholder, f"'{value}'")
-            # bytes (vectors) are handled separately
+        sql = self._substitute_params(self.sql, self.params)
 
         translated = translator.translate(sql)
         return translated.to_command_string()
diff --git a/tests/unit/test_sql_parameter_substitution.py b/tests/unit/test_sql_parameter_substitution.py
new file mode 100644
index 00000000..3ec51f28
--- /dev/null
+++ b/tests/unit/test_sql_parameter_substitution.py
@@ -0,0 +1,224 @@
+"""Unit tests for 
SQL parameter substitution in SQLQuery. + +These tests verify that parameter substitution correctly handles: +1. Partial matching bug: :id should not replace inside :product_id +2. Quote escaping bug: Single quotes in values should be SQL-escaped +3. Edge cases: Multiple occurrences, similar names, special characters +""" + +import pytest + +from redisvl.query.sql import SQLQuery + + +def buggy_substitute_params(sql: str, params: dict) -> str: + """Simulate the CURRENT buggy implementation for comparison. + + This is the exact code from redisvl/query/sql.py lines 105-113. + """ + for key, value in params.items(): + placeholder = f":{key}" + if isinstance(value, (int, float)): + sql = sql.replace(placeholder, str(value)) + elif isinstance(value, str): + sql = sql.replace(placeholder, f"'{value}'") + return sql + + +class TestBuggyBehaviorDemonstration: + """Tests that DEMONSTRATE the bugs in the current implementation. + + These tests show what goes wrong with the naive str.replace() approach. + They should PASS (demonstrating the bug exists) before the fix, + and some assertions will need to change after the fix. + """ + + def test_partial_match_bug_exists(self): + """Demonstrate that :id incorrectly replaces inside :product_id.""" + sql = "SELECT * FROM idx WHERE id = :id AND product_id = :product_id" + params = {"id": 123, "product_id": 456} + + result = buggy_substitute_params(sql, params) + + # BUG: :id gets replaced inside :product_id first (dict ordering dependent) + # This demonstrates the bug - the result is corrupted + # Depending on dict ordering, we might get "product_123" corruption + assert ":id" not in result or "product_" in result # Some substitution happened + + def test_quote_escaping_bug_exists(self): + """Demonstrate that quotes are NOT escaped in current implementation.""" + sql = "SELECT * FROM idx WHERE name = :name" + params = {"name": "O'Brien"} + + result = buggy_substitute_params(sql, params) + + # BUG: The quote is NOT escaped - this produces invalid SQL + assert "O'Brien" in result # Raw quote, not escaped + assert "O''Brien" not in result # Proper escaping is missing + + +class TestParameterSubstitutionPartialMatching: + """Tests for the partial string matching bug. + + The bug: Using str.replace(':id', '123') would also replace + ':id' inside ':product_id', resulting in 'product_123'. 
+ """ + + def test_similar_param_names_no_partial_match(self): + """Test that :id doesn't replace inside :product_id.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE id = :id AND product_id = :product_id", + params={"id": 123, "product_id": 456}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "id = 123" in substituted + assert "product_id = 456" in substituted + # Should NOT have "product_123" + assert "product_123" not in substituted + + def test_prefix_param_names(self): + """Test params where one is a prefix of another: :user, :user_id, :user_name.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE user = :user AND user_id = :user_id AND user_name = :user_name", + params={"user": "alice", "user_id": 42, "user_name": "Alice Smith"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "user = 'alice'" in substituted + assert "user_id = 42" in substituted + assert "user_name = 'Alice Smith'" in substituted + # Should NOT have corrupted values + assert "'alice'_id" not in substituted + assert "'alice'_name" not in substituted + + def test_suffix_param_names(self): + """Test params where one is a suffix pattern: :vec, :query_vec.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE vec = :vec AND query_vec = :query_vec", + params={"vec": 1.0, "query_vec": 2.0}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "vec = 1.0" in substituted or "vec = 1" in substituted + assert "query_vec = 2.0" in substituted or "query_vec = 2" in substituted + + +class TestParameterSubstitutionQuoteEscaping: + """Tests for the quote escaping bug. + + The bug: String values with single quotes like "O'Brien" would + produce invalid SQL: 'O'Brien' instead of 'O''Brien'. 
+ """ + + def test_single_quote_in_value(self): + """Test that single quotes are properly escaped.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": "O'Brien"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + # SQL standard escaping: ' becomes '' + assert "name = 'O''Brien'" in substituted + + def test_multiple_quotes_in_value(self): + """Test multiple single quotes in a value.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE phrase = :phrase", + params={"phrase": "It's a 'test' string"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "phrase = 'It''s a ''test'' string'" in substituted + + def test_apostrophe_names(self): + """Test common names with apostrophes.""" + test_cases = [ + ("McDonald's", "'McDonald''s'"), + ("O'Reilly", "'O''Reilly'"), + ("D'Angelo", "'D''Angelo'"), + ] + + for name, expected in test_cases: + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": name}, + ) + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + assert f"name = {expected}" in substituted, f"Failed for {name}" + + +class TestParameterSubstitutionEdgeCases: + """Tests for edge cases in parameter substitution.""" + + def test_multiple_occurrences_same_param(self): + """Test that a parameter used multiple times is substituted everywhere.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE category = :cat OR subcategory = :cat", + params={"cat": "electronics"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert substituted.count("'electronics'") == 2 + + def test_empty_string_value(self): + """Test empty string parameter value.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": ""}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "name = ''" in substituted + + def test_numeric_types(self): + """Test integer and float parameter values.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE count = :count AND price = :price", + params={"count": 42, "price": 99.99}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "count = 42" in substituted + assert "price = 99.99" in substituted + + def test_bytes_param_not_substituted(self): + """Test that bytes parameters are not substituted (handled separately).""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE embedding = :vec", + params={"vec": b"\x00\x01\x02\x03"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + # Bytes should remain as placeholder + assert ":vec" in substituted + + def test_special_characters_in_value(self): + """Test special characters that might interfere with regex.""" + special_values = [ + "hello@world.com", + "path/to/file", + "price: $100", + "regex.*pattern", + "back\\slash", + ] + + for value in special_values: + sql_query = SQLQuery( + "SELECT * FROM idx WHERE field = :field", + params={"field": value}, + ) + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + # Should contain the value wrapped in quotes (with any necessary escaping) + assert ":field" not in substituted, f"Failed to substitute for value: {value}" +
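A minimal usage sketch (not part of the patch series above) of the token-based substitution introduced in PATCH 9/9. It calls the private `_substitute_params` helper the same way the new unit tests do; the public `SQLQuery` constructor is the only other API assumed here, and the expected output comment follows from that implementation rather than from separately verified behavior.

```py
# Illustration only: assumes the SQLQuery API and _substitute_params helper
# added in this patch series (redisvl/query/sql.py).
from redisvl.query import SQLQuery

query = SQLQuery(
    "SELECT * FROM idx WHERE id = :id AND owner = :owner AND product_id = :product_id",
    params={"id": 7, "owner": "O'Brien", "product_id": 42},
)

substituted = query._substitute_params(query.sql, query.params)
# :id no longer bleeds into :product_id, and the quote in O'Brien is doubled:
# SELECT * FROM idx WHERE id = 7 AND owner = 'O''Brien' AND product_id = 42
print(substituted)
```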