diff --git a/docs/api/query.rst b/docs/api/query.rst
index 22616007..70a29bf3 100644
--- a/docs/api/query.rst
+++ b/docs/api/query.rst
@@ -231,3 +231,23 @@ MultiVectorQuery
:inherited-members:
:show-inheritance:
:exclude-members: add_filter,get_args,highlight,return_field,summarize
+
+
+SQLQuery
+========
+
+.. currentmodule:: redisvl.query
+
+
+.. autoclass:: SQLQuery
+ :members:
+ :show-inheritance:
+
+.. note::
+ SQLQuery requires the optional ``sql-redis`` package. Install with:
+ ``pip install redisvl[sql-redis]``
+
+.. note::
+ SQLQuery translates SQL SELECT statements into Redis FT.SEARCH or FT.AGGREGATE commands.
+ The SQL syntax supports WHERE clauses, field selection, ordering, and parameterized queries
+ for vector similarity searches.
diff --git a/docs/user_guide/02_hybrid_queries.ipynb b/docs/user_guide/02_hybrid_queries.ipynb
index e7f8d225..b76f0c51 100644
--- a/docs/user_guide/02_hybrid_queries.ipynb
+++ b/docs/user_guide/02_hybrid_queries.ipynb
@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 94,
"metadata": {},
"outputs": [
{
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
@@ -77,7 +77,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
@@ -92,18 +92,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 52,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "13:00:56 [RedisVL] INFO Indices:\n",
- "13:00:56 [RedisVL] INFO 1. user_queries\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# use the CLI to see the created index\n",
"!rvl index listall"
@@ -111,7 +102,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
@@ -121,7 +112,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 98,
"metadata": {},
"outputs": [
{
@@ -130,7 +121,7 @@
"7"
]
},
- "execution_count": 6,
+ "execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
@@ -160,13 +151,13 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "
| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
"
],
"text/plain": [
""
@@ -174,6 +165,16 @@
},
"metadata": {},
"output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance] RETURN 7 user credit_score age job office_location last_updated vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 10'"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -190,18 +191,39 @@
")\n",
"\n",
"results = index.query(v)\n",
- "result_print(results)"
+ "result_print(results)\n",
+ "str(v)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance]'"
+ ]
+ },
+ "execution_count": 100,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "v.query_string()"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -242,13 +264,13 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -268,13 +290,13 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -305,13 +327,13 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -340,13 +362,13 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -367,7 +389,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
@@ -393,13 +415,13 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -428,7 +450,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
@@ -441,7 +463,7 @@
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -466,7 +488,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 64,
"metadata": {},
"outputs": [
{
@@ -479,7 +501,7 @@
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
"
],
"text/plain": [
""
@@ -505,7 +527,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
@@ -518,7 +540,7 @@
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
"
],
"text/plain": [
""
@@ -554,7 +576,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 66,
"metadata": {},
"outputs": [
{
@@ -582,13 +604,13 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -608,7 +630,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 68,
"metadata": {},
"outputs": [
{
@@ -634,7 +656,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 69,
"metadata": {},
"outputs": [
{
@@ -660,7 +682,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
@@ -686,13 +708,13 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -719,14 +741,14 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2',\n",
- " 'score': 0.9090908893868948,\n",
+ "[{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY',\n",
+ " 'score': 1.8181817787737895,\n",
" 'vector_distance': '0',\n",
" 'user': 'john',\n",
" 'credit_score': 'high',\n",
@@ -734,7 +756,7 @@
" 'job': 'engineer',\n",
" 'office_location': '-122.4194,37.7749',\n",
" 'last_updated': '1741627789'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC90DRSFJ0WKXXN49JT',\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PZ',\n",
" 'score': 0.0,\n",
" 'vector_distance': '0',\n",
" 'user': 'derrick',\n",
@@ -743,8 +765,8 @@
" 'job': 'doctor',\n",
" 'office_location': '-122.4194,37.7749',\n",
" 'last_updated': '1741627789'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW',\n",
- " 'score': 0.9090908893868948,\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1',\n",
+ " 'score': 1.8181817787737895,\n",
" 'vector_distance': '0.109129190445',\n",
" 'user': 'tyler',\n",
" 'credit_score': 'high',\n",
@@ -752,25 +774,25 @@
" 'job': 'engineer',\n",
" 'office_location': '-122.0839,37.3861',\n",
" 'last_updated': '1742232589'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG',\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2',\n",
" 'score': 0.0,\n",
- " 'vector_distance': '0.158808946609',\n",
+ " 'vector_distance': '0.158808887005',\n",
" 'user': 'tim',\n",
" 'credit_score': 'high',\n",
" 'age': '12',\n",
" 'job': 'dermatologist',\n",
" 'office_location': '-122.0839,37.3861',\n",
" 'last_updated': '1739644189'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC940DJ9F47EJ6KN2MH',\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q3',\n",
" 'score': 0.0,\n",
- " 'vector_distance': '0.217882037163',\n",
+ " 'vector_distance': '0.217881977558',\n",
" 'user': 'taimur',\n",
" 'credit_score': 'low',\n",
" 'age': '15',\n",
" 'job': 'CEO',\n",
" 'office_location': '-122.0839,37.3861',\n",
" 'last_updated': '1742232589'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE',\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0',\n",
" 'score': 0.0,\n",
" 'vector_distance': '0.266666650772',\n",
" 'user': 'nancy',\n",
@@ -779,7 +801,7 @@
" 'job': 'doctor',\n",
" 'office_location': '-122.4194,37.7749',\n",
" 'last_updated': '1710696589'},\n",
- " {'id': 'user_queries_docs:01JY4J5VC9806MD90GBZNP0MNY',\n",
+ " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q4',\n",
" 'score': 0.0,\n",
" 'vector_distance': '0.653301358223',\n",
" 'user': 'joe',\n",
@@ -790,7 +812,7 @@
" 'last_updated': '1742232589'}]"
]
},
- "execution_count": 24,
+ "execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
@@ -813,7 +835,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 73,
"metadata": {},
"outputs": [
{
@@ -841,13 +863,13 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| score | vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.4545454446934474 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.4545454446934474 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.4545454446934474 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| score | vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 | 1741627789 |
| 0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 | 1741627789 |
| 0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.4545454446934474 | 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.4545454446934474 | 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 | 1710696589 |
| 0.4545454446934474 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -867,13 +889,13 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| score | vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.0 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.0 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
+ "| score | vector_distance | user | credit_score | age | job | office_location | last_updated |
|---|
| 0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 | 1742232589 |
| 0.0 | 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 | 1739644189 |
| 0.0 | 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 | 1742232589 |
| 0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 | 1742232589 |
"
],
"text/plain": [
""
@@ -904,7 +926,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 76,
"metadata": {},
"outputs": [
{
@@ -948,13 +970,13 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location |
|---|
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
"
+ "| vector_distance | user | credit_score | age | job | office_location |
|---|
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
"
],
"text/plain": [
""
@@ -992,7 +1014,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
@@ -1007,7 +1029,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 79,
"metadata": {},
"outputs": [
{
@@ -1032,7 +1054,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 80,
"metadata": {},
"outputs": [
{
@@ -1057,7 +1079,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 81,
"metadata": {},
"outputs": [
{
@@ -1082,13 +1104,13 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job | office_location |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
| 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
| 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
"
+ "| vector_distance | user | credit_score | age | job | office_location |
|---|
| 0 | john | high | 18 | engineer | -122.4194,37.7749 |
| 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
| 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
| 0.158808887005 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
| 0.217881977558 | taimur | low | 15 | CEO | -122.0839,37.3861 |
| 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
| 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
"
],
"text/plain": [
""
@@ -1116,7 +1138,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 83,
"metadata": {},
"outputs": [
{
@@ -1158,7 +1180,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 84,
"metadata": {},
"outputs": [
{
@@ -1192,13 +1214,13 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "| vector_distance | user | credit_score | age | job |
|---|
| 0 | john | high | 18 | engineer |
| 0 | derrick | low | 14 | doctor |
| 0.109129190445 | tyler | high | 100 | engineer |
| 0.158808946609 | tim | high | 12 | dermatologist |
"
+ "| vector_distance | user | credit_score | age | job |
|---|
| 0 | john | high | 18 | engineer |
| 0 | derrick | low | 14 | doctor |
| 0.109129190445 | tyler | high | 100 | engineer |
| 0.158808887005 | tim | high | 12 | dermatologist |
"
],
"text/plain": [
""
@@ -1233,7 +1255,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 86,
"metadata": {},
"outputs": [
{
@@ -1264,7 +1286,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 87,
"metadata": {},
"outputs": [
{
@@ -1304,7 +1326,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 88,
"metadata": {},
"outputs": [
{
@@ -1345,7 +1367,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 89,
"metadata": {},
"outputs": [
{
@@ -1354,7 +1376,7 @@
"'@job:(\"engineer\")=>[KNN 5 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY age DESC DIALECT 3 LIMIT 0 5'"
]
},
- "execution_count": 41,
+ "execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
@@ -1366,7 +1388,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 90,
"metadata": {},
"outputs": [
{
@@ -1375,7 +1397,7 @@
"'@credit_score:{high}'"
]
},
- "execution_count": 42,
+ "execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
@@ -1388,7 +1410,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 91,
"metadata": {},
"outputs": [
{
@@ -1397,7 +1419,7 @@
"'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])'"
]
},
- "execution_count": 43,
+ "execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
@@ -1422,17 +1444,17 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n",
- "{'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n",
- "{'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n",
- "{'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n"
+ "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n",
+ "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n",
+ "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n",
+ "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n"
]
}
],
@@ -1444,7 +1466,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
@@ -1455,7 +1477,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": ".venv",
+ "display_name": "redisvl",
"language": "python",
"name": "python3"
},
@@ -1469,10 +1491,10 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.8"
+ "version": "3.11.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb
new file mode 100644
index 00000000..7f26d08d
--- /dev/null
+++ b/docs/user_guide/12_sql_to_redis_queries.ipynb
@@ -0,0 +1,1008 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SQLQuery class\n",
+ "\n",
+ "You may want to use SQL-like queries to interact with your Redis vector database. While Redis does not natively support SQL, the `redisvl` library provides a `SQLQuery` class that allows you to write SQL-like queries that are automatically translated into Redis queries.\n",
+ "\n",
+ "The `SQLQuery` class is a wrapper around the [`sql-redis`](https://pypi.org/project/sql-redis/) package, which provides a SQL-to-Redis query translator. The `sql-redis` package is not installed by default with `redisvl`, so you will need to install it as an optional extra:\n",
+ "\n",
+ "`pip install redisvl[sql-redis]` or, if running locally, you can `uv sync --all-extras --all-groups`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create an index to search"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from redisvl.utils.vectorize import HFTextVectorizer\n",
+ "\n",
+ "hf = HFTextVectorizer()\n",
+ "\n",
+ "schema = {\n",
+ " \"index\": {\n",
+ " \"name\": \"user_simple\",\n",
+ " \"prefix\": \"user_simple_docs\",\n",
+ " \"storage_type\": \"json\",\n",
+ " },\n",
+ " \"fields\": [\n",
+ " {\"name\": \"user\", \"type\": \"tag\"},\n",
+ " {\"name\": \"region\", \"type\": \"tag\"},\n",
+ " {\"name\": \"job\", \"type\": \"tag\"},\n",
+ " {\"name\": \"job_description\", \"type\": \"text\"},\n",
+ " {\"name\": \"age\", \"type\": \"numeric\"},\n",
+ " {\n",
+ " \"name\": \"job_embedding\",\n",
+ " \"type\": \"vector\",\n",
+ " \"attrs\": {\n",
+ " \"dims\": len(hf.embed(\"get embed length\")),\n",
+ " \"distance_metric\": \"cosine\",\n",
+ " \"algorithm\": \"flat\",\n",
+ " \"datatype\": \"float32\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ "}"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create sample dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = [\n",
+ " {\n",
+ " 'user': 'john',\n",
+ " 'age': 34,\n",
+ " 'job': 'software engineer',\n",
+ " 'region': 'us-west',\n",
+ " 'job_description': 'Designs, develops, and maintains software applications and systems.'\n",
+ " },\n",
+ " {\n",
+ " 'user': 'bill',\n",
+ " 'age': 54,\n",
+ " 'job': 'engineer',\n",
+ " 'region': 'us-central',\n",
+ " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'\n",
+ " },\n",
+ " {\n",
+ " 'user': 'mary',\n",
+ " 'age': 24,\n",
+ " 'job': 'doctor',\n",
+ " 'region': 'us-central',\n",
+ " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'\n",
+ " },\n",
+ " {\n",
+ " 'user': 'joe',\n",
+ " 'age': 27,\n",
+ " 'job': 'dentist',\n",
+ " 'region': 'us-east',\n",
+ " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'\n",
+ " },\n",
+ " {\n",
+ " 'user': 'stacy',\n",
+ " 'age': 61,\n",
+ " 'job': 'project manager',\n",
+ " 'region': 'us-west',\n",
+ " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "data = [\n",
+ " { \n",
+ " **d,\n",
+ " \"job_embedding\": hf.embed(f\"{d['job_description']=} {d['job']=}\"),\n",
+ " } \n",
+ " for d in data\n",
+ "]"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a `SearchIndex`\n",
+ "\n",
+ "With the schema and sample dataset ready, create a `SearchIndex`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Bring your own Redis connection instance\n",
+ "\n",
+ "This is ideal in scenarios where you have custom settings on the connection instance or if your application will share a connection pool:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from redisvl.index import SearchIndex\n",
+ "from redis import Redis\n",
+ "\n",
+ "client = Redis.from_url(\"redis://localhost:6379\")\n",
+ "index = SearchIndex.from_dict(schema, redis_client=client, validate_on_load=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Let the index manage the connection instance\n",
+ "\n",
+ "This is ideal for simple cases:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create the index\n",
+ "\n",
+ "Now that we are connected to Redis, we need to run the create command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "index.create(overwrite=True, drop=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load Data to `SearchIndex`\n",
+ "\n",
+ "Load the sample dataset to Redis.\n",
+ "\n",
+ "### Validate data entries on load\n",
+ "RedisVL uses pydantic validation under the hood to ensure loaded data is valid and conforms to your schema. This setting is optional and can be configured in the `SearchIndex` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['user_simple_docs:01KGJDNVDAZ9A6XY69Q6BCRMX5', 'user_simple_docs:01KGJDNVE024SKZB3804R8B6C3', 'user_simple_docs:01KGJDNVEKW1BV836X4926K7S3', 'user_simple_docs:01KGJDNVF62TBDR9Y9V4WZ59ZG', 'user_simple_docs:01KGJDNVFS0BXM7GF6DA66JHSN']\n"
+ ]
+ }
+ ],
+ "source": [
+ "keys = index.load(data)\n",
+ "\n",
+ "print(keys)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a `SQLQuery` Object\n",
+ "\n",
+ "First, let's test a simple select statement such as the one below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from redisvl.query import SQLQuery\n",
+ "\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, age\n",
+ " FROM user_simple\n",
+ " WHERE age > 17\n",
+ " \"\"\"\n",
+ "\n",
+ "sql_query = SQLQuery(sql_str) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Check the created query string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user region job age'"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Executing the query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'john',\n",
+ " 'region': 'us-west',\n",
+ " 'job': 'software engineer',\n",
+ " 'age': '34'},\n",
+ " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n",
+ " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'},\n",
+ " {'user': 'joe', 'region': 'us-east', 'job': 'dentist', 'age': '27'},\n",
+ " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Additional query support\n",
+ "\n",
+ "### Conditional operators"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@age:[(17 +inf] @region:{us\\-west}\" RETURN 4 user region job age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'john',\n",
+ " 'region': 'us-west',\n",
+ " 'job': 'software engineer',\n",
+ " 'age': '34'},\n",
+ " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, age\n",
+ " FROM user_simple\n",
+ " WHERE age > 17 and region = 'us-west'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"((@region:{us\\-west})|(@region:{us\\-central}))\" RETURN 4 user region job age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'john',\n",
+ " 'region': 'us-west',\n",
+ " 'job': 'software engineer',\n",
+ " 'age': '34'},\n",
+ " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n",
+ " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'},\n",
+ " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'}]"
+ ]
+ },
+ "execution_count": 93,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, age\n",
+ " FROM user_simple\n",
+ " WHERE region = 'us-west' or region = 'us-central'\n",
+ " \"\"\"\n",
+ "\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job:{software engineer|engineer|pancake tester}\" RETURN 4 user region job age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'john',\n",
+ " 'region': 'us-west',\n",
+ " 'job': 'software engineer',\n",
+ " 'age': '34'},\n",
+ " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]"
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# job is a tag field therefore this syntax works\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, age\n",
+ " FROM user_simple\n",
+ " WHERE job IN ('software engineer', 'engineer', 'pancake tester')\n",
+ " \"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Text based searches\n",
+ "\n",
+ "See [the docs](https://redis.io/docs/latest/develop/ai/search-and-query/query/full-text/) for available text queries in Redis.\n",
+ "\n",
+ "For more on exact matching see [here](https://redis.io/docs/latest/develop/ai/search-and-query/query/exact-match/)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job_description:sci*\" RETURN 5 user region job job_description age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'bill',\n",
+ " 'region': 'us-central',\n",
+ " 'job': 'engineer',\n",
+ " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.',\n",
+ " 'age': '54'}]"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Prefix\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, job_description, age\n",
+ " FROM user_simple\n",
+ " WHERE job_description = 'sci*'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job_description:*care\" RETURN 5 user region job job_description age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'mary',\n",
+ " 'region': 'us-central',\n",
+ " 'job': 'doctor',\n",
+ " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n",
+ " 'age': '24'},\n",
+ " {'user': 'joe',\n",
+ " 'region': 'us-east',\n",
+ " 'job': 'dentist',\n",
+ " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n",
+ " 'age': '27'}]"
+ ]
+ },
+ "execution_count": 96,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Suffix\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, job_description, age\n",
+ " FROM user_simple\n",
+ " WHERE job_description = '*care'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job_description:%diagnose%\" RETURN 5 user region job job_description age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'mary',\n",
+ " 'region': 'us-central',\n",
+ " 'job': 'doctor',\n",
+ " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n",
+ " 'age': '24'}]"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fuzzy\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, job_description, age\n",
+ " FROM user_simple\n",
+ " WHERE job_description = '%diagnose%'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"healthcare including\"\" RETURN 5 user region job job_description age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'joe',\n",
+ " 'region': 'us-east',\n",
+ " 'job': 'dentist',\n",
+ " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n",
+ " 'age': '27'}]"
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Phrase without stop words\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, job_description, age\n",
+ " FROM user_simple\n",
+ " WHERE job_description = 'healthcare including'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"diagnosing treating\"\" RETURN 5 user region job job_description age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'joe',\n",
+ " 'region': 'us-east',\n",
+ " 'job': 'dentist',\n",
+ " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n",
+ " 'age': '27'}]"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Phrase with stop words: stop words (e.g. 'and') are dropped from the query, which is currently a limitation of core Redis\n",
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, job_description, age\n",
+ " FROM user_simple\n",
+ " WHERE job_description = 'diagnosing and treating'\n",
+ "\"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"@age:[40 60]\" RETURN 4 user region job age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]"
+ ]
+ },
+ "execution_count": 100,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, job, age\n",
+ " FROM user_simple\n",
+ " WHERE age BETWEEN 40 and 60\n",
+ " \"\"\"\n",
+ "\n",
+ "# could maybe be nice to set a connection string at the class level\n",
+ "# this would deviate from our other query like classes though so thinking on it\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Aggregations\n",
+ "\n",
+ "See the [Redis docs](https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/aggregations/#supported-groupby-reducers) for the supported reducer functions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age region GROUPBY 1 @region REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age REDUCE QUANTILE 2 @age 0.99 AS quantile_age\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'region': 'us-west',\n",
+ " 'count_age': '2',\n",
+ " 'count_distinct_age': '2',\n",
+ " 'min_age': '34',\n",
+ " 'max_age': '61',\n",
+ " 'avg_age': '47.5',\n",
+ " 'std_age': '19.091883092',\n",
+ " 'fist_value_age': '34',\n",
+ " 'to_list_age': [b'34', b'61'],\n",
+ " 'quantile_age': '61'},\n",
+ " {'region': 'us-central',\n",
+ " 'count_age': '2',\n",
+ " 'count_distinct_age': '2',\n",
+ " 'min_age': '24',\n",
+ " 'max_age': '54',\n",
+ " 'avg_age': '39',\n",
+ " 'std_age': '21.2132034356',\n",
+ " 'fist_value_age': '54',\n",
+ " 'to_list_age': [b'24', b'54'],\n",
+ " 'quantile_age': '54'},\n",
+ " {'region': 'us-east',\n",
+ " 'count_age': '1',\n",
+ " 'count_distinct_age': '1',\n",
+ " 'min_age': '27',\n",
+ " 'max_age': '27',\n",
+ " 'avg_age': '27',\n",
+ " 'std_age': '0',\n",
+ " 'fist_value_age': '27',\n",
+ " 'to_list_age': [b'27'],\n",
+ " 'quantile_age': '27'}]"
+ ]
+ },
+ "execution_count": 101,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT\n",
+ " user,\n",
+ " COUNT(age) as count_age,\n",
+ " COUNT_DISTINCT(age) as count_distinct_age,\n",
+ " MIN(age) as min_age,\n",
+ " MAX(age) as max_age,\n",
+ " AVG(age) as avg_age,\n",
+ " STDEV(age) as std_age,\n",
+ " FIRST_VALUE(age) as fist_value_age,\n",
+ " ARRAY_AGG(age) as to_list_age,\n",
+ " QUANTILE(age, 0.99) as quantile_age\n",
+ " FROM user_simple\n",
+ " GROUP BY region\n",
+ " \"\"\"\n",
+ "\n",
+ "sql_query = SQLQuery(sql_str)\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Vector search"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 4 user job job_description vector_distance SORTBY vector_distance ASC\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'vector_distance': '0.823510587215',\n",
+ " 'user': 'bill',\n",
+ " 'job': 'engineer',\n",
+ " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'},\n",
+ " {'vector_distance': '0.965160429478',\n",
+ " 'user': 'john',\n",
+ " 'job': 'software engineer',\n",
+ " 'job_description': 'Designs, develops, and maintains software applications and systems.'},\n",
+ " {'vector_distance': '1.00401365757',\n",
+ " 'user': 'mary',\n",
+ " 'job': 'doctor',\n",
+ " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'},\n",
+ " {'vector_distance': '1.0062687397',\n",
+ " 'user': 'stacy',\n",
+ " 'job': 'project manager',\n",
+ " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'},\n",
+ " {'vector_distance': '1.01110625267',\n",
+ " 'user': 'joe',\n",
+ " 'job': 'dentist',\n",
+ " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'}]"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT user, job, job_description, cosine_distance(job_embedding, :vec) AS vector_distance\n",
+ " FROM user_simple\n",
+ " ORDER BY vector_distance ASC\n",
+ " \"\"\"\n",
+ "\n",
+ "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n",
+ "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n",
+ "\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Resulting redis query: FT.SEARCH user_simple \"(@region:{us\\-central})=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 3 user region vector_distance SORTBY vector_distance ASC\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'vector_distance': '0.823510587215', 'user': 'bill', 'region': 'us-central'},\n",
+ " {'vector_distance': '1.00401365757', 'user': 'mary', 'region': 'us-central'}]"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql_str = \"\"\"\n",
+ " SELECT user, region, cosine_distance(job_embedding, :vec) AS vector_distance\n",
+ " FROM user_simple\n",
+ " WHERE region = 'us-central'\n",
+ " ORDER BY vector_distance ASC\n",
+ " \"\"\"\n",
+ "\n",
+ "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n",
+ "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n",
+ "\n",
+ "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n",
+ "print(\"Resulting redis query: \", redis_query)\n",
+ "results = index.query(sql_query)\n",
+ "\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cleanup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n",
+ "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n",
+ "\n",
+ "But if you want to clean up everything, including the index, just use `.delete()`\n",
+ "which will by default remove the index AND the underlying data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5"
+ ]
+ },
+ "execution_count": 104,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clear all data from Redis associated with the index\n",
+ "index.clear()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# But the index is still in place\n",
+ "index.exists()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove / delete the index in its entirety\n",
+ "index.delete()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "redisvl",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md
index 602983b5..f89fe51e 100644
--- a/docs/user_guide/index.md
+++ b/docs/user_guide/index.md
@@ -23,4 +23,5 @@ User guides provide helpful resources for using RedisVL and its different compon
09_svs_vamana
10_embeddings_cache
11_advanced_queries
+12_sql_to_redis_queries
```
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 9286b066..4469c9f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "redisvl"
-version = "0.13.2"
+version = "0.14.0"
description = "Python client library and CLI for using Redis as a vector database"
authors = [{ name = "Redis Inc.", email = "applied.ai@redis.com" }]
requires-python = ">=3.9.2,<3.14"
@@ -51,6 +51,9 @@ bedrock = [
pillow = [
"pillow>=11.3.0",
]
+sql-redis = [
+ "sql-redis>=0.1.1",
+]
[project.urls]
Homepage = "https://github.com/redis/redis-vl-python"
@@ -64,6 +67,9 @@ rvl = "redisvl.cli.runner:main"
requires = ["hatchling"]
build-backend = "hatchling.build"
+[tool.hatch.metadata]
+allow-direct-references = true
+
[dependency-groups]
dev = [
"black>=25.1.0,<26",
diff --git a/redisvl/index/index.py b/redisvl/index/index.py
index 4bc66f67..40ce8ae0 100644
--- a/redisvl/index/index.py
+++ b/redisvl/index/index.py
@@ -31,6 +31,7 @@
from redisvl.query.hybrid import HybridQuery
from redisvl.query.query import VectorQuery
+from redisvl.query.sql import SQLQuery
from redisvl.redis.utils import (
_keys_share_hash_tag,
async_cluster_create_index,
@@ -917,6 +918,49 @@ def _aggregate(self, aggregation_query: AggregationQuery) -> List[Dict[str, Any]
storage_type=self.schema.index.storage_type,
)
+ def _sql_query(self, sql_query: SQLQuery) -> List[Dict[str, Any]]:
+ """Execute a SQL query and return results.
+
+ Args:
+ sql_query: The SQLQuery object containing the SQL statement.
+
+ Returns:
+ List of dictionaries containing the query results.
+
+ Raises:
+ ImportError: If sql-redis package is not installed.
+ """
+ try:
+ from sql_redis.executor import Executor
+ from sql_redis.schema import SchemaRegistry
+ except ImportError:
+ raise ImportError(
+ "sql-redis is required for SQL query support. "
+ "Install it with: pip install redisvl[sql-redis]"
+ )
+
+ registry = SchemaRegistry(self._redis_client)
+ registry.load_all() # Loads index schemas from Redis
+
+ executor = Executor(self._redis_client, registry)
+
+ # Execute the query with any params
+ result = executor.execute(sql_query.sql, params=sql_query.params)
+
+ # Decode bytes to strings in the results (Redis may return bytes)
+ decoded_rows = []
+ for row in result.rows:
+ decoded_row = {}
+ for key, value in row.items():
+ # Decode key if bytes
+ str_key = key.decode("utf-8") if isinstance(key, bytes) else key
+ # Decode value if bytes
+ str_value = value.decode("utf-8") if isinstance(value, bytes) else value
+ decoded_row[str_key] = str_value
+ decoded_rows.append(decoded_row)
+
+ return decoded_rows
+
def aggregate(self, *args, **kwargs) -> "AggregateResult":
"""Perform an aggregation operation against the index.
@@ -1118,7 +1162,7 @@ def _query(self, query: BaseQuery) -> List[Dict[str, Any]]:
return process_results(results, query=query, schema=self.schema)
def query(
- self, query: Union[BaseQuery, AggregationQuery, HybridQuery]
+ self, query: Union[BaseQuery, AggregationQuery, HybridQuery, SQLQuery]
) -> List[Dict[str, Any]]:
"""Execute a query on the index.
@@ -1146,6 +1190,8 @@ def query(
"""
if isinstance(query, AggregationQuery):
return self._aggregate(query)
+ elif isinstance(query, SQLQuery):
+ return self._sql_query(query)
elif isinstance(query, HybridQuery):
return self._hybrid_search(query)
else:
diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py
index aa84633e..3f78c755 100644
--- a/redisvl/query/__init__.py
+++ b/redisvl/query/__init__.py
@@ -15,6 +15,7 @@
VectorQuery,
VectorRangeQuery,
)
+from redisvl.query.sql import SQLQuery
__all__ = [
"BaseQuery",
@@ -29,4 +30,5 @@
"AggregateHybridQuery",
"MultiVectorQuery",
"Vector",
+ "SQLQuery",
]
diff --git a/redisvl/query/sql.py b/redisvl/query/sql.py
new file mode 100644
index 00000000..06dd2369
--- /dev/null
+++ b/redisvl/query/sql.py
@@ -0,0 +1,159 @@
+"""SQL Query class for executing SQL-like queries against Redis."""
+
+import re
+from typing import Any, Dict, Optional
+
+
+class SQLQuery:
+    """A query class that translates SQL-like syntax into Redis queries.
+
+    This class allows users to write SQL SELECT statements that are
+    automatically translated into Redis FT.SEARCH or FT.AGGREGATE commands.
+
+    .. code-block:: python
+
+        from redisvl.query import SQLQuery
+        from redisvl.index import SearchIndex
+
+        index = SearchIndex.from_existing("products", redis_url="redis://localhost:6379")
+
+        sql_query = SQLQuery('''
+            SELECT title, price, category
+            FROM products
+            WHERE category = 'electronics' AND price < 100
+        ''')
+
+        results = index.query(sql_query)
+
+    Note:
+        Requires the optional `sql-redis` package. Install with:
+        ``pip install redisvl[sql-redis]``
+    """
+
+    def __init__(self, sql: str, params: Optional[Dict[str, Any]] = None):
+        """Initialize a SQLQuery.
+
+        Args:
+            sql: The SQL SELECT statement to execute.
+            params: Optional dictionary of parameters for parameterized queries.
+                Useful for passing vector data for similarity searches.
+        """
+        self.sql = sql
+        self.params = params or {}
+
+    def _substitute_params(self, sql: str, params: Dict[str, Any]) -> str:
+        """Substitute parameter placeholders in SQL with actual values.
+
+        Token-based: the SQL is split once on whole ``:param`` tokens and then
+        rebuilt with substituted values. Matching whole tokens prevents partial
+        matches (e.g., ``:id`` is never replaced inside ``:product_id``).
+
+        Args:
+            sql: The SQL string with :param placeholders.
+            params: Dictionary mapping parameter names to values.
+
+        Returns:
+            SQL string with parameters substituted.
+
+        Note:
+            - String values are wrapped in single quotes with proper escaping
+            - Numeric values are converted to strings
+            - Bytes values (e.g., vectors) are NOT substituted here
+        """
+        if not params:
+            return sql
+
+        # Split SQL on :param patterns, keeping the delimiters
+        # Pattern matches : followed by valid identifier (letter/underscore, then alphanumeric/underscore)
+        tokens = re.split(r"(:[a-zA-Z_][a-zA-Z0-9_]*)", sql)
+
+        result = []
+        for token in tokens:
+            if token.startswith(":"):
+                key = token[1:]  # Remove leading :
+                if key in params:
+                    value = params[key]
+                    if isinstance(value, (int, float)):
+                        result.append(str(value))
+                    elif isinstance(value, str):
+                        # Escape single quotes using SQL standard: ' -> ''
+                        escaped = value.replace("'", "''")
+                        result.append(f"'{escaped}'")
+                    else:
+                        # Keep placeholder for bytes (vectors handled by Executor)
+                        result.append(token)
+                else:
+                    # Keep unmatched placeholders as-is
+                    result.append(token)
+            else:
+                result.append(token)
+
+        return "".join(result)
+
+    def redis_query_string(
+        self,
+        redis_client: Optional[Any] = None,
+        redis_url: str = "redis://localhost:6379",
+    ) -> str:
+        """Translate the SQL query to a Redis command string.
+
+        This method uses the sql-redis translator to convert the SQL statement
+        into the equivalent Redis FT.SEARCH or FT.AGGREGATE command.
+
+        Args:
+            redis_client: A Redis client connection used to load index schemas.
+                If not provided, one is created from redis_url and closed on return.
+            redis_url: The Redis URL to connect to if redis_client is not provided.
+                Defaults to "redis://localhost:6379".
+
+        Returns:
+            The Redis command string (e.g., 'FT.SEARCH products "@category:{electronics}"').
+
+        Raises:
+            ImportError: If sql-redis package is not installed.
+
+        Example:
+            .. code-block:: python
+
+                from redisvl.query import SQLQuery
+
+                sql_query = SQLQuery("SELECT * FROM products WHERE category = 'electronics'")
+
+                # Using redis_url
+                redis_cmd = sql_query.redis_query_string(redis_url="redis://localhost:6379")
+
+                # Or using an existing client
+                from redis import Redis
+                client = Redis()
+                redis_cmd = sql_query.redis_query_string(redis_client=client)
+
+                print(redis_cmd)
+                # Output: FT.SEARCH products "@category:{electronics}"
+        """
+        try:
+            from sql_redis.schema import SchemaRegistry
+            from sql_redis.translator import Translator
+        except ImportError as err:
+            raise ImportError(
+                "sql-redis is required for SQL query support. "
+                "Install it with: pip install redisvl[sql-redis]"
+            ) from err
+
+        # Only a client created here is ours to close; a caller's stays open.
+        owns_client = redis_client is None
+        if owns_client:
+            from redis import Redis
+
+            redis_client = Redis.from_url(redis_url)
+
+        try:
+            # Load index schemas from Redis for the translator
+            registry = SchemaRegistry(redis_client)
+            registry.load_all()
+
+            # Substitute non-bytes params in SQL before translation
+            sql = self._substitute_params(self.sql, self.params)
+            return Translator(registry).translate(sql).to_command_string()
+        finally:
+            if owns_client:
+                redis_client.close()
diff --git a/tests/integration/test_redis_cluster_support.py b/tests/integration/test_redis_cluster_support.py
index 80b82420..0d18dea3 100644
--- a/tests/integration/test_redis_cluster_support.py
+++ b/tests/integration/test_redis_cluster_support.py
@@ -89,6 +89,7 @@ def test_search_index_cluster_info(redis_cluster_url):
finally:
index.delete(drop=True)
+
@pytest.mark.requires_cluster
@pytest.mark.asyncio
async def test_async_search_index_cluster_info(redis_cluster_url):
@@ -110,6 +111,7 @@ async def test_async_search_index_cluster_info(redis_cluster_url):
await index.delete(drop=True)
await client.aclose()
+
@pytest.mark.requires_cluster
@pytest.mark.asyncio
async def test_async_search_index_client(redis_cluster_url):
diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py
index ae64a229..ebfedbe7 100644
--- a/tests/integration/test_search_index.py
+++ b/tests/integration/test_search_index.py
@@ -304,6 +304,7 @@ def test_search_index_delete(index):
assert not index.exists()
assert index.name not in convert_bytes(index.client.execute_command("FT._LIST"))
+
@pytest.mark.parametrize("num_docs", [0, 1, 5, 10, 2042])
def test_search_index_clear(index, num_docs):
index.create(overwrite=True, drop=True)
diff --git a/tests/integration/test_sql_redis_hash.py b/tests/integration/test_sql_redis_hash.py
new file mode 100644
index 00000000..33560c35
--- /dev/null
+++ b/tests/integration/test_sql_redis_hash.py
@@ -0,0 +1,1134 @@
+"""Integration tests for SQLQuery against hash-storage indexes.
+
+These tests verify that SQLQuery can translate SQL-like syntax
+into proper Redis queries and return expected results.
+"""
+
+import uuid
+
+import pytest
+
+from redisvl.index import SearchIndex
+from redisvl.query import SQLQuery
+
+
+@pytest.fixture
+def sql_index(redis_url, worker_id):
+ """Create a products index for SQL query testing."""
+ unique_id = str(uuid.uuid4())[:8]
+ index_name = f"sql_products_{worker_id}_{unique_id}"
+
+ index = SearchIndex.from_dict(
+ {
+ "index": {
+ "name": index_name,
+ "prefix": f"product_{worker_id}_{unique_id}",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text", "attrs": {"sortable": True}},
+ {"name": "name", "type": "text", "attrs": {"sortable": True}},
+ {"name": "price", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "stock", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "rating", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "category", "type": "tag", "attrs": {"sortable": True}},
+ {"name": "tags", "type": "tag"},
+ ],
+ },
+ redis_url=redis_url,
+ )
+
+ index.create(overwrite=True)
+
+ # Load test data
+ products = [
+ {
+ "title": "Gaming laptop Pro",
+ "name": "Gaming Laptop",
+ "price": 899,
+ "stock": 10,
+ "rating": 4.5,
+ "category": "electronics",
+ "tags": "sale,featured",
+ },
+ {
+ "title": "Budget laptop Basic",
+ "name": "Budget Laptop",
+ "price": 499,
+ "stock": 25,
+ "rating": 3.8,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Premium laptop Ultra",
+ "name": "Premium Laptop",
+ "price": 1299,
+ "stock": 5,
+ "rating": 4.9,
+ "category": "electronics",
+ "tags": "featured",
+ },
+ {
+ "title": "Python Programming",
+ "name": "Python Book",
+ "price": 45,
+ "stock": 100,
+ "rating": 4.7,
+ "category": "books",
+ "tags": "bestseller",
+ },
+ {
+ "title": "Redis in Action",
+ "name": "Redis Book",
+ "price": 55,
+ "stock": 50,
+ "rating": 4.6,
+ "category": "books",
+ "tags": "featured",
+ },
+ {
+ "title": "Data Science Guide",
+ "name": "DS Book",
+ "price": 65,
+ "stock": 30,
+ "rating": 4.4,
+ "category": "books",
+ "tags": "sale",
+ },
+ {
+ "title": "Wireless Mouse",
+ "name": "Mouse",
+ "price": 29,
+ "stock": 200,
+ "rating": 4.2,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Mechanical Keyboard",
+ "name": "Keyboard",
+ "price": 149,
+ "stock": 75,
+ "rating": 4.6,
+ "category": "electronics",
+ "tags": "featured",
+ },
+ {
+ "title": "USB Hub",
+ "name": "Hub",
+ "price": 25,
+ "stock": 150,
+ "rating": 3.9,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Monitor Stand",
+ "name": "Stand",
+ "price": 89,
+ "stock": 40,
+ "rating": 4.1,
+ "category": "accessories",
+ "tags": "sale,featured",
+ },
+ {
+ "title": "Desk Lamp",
+ "name": "Lamp",
+ "price": 35,
+ "stock": 80,
+ "rating": 4.0,
+ "category": "accessories",
+ "tags": "sale",
+ },
+ {
+ "title": "Notebook Set",
+ "name": "Notebooks",
+ "price": 15,
+ "stock": 300,
+ "rating": 4.3,
+ "category": "stationery",
+ "tags": "bestseller",
+ },
+ {
+ "title": "Laptop and Keyboard Bundle",
+ "name": "Bundle Pack",
+ "price": 999,
+ "stock": 15,
+ "rating": 4.7,
+ "category": "electronics",
+ "tags": "featured,sale",
+ },
+ ]
+
+ index.load(products)
+
+ yield index
+
+ # Cleanup
+ index.delete(drop=True)
+
+
+class TestSQLQueryBasic:
+ """Tests for basic SQL SELECT queries."""
+
+ def test_import_sql_query(self):
+ """Verify SQLQuery can be imported from redisvl.query."""
+ from redisvl.query import SQLQuery
+
+ assert SQLQuery is not None
+
+ def test_select_all_fields(self, sql_index):
+ """Test SELECT * returns all fields."""
+ sql_query = SQLQuery(f"SELECT * FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ # Verify results contain expected fields
+ assert "title" in results[0]
+ assert "price" in results[0]
+
+ def test_select_specific_fields(self, sql_index):
+ """Test SELECT with specific field list."""
+ sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ # Results should contain requested fields
+ assert "title" in results[0]
+ assert "price" in results[0]
+
+ def test_redis_query_string_with_client(self, sql_index):
+ """Test redis_query_string() with redis_client returns the Redis command string."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE category = 'electronics'
+ """
+ )
+
+ # Get the Redis command string using redis_client
+ redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client)
+
+ # Verify it's a valid FT.SEARCH command
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert sql_index.name in redis_cmd
+ assert "electronics" in redis_cmd
+
+ def test_redis_query_string_with_url(self, sql_index, redis_url):
+ """Test redis_query_string() with redis_url returns the Redis command string."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE category = 'electronics'
+ """
+ )
+
+ # Get the Redis command string using redis_url
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify it's a valid FT.SEARCH command
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert sql_index.name in redis_cmd
+ assert "electronics" in redis_cmd
+
+ def test_redis_query_string_aggregate(self, sql_index):
+ """Test redis_query_string() returns FT.AGGREGATE for aggregation queries."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+
+ redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client)
+
+ # Verify it's a valid FT.AGGREGATE command
+ assert redis_cmd.startswith("FT.AGGREGATE")
+ assert sql_index.name in redis_cmd
+ assert "GROUPBY" in redis_cmd
+
+
+class TestSQLQueryWhere:
+    """Tests for SQL WHERE clause filtering."""
+
+    def test_where_tag_equals(self, sql_index):
+        """Test WHERE with tag field equality."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price, category
+            FROM {sql_index.name}
+            WHERE category = 'electronics'
+            """
+        )
+        results = sql_index.query(sql_query)
+
+        assert len(results) > 0
+        for result in results:
+            assert result["category"] == "electronics"
+
+    def test_where_numeric_comparison(self, sql_index):
+        """Test WHERE with numeric field comparison."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price
+            FROM {sql_index.name}
+            WHERE price < 50
+            """
+        )
+        results = sql_index.query(sql_query)
+
+        assert len(results) > 0
+        for result in results:
+            assert float(result["price"]) < 50
+
+    def test_where_combined_and(self, sql_index):
+        """Test WHERE with AND combining multiple conditions."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price, category
+            FROM {sql_index.name}
+            WHERE category = 'electronics' AND price < 100
+            """
+        )
+        results = sql_index.query(sql_query)
+        assert len(results) > 0  # an empty result would make the loop vacuous
+        for result in results:
+            assert result["category"] == "electronics"
+            assert float(result["price"]) < 100
+
+    def test_where_numeric_range(self, sql_index):
+        """Test WHERE with numeric range (BETWEEN equivalent)."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price
+            FROM {sql_index.name}
+            WHERE price >= 25 AND price <= 50
+            """
+        )
+        results = sql_index.query(sql_query)
+        assert len(results) > 0  # an empty result would make the loop vacuous
+        for result in results:
+            price = float(result["price"])
+            assert 25 <= price <= 50
+
+
+class TestSQLQueryTagOperators:
+ """Tests for SQL tag field operators."""
+
+ def test_tag_not_equals(self, sql_index):
+ """Test tag != operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, category
+ FROM {sql_index.name}
+ WHERE category != 'electronics'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] != "electronics"
+
+ def test_tag_in(self, sql_index):
+ """Test tag IN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, category
+ FROM {sql_index.name}
+ WHERE category IN ('books', 'accessories')
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] in ("books", "accessories")
+
+
+class TestSQLQueryNumericOperators:
+ """Tests for SQL numeric field operators."""
+
+ def test_numeric_greater_than(self, sql_index):
+ """Test numeric > operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price > 100
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert float(result["price"]) > 100
+
+ def test_numeric_equals(self, sql_index):
+ """Test numeric = operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price = 45
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert float(result["price"]) == 45
+
+ def test_numeric_not_equals(self, sql_index):
+ """Test numeric != operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price != 45
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert float(result["price"]) != 45
+
+ @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis")
+ def test_numeric_in(self, sql_index):
+ """Test numeric IN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price IN (45, 55, 65)
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert float(result["price"]) in (45, 55, 65)
+
+ def test_numeric_between(self, sql_index):
+ """Test numeric BETWEEN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price BETWEEN 40 AND 60
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ price = float(result["price"])
+ assert 40 <= price <= 60
+
+
+class TestSQLQueryTextOperators:
+ """Tests for SQL text field operators."""
+
+ def test_text_equals(self, sql_index):
+ """Test text = operator (full-text search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert "laptop" in result["title"].lower()
+
+ def test_text_not_equals(self, sql_index):
+ """Test text != operator (negated full-text search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title != 'laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ # Results should not contain 'laptop' as a primary match
+ assert "laptop" not in result["title"].lower()
+
+ def test_text_prefix(self, sql_index):
+ """Test text prefix search with wildcard (term*)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'lap*'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should match titles starting with "lap" (e.g., "laptop")
+ assert "lap" in result["title"].lower()
+
+ def test_text_suffix(self, sql_index):
+ """Test text suffix search with wildcard (*term)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE name = '*book'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should match names ending with "book" (e.g., "Python Book")
+ assert "book" in result["name"].lower()
+
+ def test_text_fuzzy(self, sql_index):
+ """Test text fuzzy search with Levenshtein distance (%term%)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = '%laptap%'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should fuzzy match "laptop" even with typo "laptap"
+ assert "laptop" in result["title"].lower()
+
+ def test_text_phrase(self, sql_index):
+ """Test text phrase search (multi-word exact phrase)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'gaming laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should match exact phrase "gaming laptop"
+ title_lower = result["title"].lower()
+ assert "gaming" in title_lower and "laptop" in title_lower
+
+ def test_text_phrase_with_stopword(self, sql_index):
+ """Test text phrase search containing stop words.
+
+ Redis does not index stop words (like 'and', 'the', 'is') by default.
+ The sql-redis library works around this by automatically stripping
+ stop words from phrase searches and emitting a warning.
+ See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/
+ """
+ import warnings
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'laptop and keyboard'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # Should find the "Laptop and Keyboard Bundle" product
+ assert len(results) >= 1
+ # Verify at least one result contains both "laptop" and "keyboard"
+ found_match = False
+ for result in results:
+ title_lower = result["title"].lower()
+ if "laptop" in title_lower and "keyboard" in title_lower:
+ found_match = True
+ break
+ assert found_match, "Expected to find a result with 'laptop' and 'keyboard'"
+
+ # Verify a warning was emitted about stopword removal
+ stopword_warnings = [
+ warning
+ for warning in w
+ if "Stopwords" in str(warning.message)
+ and "and" in str(warning.message).lower()
+ ]
+ assert (
+ len(stopword_warnings) >= 1
+ ), "Expected a warning about stopword removal"
+
+ @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis")
+ def test_text_in(self, sql_index):
+ """Test text IN operator (multiple term search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title IN ('Python', 'Redis')
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ title_lower = result["title"].lower()
+ assert "python" in title_lower or "redis" in title_lower
+
+
+class TestSQLQueryOrderBy:
+    """Tests for SQL ORDER BY clause."""
+
+    def test_order_by_asc(self, sql_index):
+        """Test ORDER BY ascending."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price
+            FROM {sql_index.name}
+            ORDER BY price ASC
+            """
+        )
+        results = sql_index.query(sql_query)
+        assert len(results) > 0  # an empty list is trivially "sorted"
+        prices = [float(r["price"]) for r in results]
+        assert prices == sorted(prices)
+
+    def test_order_by_desc(self, sql_index):
+        """Test ORDER BY descending."""
+        sql_query = SQLQuery(
+            f"""
+            SELECT title, price
+            FROM {sql_index.name}
+            ORDER BY price DESC
+            """
+        )
+        results = sql_index.query(sql_query)
+        assert len(results) > 0  # an empty list is trivially "sorted"
+        prices = [float(r["price"]) for r in results]
+        assert prices == sorted(prices, reverse=True)
+
+
+class TestSQLQueryLimit:
+ """Tests for SQL LIMIT and OFFSET clauses."""
+
+ def test_limit(self, sql_index):
+ """Test LIMIT clause."""
+ sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3")
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 3
+
+ def test_limit_with_offset(self, sql_index):
+ """Test LIMIT with OFFSET for pagination."""
+ # First page
+ sql_query1 = SQLQuery(
+ f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0"
+ )
+ results1 = sql_index.query(sql_query1)
+
+ # Second page
+ sql_query2 = SQLQuery(
+ f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3"
+ )
+ results2 = sql_index.query(sql_query2)
+
+ assert len(results1) == 3
+ assert len(results2) == 3
+ # Pages should have different results
+ titles1 = {r["title"] for r in results1}
+ titles2 = {r["title"] for r in results2}
+ assert titles1.isdisjoint(titles2)
+
+
+class TestSQLQueryAggregation:
+ """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.)."""
+
+ def test_count_all(self, sql_index):
+ """Test COUNT(*) aggregation."""
+ sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert int(results[0]["total"]) == 13 # 13 products in test data
+
+ def test_group_by_with_count(self, sql_index):
+ """Test GROUP BY with COUNT."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # Should have groups for electronics, books, accessories, stationery
+ categories = {r["category"] for r in results}
+ assert "electronics" in categories
+ assert "books" in categories
+
+ def test_group_by_with_avg(self, sql_index):
+ """Test GROUP BY with AVG."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, AVG(price) as avg_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # All results should have category and avg_price
+ for result in results:
+ assert "category" in result
+ assert "avg_price" in result
+ assert float(result["avg_price"]) > 0
+
+ def test_group_by_with_filter(self, sql_index):
+ """Test GROUP BY with WHERE filter."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, AVG(price) as avg_price
+ FROM {sql_index.name}
+ WHERE stock > 50
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "avg_price" in result
+
+ def test_group_by_with_sum(self, sql_index):
+ """Test GROUP BY with SUM reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, SUM(price) as total_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "total_price" in result
+ assert float(result["total_price"]) > 0
+
+ def test_group_by_with_min(self, sql_index):
+ """Test GROUP BY with MIN reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, MIN(price) as min_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "min_price" in result
+ assert float(result["min_price"]) > 0
+
+ def test_group_by_with_max(self, sql_index):
+ """Test GROUP BY with MAX reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, MAX(price) as max_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "max_price" in result
+ assert float(result["max_price"]) > 0
+
+ def test_global_sum(self, sql_index):
+ """Test global SUM aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT SUM(price) as total
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "total" in results[0]
+ assert float(results[0]["total"]) > 0
+
+ def test_global_min(self, sql_index):
+ """Test global MIN aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT MIN(price) as min_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "min_price" in results[0]
+ assert float(results[0]["min_price"]) > 0
+
+ def test_global_max(self, sql_index):
+ """Test global MAX aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT MAX(price) as max_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "max_price" in results[0]
+ assert float(results[0]["max_price"]) > 0
+
+ def test_multiple_reducers(self, sql_index):
+ """Test multiple reducers in a single query."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "count" in result
+ assert "total" in result
+ assert "avg_price" in result
+ assert "min_price" in result
+ assert "max_price" in result
+
+ def test_count_distinct(self, sql_index):
+ """Test COUNT_DISTINCT reducer using Redis-specific syntax."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT COUNT_DISTINCT(category) as unique_categories
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "unique_categories" in results[0]
+ # Should have 4 unique categories: electronics, books, accessories, stationery
+ assert int(results[0]["unique_categories"]) == 4
+
+ def test_stddev(self, sql_index):
+ """Test STDDEV reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT STDDEV(price) as price_stddev
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "price_stddev" in results[0]
+ # Verify it's a valid numeric value
+ stddev_value = float(results[0]["price_stddev"])
+ assert stddev_value >= 0 # Standard deviation is always non-negative
+
+ def test_quantile(self, sql_index):
+ """Test QUANTILE reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT QUANTILE(price, 0.5) as median_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "median_price" in results[0]
+ # Verify it's a valid numeric value
+ median_value = float(results[0]["median_price"])
+ assert median_value >= 0
+
+ def test_tolist(self, sql_index):
+ """Test TOLIST reducer via ARRAY_AGG SQL function."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, ARRAY_AGG(title) as titles
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "titles" in result
+ # TOLIST returns a comma-separated string or list of values
+ assert result["titles"] is not None
+
+ def test_first_value(self, sql_index):
+ """Test FIRST_VALUE reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, FIRST_VALUE(title) as first_title
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "first_title" in result
+ # Verify it's a non-empty string
+ assert isinstance(result["first_title"], str)
+ assert len(result["first_title"]) > 0
+
+
+class TestSQLQueryIntegration:
+ """End-to-end integration tests matching proposal examples."""
+
+ def test_proposal_example_basic(self, sql_index):
+ """Test the basic example from the MLP proposal."""
+ # Example from proposal doc (adapted for our test data)
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price, category
+ FROM {sql_index.name}
+ WHERE category = 'books'
+ """
+ )
+
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] == "books"
+ assert "title" in result
+ assert "price" in result
+
+
+@pytest.fixture
+def vector_index(redis_url, worker_id):
+ """Create a books index with vector embeddings for SQL query testing."""
+ import numpy as np
+
+ unique_id = str(uuid.uuid4())[:8]
+ index_name = f"sql_books_{worker_id}_{unique_id}"
+
+ index = SearchIndex.from_dict(
+ {
+ "index": {
+ "name": index_name,
+ "prefix": f"book_{worker_id}_{unique_id}",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text", "attrs": {"sortable": True}},
+ {"name": "genre", "type": "tag", "attrs": {"sortable": True}},
+ {"name": "price", "type": "numeric", "attrs": {"sortable": True}},
+ {
+ "name": "embedding",
+ "type": "vector",
+ "attrs": {
+ "dims": 4,
+ "distance_metric": "cosine",
+ "algorithm": "flat",
+ "datatype": "float32",
+ },
+ },
+ ],
+ },
+ redis_url=redis_url,
+ )
+
+ index.create(overwrite=True)
+
+ # Create test books with embeddings
+ books = [
+ {
+ "title": "Dune",
+ "genre": "Science Fiction",
+ "price": 15,
+ "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "Foundation",
+ "genre": "Science Fiction",
+ "price": 18,
+ "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "Neuromancer",
+ "genre": "Science Fiction",
+ "price": 12,
+ "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "The Hobbit",
+ "genre": "Fantasy",
+ "price": 14,
+ "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "1984",
+ "genre": "Dystopian",
+ "price": 25,
+ "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(),
+ },
+ ]
+
+ index.load(books)
+
+ yield index
+
+ # Cleanup
+ index.delete(drop=True)
+
+
+class TestSQLQueryVectorSearch:
+ """Tests for SQL vector similarity search using cosine_distance() and vector_distance()."""
+
+ def test_vector_distance_function(self, vector_index):
+ """Test vector search with vector_distance() function."""
+ import numpy as np
+
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, vector_distance(embedding, :vec) AS score
+ FROM {vector_index.name}
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ results = vector_index.query(sql_query)
+
+ assert len(results) > 0
+ assert len(results) <= 3
+ for result in results:
+ assert "title" in result
+ assert "score" in result
+ # Score should be a valid non-negative distance value
+ score = float(result["score"])
+ assert score >= 0
+
+ def test_vector_cosine_similarity(self, vector_index):
+ """Test vector search with cosine_distance() function - pgvector style."""
+ import numpy as np
+
+ # Query vector similar to Science Fiction books
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT
+ title,
+ genre,
+ price,
+ cosine_distance(embedding, :query_vector) AS vector_distance
+ FROM {vector_index.name}
+ WHERE genre = 'Science Fiction'
+ AND price <= 20
+ ORDER BY cosine_distance(embedding, :query_vector)
+ LIMIT 3
+ """,
+ params={"query_vector": query_vector},
+ )
+
+ results = vector_index.query(sql_query)
+
+ # Should return Science Fiction books under $20
+ assert len(results) > 0
+ assert len(results) <= 3
+ for result in results:
+ assert result["genre"] == "Science Fiction"
+ assert float(result["price"]) <= 20
+ # Verify vector_distance is returned (like VectorQuery with return_score=True)
+ assert "vector_distance" in result
+ # Distance should be a valid non-negative value
+ distance = float(result["vector_distance"])
+ assert distance >= 0
+
+ def test_vector_redis_query_string(self, vector_index, redis_url):
+ """Test redis_query_string() returns correct KNN query for vector search."""
+ import numpy as np
+
+ # Query vector
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, cosine_distance(embedding, :vec) AS vector_distance
+ FROM {vector_index.name}
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ # Get the Redis command string
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify it's a valid FT.SEARCH with KNN syntax
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert vector_index.name in redis_cmd
+ assert "KNN 3" in redis_cmd
+ assert "@embedding" in redis_cmd
+ assert "$vector" in redis_cmd
+ assert "vector_distance" in redis_cmd
+
+ def test_vector_search_with_prefilter_redis_query_string(
+ self, vector_index, redis_url
+ ):
+ """Test redis_query_string() returns correct prefiltered KNN query."""
+ import numpy as np
+
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance
+ FROM {vector_index.name}
+ WHERE genre = 'Science Fiction'
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify prefilter syntax: (filter)=>[KNN ...]
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd
+ assert "=>[KNN" in redis_cmd
+ assert "KNN 3" in redis_cmd
diff --git a/tests/integration/test_sql_redis_json.py b/tests/integration/test_sql_redis_json.py
new file mode 100644
index 00000000..76191af0
--- /dev/null
+++ b/tests/integration/test_sql_redis_json.py
@@ -0,0 +1,1143 @@
+"""Integration tests for SQLQuery class.
+
+These tests verify that SQLQuery translates SQL-like syntax into Redis
+queries and returns the expected results (JSON products index, hash vector index).
+"""
+
+import uuid
+
+import pytest
+
+from redisvl.index import SearchIndex
+from redisvl.query import SQLQuery
+
+
+@pytest.fixture
+def sql_index(redis_url, worker_id):
+ """Create a products index for SQL query testing."""
+ unique_id = str(uuid.uuid4())[:8]
+ index_name = f"sql_products_{worker_id}_{unique_id}"
+
+ index = SearchIndex.from_dict(
+ {
+ "index": {
+ "name": index_name,
+ "prefix": f"product_{worker_id}_{unique_id}",
+ "storage_type": "json",
+ },
+ "fields": [
+ {"name": "title", "type": "text", "attrs": {"sortable": True}},
+ {"name": "name", "type": "text", "attrs": {"sortable": True}},
+ {"name": "price", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "stock", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "rating", "type": "numeric", "attrs": {"sortable": True}},
+ {"name": "category", "type": "tag", "attrs": {"sortable": True}},
+ {"name": "tags", "type": "tag"},
+ ],
+ },
+ redis_url=redis_url,
+ )
+
+ index.create(overwrite=True)
+
+ # Load test data
+ products = [
+ {
+ "title": "Gaming laptop Pro",
+ "name": "Gaming Laptop",
+ "price": 899,
+ "stock": 10,
+ "rating": 4.5,
+ "category": "electronics",
+ "tags": "sale,featured",
+ },
+ {
+ "title": "Budget laptop Basic",
+ "name": "Budget Laptop",
+ "price": 499,
+ "stock": 25,
+ "rating": 3.8,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Premium laptop Ultra",
+ "name": "Premium Laptop",
+ "price": 1299,
+ "stock": 5,
+ "rating": 4.9,
+ "category": "electronics",
+ "tags": "featured",
+ },
+ {
+ "title": "Python Programming",
+ "name": "Python Book",
+ "price": 45,
+ "stock": 100,
+ "rating": 4.7,
+ "category": "books",
+ "tags": "bestseller",
+ },
+ {
+ "title": "Redis in Action",
+ "name": "Redis Book",
+ "price": 55,
+ "stock": 50,
+ "rating": 4.6,
+ "category": "books",
+ "tags": "featured",
+ },
+ {
+ "title": "Data Science Guide",
+ "name": "DS Book",
+ "price": 65,
+ "stock": 30,
+ "rating": 4.4,
+ "category": "books",
+ "tags": "sale",
+ },
+ {
+ "title": "Wireless Mouse",
+ "name": "Mouse",
+ "price": 29,
+ "stock": 200,
+ "rating": 4.2,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Mechanical Keyboard",
+ "name": "Keyboard",
+ "price": 149,
+ "stock": 75,
+ "rating": 4.6,
+ "category": "electronics",
+ "tags": "featured",
+ },
+ {
+ "title": "USB Hub",
+ "name": "Hub",
+ "price": 25,
+ "stock": 150,
+ "rating": 3.9,
+ "category": "electronics",
+ "tags": "sale",
+ },
+ {
+ "title": "Monitor Stand",
+ "name": "Stand",
+ "price": 89,
+ "stock": 40,
+ "rating": 4.1,
+ "category": "accessories",
+ "tags": "sale,featured",
+ },
+ {
+ "title": "Desk Lamp",
+ "name": "Lamp",
+ "price": 35,
+ "stock": 80,
+ "rating": 4.0,
+ "category": "accessories",
+ "tags": "sale",
+ },
+ {
+ "title": "Notebook Set",
+ "name": "Notebooks",
+ "price": 15,
+ "stock": 300,
+ "rating": 4.3,
+ "category": "stationery",
+ "tags": "bestseller",
+ },
+ {
+ "title": "Laptop and Keyboard Bundle",
+ "name": "Bundle Pack",
+ "price": 999,
+ "stock": 15,
+ "rating": 4.7,
+ "category": "electronics",
+ "tags": "featured,sale",
+ },
+ ]
+
+ index.load(products)
+
+ yield index
+
+ # Cleanup
+ index.delete(drop=True)
+
+
+class TestSQLQueryBasic:
+ """Tests for basic SQL SELECT queries."""
+
+ def test_import_sql_query(self):
+ """Verify SQLQuery can be imported from redisvl.query."""
+ from redisvl.query import SQLQuery
+
+ assert SQLQuery is not None
+
+ def test_select_all_fields(self, sql_index):
+ """Test SELECT * returns all fields."""
+ sql_query = SQLQuery(f"SELECT * FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ # For JSON storage, a result may carry the document as a JSON string under the '$' key, or expose already-parsed fields
+ first_result = results[0]
+ if "$" in first_result:
+ # JSON storage returns data under '$' key
+ import json
+
+ data = json.loads(first_result["$"])
+ assert "title" in data
+ assert "price" in data
+ else:
+ assert "title" in first_result
+ assert "price" in first_result
+
+ def test_select_specific_fields(self, sql_index):
+ """Test SELECT with specific field list."""
+ sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ # Results should contain requested fields
+ assert "title" in results[0]
+ assert "price" in results[0]
+
+ def test_redis_query_string_with_client(self, sql_index):
+ """Test redis_query_string() with redis_client returns the Redis command string."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE category = 'electronics'
+ """
+ )
+
+ # Get the Redis command string using redis_client
+ redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client)
+
+ # Verify it's a valid FT.SEARCH command
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert sql_index.name in redis_cmd
+ assert "electronics" in redis_cmd
+
+ def test_redis_query_string_with_url(self, sql_index, redis_url):
+ """Test redis_query_string() with redis_url returns the Redis command string."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE category = 'electronics'
+ """
+ )
+
+ # Get the Redis command string using redis_url
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify it's a valid FT.SEARCH command
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert sql_index.name in redis_cmd
+ assert "electronics" in redis_cmd
+
+ def test_redis_query_string_aggregate(self, sql_index):
+ """Test redis_query_string() returns FT.AGGREGATE for aggregation queries."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+
+ redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client)
+
+ # Verify it's a valid FT.AGGREGATE command
+ assert redis_cmd.startswith("FT.AGGREGATE")
+ assert sql_index.name in redis_cmd
+ assert "GROUPBY" in redis_cmd
+
+
+class TestSQLQueryWhere:
+ """Tests for SQL WHERE clause filtering."""
+
+ def test_where_tag_equals(self, sql_index):
+ """Test WHERE with tag field equality."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price, category
+ FROM {sql_index.name}
+ WHERE category = 'electronics'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] == "electronics"
+
+ def test_where_numeric_comparison(self, sql_index):
+ """Test WHERE with numeric field comparison."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price < 50
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert float(result["price"]) < 50
+
+ def test_where_combined_and(self, sql_index):
+ """Test WHERE with AND combining multiple conditions."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price, category
+ FROM {sql_index.name}
+ WHERE category = 'electronics' AND price < 100
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ for result in results:
+ assert result["category"] == "electronics"
+ assert float(result["price"]) < 100
+
+ def test_where_numeric_range(self, sql_index):
+ """Test WHERE with numeric range (BETWEEN equivalent)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price >= 25 AND price <= 50
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ for result in results:
+ price = float(result["price"])
+ assert 25 <= price <= 50
+
+
+class TestSQLQueryTagOperators:
+ """Tests for SQL tag field operators."""
+
+ def test_tag_not_equals(self, sql_index):
+ """Test tag != operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, category
+ FROM {sql_index.name}
+ WHERE category != 'electronics'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] != "electronics"
+
+ def test_tag_in(self, sql_index):
+ """Test tag IN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, category
+ FROM {sql_index.name}
+ WHERE category IN ('books', 'accessories')
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] in ("books", "accessories")
+
+
+class TestSQLQueryNumericOperators:
+ """Tests for SQL numeric field operators."""
+
+ def test_numeric_greater_than(self, sql_index):
+ """Test numeric > operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price > 100
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert float(result["price"]) > 100
+
+ def test_numeric_equals(self, sql_index):
+ """Test numeric = operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price = 45
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert float(result["price"]) == 45
+
+ def test_numeric_not_equals(self, sql_index):
+ """Test numeric != operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price != 45
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert float(result["price"]) != 45
+
+ @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis")
+ def test_numeric_in(self, sql_index):
+ """Test numeric IN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price IN (45, 55, 65)
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert float(result["price"]) in (45, 55, 65)
+
+ def test_numeric_between(self, sql_index):
+ """Test numeric BETWEEN operator."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ WHERE price BETWEEN 40 AND 60
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ price = float(result["price"])
+ assert 40 <= price <= 60
+
+
+class TestSQLQueryTextOperators:
+ """Tests for SQL text field operators."""
+
+ def test_text_equals(self, sql_index):
+ """Test text = operator (full-text search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ assert "laptop" in result["title"].lower()
+
+ def test_text_not_equals(self, sql_index):
+ """Test text != operator (negated full-text search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title != 'laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ # Negated search: no returned title may contain the term 'laptop'
+ assert "laptop" not in result["title"].lower()
+
+ def test_text_prefix(self, sql_index):
+ """Test text prefix search with wildcard (term*)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'lap*'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should match titles containing a word that starts with "lap" (e.g., "laptop")
+ assert "lap" in result["title"].lower()
+
+ def test_text_suffix(self, sql_index):
+ """Test text suffix search with wildcard (*term)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE name = '*book'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should match names containing a word that ends with "book" (e.g., "Python Book")
+ assert "book" in result["name"].lower()
+
+ def test_text_fuzzy(self, sql_index):
+ """Test text fuzzy search with Levenshtein distance (%term%)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = '%laptap%'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Should fuzzy match "laptop" even with typo "laptap"
+ assert "laptop" in result["title"].lower()
+
+ def test_text_phrase(self, sql_index):
+ """Test text phrase search (multi-word exact phrase)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'gaming laptop'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ # Phrase search for "gaming laptop": every hit must contain both words
+ title_lower = result["title"].lower()
+ assert "gaming" in title_lower and "laptop" in title_lower
+
+ def test_text_phrase_with_stopword(self, sql_index):
+ """Test text phrase search containing stop words.
+
+ Redis does not index stop words (like 'and', 'the', 'is') by default.
+ The sql-redis library works around this by automatically stripping
+ stop words from phrase searches and emitting a warning.
+ See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/
+ """
+ import warnings
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title = 'laptop and keyboard'
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # Should find the "Laptop and Keyboard Bundle" product
+ assert len(results) >= 1
+ # Verify at least one result contains both "laptop" and "keyboard"
+ found_match = False
+ for result in results:
+ title_lower = result["title"].lower()
+ if "laptop" in title_lower and "keyboard" in title_lower:
+ found_match = True
+ break
+ assert found_match, "Expected to find a result with 'laptop' and 'keyboard'"
+
+ # Verify a warning was emitted about stopword removal
+ stopword_warnings = [
+ warning
+ for warning in w
+ if "Stopwords" in str(warning.message)
+ and "and" in str(warning.message).lower()
+ ]
+ assert (
+ len(stopword_warnings) >= 1
+ ), "Expected a warning about stopword removal"
+
+ @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis")
+ def test_text_in(self, sql_index):
+ """Test text IN operator (multiple term search)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, name
+ FROM {sql_index.name}
+ WHERE title IN ('Python', 'Redis')
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) >= 1
+ for result in results:
+ title_lower = result["title"].lower()
+ assert "python" in title_lower or "redis" in title_lower
+
+
+class TestSQLQueryOrderBy:
+ """Tests for SQL ORDER BY clause."""
+
+ def test_order_by_asc(self, sql_index):
+ """Test ORDER BY ascending."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ ORDER BY price ASC
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ prices = [float(r["price"]) for r in results]
+ assert prices == sorted(prices)
+
+ def test_order_by_desc(self, sql_index):
+ """Test ORDER BY descending."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price
+ FROM {sql_index.name}
+ ORDER BY price DESC
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ prices = [float(r["price"]) for r in results]
+ assert prices == sorted(prices, reverse=True)
+
+
+class TestSQLQueryLimit:
+ """Tests for SQL LIMIT and OFFSET clauses."""
+
+ def test_limit(self, sql_index):
+ """Test LIMIT clause."""
+ sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3")
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 3
+
+ def test_limit_with_offset(self, sql_index):
+ """Test LIMIT with OFFSET for pagination."""
+ # First page
+ sql_query1 = SQLQuery(
+ f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0"
+ )
+ results1 = sql_index.query(sql_query1)
+
+ # Second page
+ sql_query2 = SQLQuery(
+ f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3"
+ )
+ results2 = sql_index.query(sql_query2)
+
+ assert len(results1) == 3
+ assert len(results2) == 3
+ # Pages should have different results
+ titles1 = {r["title"] for r in results1}
+ titles2 = {r["title"] for r in results2}
+ assert titles1.isdisjoint(titles2)
+
+
+class TestSQLQueryAggregation:
+ """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.)."""
+
+ def test_count_all(self, sql_index):
+ """Test COUNT(*) aggregation."""
+ sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}")
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert int(results[0]["total"]) == 13 # 13 products in test data
+
+ def test_group_by_with_count(self, sql_index):
+ """Test GROUP BY with COUNT."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # Test data has four categories (electronics, books, accessories, stationery); spot-check two of them
+ categories = {r["category"] for r in results}
+ assert "electronics" in categories
+ assert "books" in categories
+
+ def test_group_by_with_avg(self, sql_index):
+ """Test GROUP BY with AVG."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, AVG(price) as avg_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ # All results should have category and avg_price
+ for result in results:
+ assert "category" in result
+ assert "avg_price" in result
+ assert float(result["avg_price"]) > 0
+
+ def test_group_by_with_filter(self, sql_index):
+ """Test GROUP BY with WHERE filter."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, AVG(price) as avg_price
+ FROM {sql_index.name}
+ WHERE stock > 50
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "avg_price" in result
+
+ def test_group_by_with_sum(self, sql_index):
+ """Test GROUP BY with SUM reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, SUM(price) as total_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "total_price" in result
+ assert float(result["total_price"]) > 0
+
+ def test_group_by_with_min(self, sql_index):
+ """Test GROUP BY with MIN reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, MIN(price) as min_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "min_price" in result
+ assert float(result["min_price"]) > 0
+
+ def test_group_by_with_max(self, sql_index):
+ """Test GROUP BY with MAX reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, MAX(price) as max_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "max_price" in result
+ assert float(result["max_price"]) > 0
+
+ def test_global_sum(self, sql_index):
+ """Test global SUM aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT SUM(price) as total
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "total" in results[0]
+ assert float(results[0]["total"]) > 0
+
+ def test_global_min(self, sql_index):
+ """Test global MIN aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT MIN(price) as min_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "min_price" in results[0]
+ assert float(results[0]["min_price"]) > 0
+
+ def test_global_max(self, sql_index):
+ """Test global MAX aggregation (no GROUP BY)."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT MAX(price) as max_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "max_price" in results[0]
+ assert float(results[0]["max_price"]) > 0
+
+ def test_multiple_reducers(self, sql_index):
+ """Test multiple reducers in a single query."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "category" in result
+ assert "count" in result
+ assert "total" in result
+ assert "avg_price" in result
+ assert "min_price" in result
+ assert "max_price" in result
+
+ def test_count_distinct(self, sql_index):
+ """Test COUNT_DISTINCT reducer using Redis-specific syntax."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT COUNT_DISTINCT(category) as unique_categories
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "unique_categories" in results[0]
+ # Should have 4 unique categories: electronics, books, accessories, stationery
+ assert int(results[0]["unique_categories"]) == 4
+
+ def test_stddev(self, sql_index):
+ """Test STDDEV reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT STDDEV(price) as price_stddev
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "price_stddev" in results[0]
+ # Verify it's a valid numeric value
+ stddev_value = float(results[0]["price_stddev"])
+ assert stddev_value >= 0 # Standard deviation is always non-negative
+
+ def test_quantile(self, sql_index):
+ """Test QUANTILE reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT QUANTILE(price, 0.5) as median_price
+ FROM {sql_index.name}
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) == 1
+ assert "median_price" in results[0]
+ # Verify it's a valid numeric value
+ median_value = float(results[0]["median_price"])
+ assert median_value >= 0
+
+ def test_tolist(self, sql_index):
+ """Test TOLIST reducer via ARRAY_AGG SQL function."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, ARRAY_AGG(title) as titles
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "titles" in result
+ # TOLIST output shape (string vs. list) is not pinned here; only require a non-null value
+ assert result["titles"] is not None
+
+ def test_first_value(self, sql_index):
+ """Test FIRST_VALUE reducer."""
+ sql_query = SQLQuery(
+ f"""
+ SELECT category, FIRST_VALUE(title) as first_title
+ FROM {sql_index.name}
+ GROUP BY category
+ """
+ )
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert "first_title" in result
+ # Verify it's a non-empty string
+ assert isinstance(result["first_title"], str)
+ assert len(result["first_title"]) > 0
+
+
+class TestSQLQueryIntegration:
+ """End-to-end integration tests matching proposal examples."""
+
+ def test_proposal_example_basic(self, sql_index):
+ """Test the basic example from the MLP proposal."""
+ # Example from proposal doc (adapted for our test data)
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, price, category
+ FROM {sql_index.name}
+ WHERE category = 'books'
+ """
+ )
+
+ results = sql_index.query(sql_query)
+
+ assert len(results) > 0
+ for result in results:
+ assert result["category"] == "books"
+ assert "title" in result
+ assert "price" in result
+
+
+@pytest.fixture
+def vector_index(redis_url, worker_id):
+ """Create a books index with vector embeddings for SQL query testing."""
+ import numpy as np
+
+ unique_id = str(uuid.uuid4())[:8]
+ index_name = f"sql_books_{worker_id}_{unique_id}"
+
+ index = SearchIndex.from_dict(
+ {
+ "index": {
+ "name": index_name,
+ "prefix": f"book_{worker_id}_{unique_id}",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text", "attrs": {"sortable": True}},
+ {"name": "genre", "type": "tag", "attrs": {"sortable": True}},
+ {"name": "price", "type": "numeric", "attrs": {"sortable": True}},
+ {
+ "name": "embedding",
+ "type": "vector",
+ "attrs": {
+ "dims": 4,
+ "distance_metric": "cosine",
+ "algorithm": "flat",
+ "datatype": "float32",
+ },
+ },
+ ],
+ },
+ redis_url=redis_url,
+ )
+
+ index.create(overwrite=True)
+
+ # Create test books with embeddings
+ books = [
+ {
+ "title": "Dune",
+ "genre": "Science Fiction",
+ "price": 15,
+ "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "Foundation",
+ "genre": "Science Fiction",
+ "price": 18,
+ "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "Neuromancer",
+ "genre": "Science Fiction",
+ "price": 12,
+ "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "The Hobbit",
+ "genre": "Fantasy",
+ "price": 14,
+ "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(),
+ },
+ {
+ "title": "1984",
+ "genre": "Dystopian",
+ "price": 25,
+ "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(),
+ },
+ ]
+
+ index.load(books)
+
+ yield index
+
+ # Cleanup
+ index.delete(drop=True)
+
+
+class TestSQLQueryVectorSearch:
+ """Tests for SQL vector similarity search using cosine_distance() and vector_distance()."""
+
+ def test_vector_distance_function(self, vector_index):
+ """Test vector search with vector_distance() function."""
+ import numpy as np
+
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, vector_distance(embedding, :vec) AS score
+ FROM {vector_index.name}
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ results = vector_index.query(sql_query)
+
+ assert len(results) > 0
+ assert len(results) <= 3
+ for result in results:
+ assert "title" in result
+ assert "score" in result
+ # Score should be a valid non-negative distance value
+ score = float(result["score"])
+ assert score >= 0
+
+ def test_vector_cosine_similarity(self, vector_index):
+ """Test vector search with cosine_distance() function - pgvector style."""
+ import numpy as np
+
+ # Query vector similar to Science Fiction books
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT
+ title,
+ genre,
+ price,
+ cosine_distance(embedding, :query_vector) AS vector_distance
+ FROM {vector_index.name}
+ WHERE genre = 'Science Fiction'
+ AND price <= 20
+ ORDER BY cosine_distance(embedding, :query_vector)
+ LIMIT 3
+ """,
+ params={"query_vector": query_vector},
+ )
+
+ results = vector_index.query(sql_query)
+
+ # Should return Science Fiction books priced at $20 or less
+ assert len(results) > 0
+ assert len(results) <= 3
+ for result in results:
+ assert result["genre"] == "Science Fiction"
+ assert float(result["price"]) <= 20
+ # Verify vector_distance is returned (like VectorQuery with return_score=True)
+ assert "vector_distance" in result
+ # Distance should be a valid non-negative value
+ distance = float(result["vector_distance"])
+ assert distance >= 0
+
+ def test_vector_redis_query_string(self, vector_index, redis_url):
+ """Test redis_query_string() returns correct KNN query for vector search."""
+ import numpy as np
+
+ # Query vector
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, cosine_distance(embedding, :vec) AS vector_distance
+ FROM {vector_index.name}
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ # Get the Redis command string
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify it's a valid FT.SEARCH with KNN syntax
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert vector_index.name in redis_cmd
+ assert "KNN 3" in redis_cmd
+ assert "@embedding" in redis_cmd
+ assert "$vector" in redis_cmd
+ assert "vector_distance" in redis_cmd
+
+ def test_vector_search_with_prefilter_redis_query_string(
+ self, vector_index, redis_url
+ ):
+ """Test redis_query_string() returns correct prefiltered KNN query."""
+ import numpy as np
+
+ query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes()
+
+ sql_query = SQLQuery(
+ f"""
+ SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance
+ FROM {vector_index.name}
+ WHERE genre = 'Science Fiction'
+ LIMIT 3
+ """,
+ params={"vec": query_vector},
+ )
+
+ redis_cmd = sql_query.redis_query_string(redis_url=redis_url)
+
+ # Verify prefilter syntax: (filter)=>[KNN ...]
+ assert redis_cmd.startswith("FT.SEARCH")
+ assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd
+ assert "=>[KNN" in redis_cmd
+ assert "KNN 3" in redis_cmd
diff --git a/tests/unit/test_sql_parameter_substitution.py b/tests/unit/test_sql_parameter_substitution.py
new file mode 100644
index 00000000..3ec51f28
--- /dev/null
+++ b/tests/unit/test_sql_parameter_substitution.py
@@ -0,0 +1,224 @@
+"""Unit tests for SQL parameter substitution in SQLQuery.
+
+These tests verify that parameter substitution correctly handles:
+1. Partial matching bug: :user should not replace inside :user_id
+2. Quote escaping bug: Single quotes in values should be SQL-escaped
+3. Edge cases: Multiple occurrences, similar names, special characters
+"""
+
+import pytest
+
+from redisvl.query.sql import SQLQuery
+
+
+def buggy_substitute_params(sql: str, params: dict) -> str:
+ """Simulate the CURRENT buggy implementation for comparison.
+
+ This is the exact code from redisvl/query/sql.py lines 105-113.
+ """
+ for key, value in params.items():
+ placeholder = f":{key}"
+ if isinstance(value, (int, float)):
+ sql = sql.replace(placeholder, str(value))
+ elif isinstance(value, str):
+ sql = sql.replace(placeholder, f"'{value}'")
+ return sql
+
+
+class TestBuggyBehaviorDemonstration:
+ """Tests that DEMONSTRATE the bugs in the current implementation.
+
+ These tests show what goes wrong with the naive str.replace() approach.
+ They call the local buggy_substitute_params() copy defined above, so they
+ keep passing whether or not SQLQuery's own substitution has been fixed.
+ """
+
+    def test_partial_match_bug_exists(self):
+        """Demonstrate that :user incorrectly replaces inside :user_id."""
+        sql = "SELECT * FROM idx WHERE user = :user AND user_id = :user_id"
+        params = {"user": "alice", "user_id": 42}
+
+        result = buggy_substitute_params(sql, params)
+
+        # BUG: str.replace(":user", ...) also rewrites the ":user" prefix of
+        # ":user_id" (dicts iterate in insertion order, so :user goes first),
+        # leaving the corrupted token 'alice'_id and never substituting 42.
+        assert result.endswith("user = 'alice' AND user_id = 'alice'_id")
+
+ def test_quote_escaping_bug_exists(self):
+ """Demonstrate that quotes are NOT escaped in current implementation."""
+ sql = "SELECT * FROM idx WHERE name = :name"
+ params = {"name": "O'Brien"}
+
+ result = buggy_substitute_params(sql, params)
+
+ # BUG: The quote is NOT escaped - this produces invalid SQL
+ assert "O'Brien" in result # Raw quote, not escaped
+ assert "O''Brien" not in result # Proper escaping is missing
+
+
+class TestParameterSubstitutionPartialMatching:
+ """Tests for the partial string matching bug.
+
+ The bug: Using str.replace(':user', "'alice'") would also replace the
+ ':user' prefix of ':user_id', resulting in the corrupted token 'alice'_id.
+ """
+
+ def test_similar_param_names_no_partial_match(self):
+ """Test that :id doesn't replace inside :product_id."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE id = :id AND product_id = :product_id",
+ params={"id": 123, "product_id": 456},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "id = 123" in substituted
+ assert "product_id = 456" in substituted
+ # Should NOT have "product_123"
+ assert "product_123" not in substituted
+
+ def test_prefix_param_names(self):
+ """Test params where one is a prefix of another: :user, :user_id, :user_name."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE user = :user AND user_id = :user_id AND user_name = :user_name",
+ params={"user": "alice", "user_id": 42, "user_name": "Alice Smith"},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "user = 'alice'" in substituted
+ assert "user_id = 42" in substituted
+ assert "user_name = 'Alice Smith'" in substituted
+ # Should NOT have corrupted values
+ assert "'alice'_id" not in substituted
+ assert "'alice'_name" not in substituted
+
+ def test_suffix_param_names(self):
+ """Test params where one is a suffix pattern: :vec, :query_vec."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE vec = :vec AND query_vec = :query_vec",
+ params={"vec": 1.0, "query_vec": 2.0},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "vec = 1.0" in substituted or "vec = 1" in substituted
+ assert "query_vec = 2.0" in substituted or "query_vec = 2" in substituted
+
+
+class TestParameterSubstitutionQuoteEscaping:
+ """Tests for the quote escaping bug.
+
+ The bug: String values with single quotes like "O'Brien" would
+ produce invalid SQL: 'O'Brien' instead of 'O''Brien'.
+ """
+
+ def test_single_quote_in_value(self):
+ """Test that single quotes are properly escaped."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE name = :name",
+ params={"name": "O'Brien"},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ # SQL standard escaping: ' becomes ''
+ assert "name = 'O''Brien'" in substituted
+
+ def test_multiple_quotes_in_value(self):
+ """Test multiple single quotes in a value."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE phrase = :phrase",
+ params={"phrase": "It's a 'test' string"},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "phrase = 'It''s a ''test'' string'" in substituted
+
+ def test_apostrophe_names(self):
+ """Test common names with apostrophes."""
+ test_cases = [
+ ("McDonald's", "'McDonald''s'"),
+ ("O'Reilly", "'O''Reilly'"),
+ ("D'Angelo", "'D''Angelo'"),
+ ]
+
+ for name, expected in test_cases:
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE name = :name",
+ params={"name": name},
+ )
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+ assert f"name = {expected}" in substituted, f"Failed for {name}"
+
+
+class TestParameterSubstitutionEdgeCases:
+ """Tests for edge cases in parameter substitution."""
+
+ def test_multiple_occurrences_same_param(self):
+ """Test that a parameter used multiple times is substituted everywhere."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE category = :cat OR subcategory = :cat",
+ params={"cat": "electronics"},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert substituted.count("'electronics'") == 2
+
+ def test_empty_string_value(self):
+ """Test empty string parameter value."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE name = :name",
+ params={"name": ""},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "name = ''" in substituted
+
+ def test_numeric_types(self):
+ """Test integer and float parameter values."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE count = :count AND price = :price",
+ params={"count": 42, "price": 99.99},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ assert "count = 42" in substituted
+ assert "price = 99.99" in substituted
+
+ def test_bytes_param_not_substituted(self):
+ """Test that bytes parameters are not substituted (handled separately)."""
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE embedding = :vec",
+ params={"vec": b"\x00\x01\x02\x03"},
+ )
+
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+ # Bytes should remain as placeholder
+ assert ":vec" in substituted
+
+ def test_special_characters_in_value(self):
+ """Test special characters that might interfere with regex."""
+ special_values = [
+ "hello@world.com",
+ "path/to/file",
+ "price: $100",
+ "regex.*pattern",
+ "back\\slash",
+ ]
+
+ for value in special_values:
+ sql_query = SQLQuery(
+ "SELECT * FROM idx WHERE field = :field",
+ params={"field": value},
+ )
+ substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+ # Only asserts the placeholder was consumed; exact quoting/escaping is not checked here
+ assert ":field" not in substituted, f"Failed to substitute for value: {value}"
+
diff --git a/uv.lock b/uv.lock
index ad61bd62..44e652aa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.9.2, <3.14"
resolution-markers = [
"python_full_version >= '3.13'",
@@ -4255,7 +4255,7 @@ wheels = [
[[package]]
name = "redisvl"
-version = "0.13.2"
+version = "0.14.0"
source = { editable = "." }
dependencies = [
{ name = "jsonpath-ng" },
@@ -4299,6 +4299,9 @@ pillow = [
sentence-transformers = [
{ name = "sentence-transformers" },
]
+sql-redis = [
+ { name = "sql-redis" },
+]
vertexai = [
{ name = "google-cloud-aiplatform" },
{ name = "protobuf" },
@@ -4355,11 +4358,12 @@ requires-dist = [
{ name = "pyyaml", specifier = ">=5.4,<7.0" },
{ name = "redis", specifier = ">=5.0,<7.2" },
{ name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.4.0,<4" },
+ { name = "sql-redis", marker = "extra == 'sql-redis'", specifier = ">=0.1.1" },
{ name = "tenacity", specifier = ">=8.2.2" },
{ name = "urllib3", marker = "extra == 'bedrock'", specifier = "<2.2.0" },
{ name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.2.2" },
]
-provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow"]
+provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow", "sql-redis"]
[package.metadata.requires-dev]
dev = [
@@ -5263,6 +5267,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" },
]
+[[package]]
+name = "sql-redis"
+version = "0.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "redis", version = "7.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+ { name = "redis", version = "7.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+ { name = "sqlglot" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ed/ef/9ef69125be3b8a9906010f4bfd84d3b12fce86d9ecc9ed18443ff5fa9af6/sql_redis-0.1.1.tar.gz", hash = "sha256:1b763bd33e8963811a8c3d191506d5572f6584bfa5bbfa9c8af09a51f07baf02", size = 103713, upload-time = "2026-02-03T19:29:47.878Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/cf/c9e13d253acb3c08dc9113dc3e75962ebb69584d6286b931f364dfb9225d/sql_redis-0.1.1-py3-none-any.whl", hash = "sha256:8369e8c61990b0f9aa5ad1a9d4b03060f770af5a7b856b84e88e819efcacb1ed", size = 18716, upload-time = "2026-02-03T19:29:46.899Z" },
+]
+
[[package]]
name = "sqlalchemy"
version = "2.0.44"
@@ -5316,6 +5334,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" },
]
+[[package]]
+name = "sqlglot"
+version = "28.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/b6/f188b9616bef49943353f3622d726af30fdb08acbd081deef28ba43ceb48/sqlglot-28.6.0.tar.gz", hash = "sha256:8c0a432a6745c6c7965bbe99a17667c5a3ca1d524a54b31997cf5422b1727f6a", size = 5676522, upload-time = "2026-01-13T17:39:24.389Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8f/a6/21b1e19994296ba4a34bc7abaf4fcb40d7e7787477bdfde58cd843594459/sqlglot-28.6.0-py3-none-any.whl", hash = "sha256:8af76e825dc8456a49f8ce049d69bbfcd116655dda3e53051754789e2edf8eba", size = 575186, upload-time = "2026-01-13T17:39:22.327Z" },
+]
+
[[package]]
name = "stack-data"
version = "0.6.3"