diff --git a/docs/api/query.rst b/docs/api/query.rst index 22616007..70a29bf3 100644 --- a/docs/api/query.rst +++ b/docs/api/query.rst @@ -231,3 +231,23 @@ MultiVectorQuery :inherited-members: :show-inheritance: :exclude-members: add_filter,get_args,highlight,return_field,summarize + + +SQLQuery +======== + +.. currentmodule:: redisvl.query + + +.. autoclass:: SQLQuery + :members: + :show-inheritance: + +.. note:: + SQLQuery requires the optional ``sql-redis`` package. Install with: + ``pip install redisvl[sql-redis]`` + +.. note:: + SQLQuery translates SQL SELECT statements into Redis FT.SEARCH or FT.AGGREGATE commands. + The SQL syntax supports WHERE clauses, field selection, ordering, and parameterized queries + for vector similarity searches. diff --git a/docs/user_guide/02_hybrid_queries.ipynb b/docs/user_guide/02_hybrid_queries.ipynb index e7f8d225..b76f0c51 100644 --- a/docs/user_guide/02_hybrid_queries.ipynb +++ b/docs/user_guide/02_hybrid_queries.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -92,18 +92,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 52, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13:00:56 [RedisVL] INFO Indices:\n", - "13:00:56 [RedisVL] INFO 1. user_queries\n" - ] - } - ], + "outputs": [], "source": [ "# use the CLI to see the created index\n", "!rvl index listall" @@ -111,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -121,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 98, "metadata": {}, "outputs": [ { @@ -130,7 +121,7 @@ "7" ] }, - "execution_count": 6, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -160,13 +151,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" ], "text/plain": [ "" @@ -174,6 +165,16 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance] RETURN 7 user credit_score age job office_location last_updated vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 10'" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -190,18 +191,39 @@ ")\n", "\n", "results = index.query(v)\n", - "result_print(results)" + "result_print(results)\n", + "str(v)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'@credit_score:{high}=>[KNN 10 @user_embedding $vector AS vector_distance]'" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v.query_string()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0derricklow14doctor-122.4194,37.77491741627789
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -242,13 +264,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -268,13 +290,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -305,13 +327,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -340,13 +362,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -367,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -393,13 +415,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -428,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -441,7 +463,7 @@ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -466,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -479,7 +501,7 @@ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
" ], "text/plain": [ "" @@ -505,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -518,7 +540,7 @@ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
" ], "text/plain": [ "" @@ -554,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -582,13 +604,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -608,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -634,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -660,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 70, "metadata": {}, "outputs": [ { @@ -686,13 +708,13 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
vector_distanceusercredit_scoreagejoboffice_locationlast_updated
0johnhigh18engineer-122.4194,37.77491741627789
0derricklow14doctor-122.4194,37.77491741627789
0.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -719,14 +741,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2',\n", - " 'score': 0.9090908893868948,\n", + "[{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY',\n", + " 'score': 1.8181817787737895,\n", " 'vector_distance': '0',\n", " 'user': 'john',\n", " 'credit_score': 'high',\n", @@ -734,7 +756,7 @@ " 'job': 'engineer',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1741627789'},\n", - " {'id': 'user_queries_docs:01JY4J5VC90DRSFJ0WKXXN49JT',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PZ',\n", " 'score': 0.0,\n", " 'vector_distance': '0',\n", " 'user': 'derrick',\n", @@ -743,8 +765,8 @@ " 'job': 'doctor',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1741627789'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW',\n", - " 'score': 0.9090908893868948,\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1',\n", + " 'score': 1.8181817787737895,\n", " 'vector_distance': '0.109129190445',\n", " 'user': 'tyler',\n", " 'credit_score': 'high',\n", @@ -752,25 +774,25 @@ " 'job': 'engineer',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1742232589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2',\n", " 'score': 0.0,\n", - " 'vector_distance': '0.158808946609',\n", + " 'vector_distance': '0.158808887005',\n", " 'user': 'tim',\n", " 'credit_score': 'high',\n", " 'age': '12',\n", " 'job': 'dermatologist',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1739644189'},\n", - " {'id': 'user_queries_docs:01JY4J5VC940DJ9F47EJ6KN2MH',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q3',\n", " 'score': 0.0,\n", - " 'vector_distance': '0.217882037163',\n", + " 'vector_distance': '0.217881977558',\n", " 'user': 'taimur',\n", " 'credit_score': 'low',\n", " 'age': '15',\n", " 'job': 'CEO',\n", " 'office_location': '-122.0839,37.3861',\n", " 'last_updated': '1742232589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0',\n", " 'score': 0.0,\n", " 'vector_distance': '0.266666650772',\n", " 'user': 'nancy',\n", @@ -779,7 +801,7 @@ " 'job': 'doctor',\n", " 'office_location': '-122.4194,37.7749',\n", " 'last_updated': '1710696589'},\n", - " {'id': 'user_queries_docs:01JY4J5VC9806MD90GBZNP0MNY',\n", + " {'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q4',\n", " 'score': 0.0,\n", " 'vector_distance': '0.653301358223',\n", " 'user': 'joe',\n", @@ -790,7 +812,7 @@ " 'last_updated': '1742232589'}]" ] }, - "execution_count": 24, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -813,7 +835,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 73, "metadata": {}, "outputs": [ { @@ -841,13 +863,13 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.45454544469344740johnhigh18engineer-122.4194,37.77491741627789
0.45454544469344740derricklow14doctor-122.4194,37.77491741627789
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.45454544469344740.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.45454544469344740johnhigh18engineer-122.4194,37.77491741627789
0.45454544469344740derricklow14doctor-122.4194,37.77491741627789
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.45454544469344740.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.45454544469344740.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.77491710696589
0.45454544469344740.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -867,13 +889,13 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.00.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.00.158808946609timhigh12dermatologist-122.0839,37.38611739644189
0.00.217882037163taimurlow15CEO-122.0839,37.38611742232589
0.00.653301358223joemedium35dentist-122.0839,37.38611742232589
" + "
scorevector_distanceusercredit_scoreagejoboffice_locationlast_updated
0.00.109129190445tylerhigh100engineer-122.0839,37.38611742232589
0.00.158808887005timhigh12dermatologist-122.0839,37.38611739644189
0.00.217881977558taimurlow15CEO-122.0839,37.38611742232589
0.00.653301358223joemedium35dentist-122.0839,37.38611742232589
" ], "text/plain": [ "" @@ -904,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 76, "metadata": {}, "outputs": [ { @@ -948,13 +970,13 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" + "
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808887005timhigh12dermatologist-122.0839,37.3861
0.217881977558taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" ], "text/plain": [ "" @@ -992,7 +1014,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -1007,7 +1029,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -1032,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -1057,7 +1079,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -1082,13 +1104,13 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808887005timhigh12dermatologist-122.0839,37.3861
0.217881977558taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -1116,7 +1138,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 83, "metadata": {}, "outputs": [ { @@ -1158,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -1192,13 +1214,13 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
" + "
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158808887005timhigh12dermatologist
" ], "text/plain": [ "" @@ -1233,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 86, "metadata": {}, "outputs": [ { @@ -1264,7 +1286,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 87, "metadata": {}, "outputs": [ { @@ -1304,7 +1326,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -1345,7 +1367,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -1354,7 +1376,7 @@ "'@job:(\"engineer\")=>[KNN 5 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY age DESC DIALECT 3 LIMIT 0 5'" ] }, - "execution_count": 41, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } @@ -1366,7 +1388,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -1375,7 +1397,7 @@ "'@credit_score:{high}'" ] }, - "execution_count": 42, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -1388,7 +1410,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 91, "metadata": {}, "outputs": [ { @@ -1397,7 +1419,7 @@ "'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])'" ] }, - "execution_count": 43, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -1422,17 +1444,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'id': 'user_queries_docs:01JY4J5VC91SV4C91BM4D0FCV2', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9D53KQD7ZTRP14KCE', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9QTPMCD60YP40Q6PW', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n", - "{'id': 'user_queries_docs:01JY4J5VC9FW7QQNJKDJ4Z7PRG', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n" + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90PY', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?', 'last_updated': '1741627789'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q0', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?', 'last_updated': '1710696589'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q1', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?', 'last_updated': '1742232589'}\n", + "{'id': 'user_queries_docs:01KG0AV1K9NY8H2BBKRSRZ90Q2', 'payload': None, 'user': 'tim', 
'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?', 'last_updated': '1739644189'}\n" ] } ], @@ -1444,7 +1466,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -1455,7 +1477,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "redisvl", "language": "python", "name": "python3" }, @@ -1469,10 +1491,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.11.9" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/user_guide/12_sql_to_redis_queries.ipynb b/docs/user_guide/12_sql_to_redis_queries.ipynb new file mode 100644 index 00000000..7f26d08d --- /dev/null +++ b/docs/user_guide/12_sql_to_redis_queries.ipynb @@ -0,0 +1,1008 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SQLQuery class\n", + "\n", + "It may arise that you want to use SQL-like queries to interact with your Redis vector database. While Redis does not natively support SQL, the `redisvl` library provides a `SQLQuery` class that allows you to write SQL-like queries that are automatically translated into Redis queries.\n", + "\n", + "The `SQLQuery` class is a wrapper around the [`sql-redis`](https://pypi.org/project/sql-redis/) package, which provides a SQL-to-Redis query translator. The `sql-redis` package is not installed by default with `redisvl`, so you will need to install with the optional syntax:\n", + "\n", + "`pip install redisvl[sql-redis]` or, if running locally, you can `uv sync --all-extras --all-groups`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an index to search" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import HFTextVectorizer\n", + "\n", + "hf = HFTextVectorizer()\n", + "\n", + "schema = {\n", + " \"index\": {\n", + " \"name\": \"user_simple\",\n", + " \"prefix\": \"user_simple_docs\",\n", + " \"storage_type\": \"json\",\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"user\", \"type\": \"tag\"},\n", + " {\"name\": \"region\", \"type\": \"tag\"},\n", + " {\"name\": \"job\", \"type\": \"tag\"},\n", + " {\"name\": \"job_description\", \"type\": \"text\"},\n", + " {\"name\": \"age\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"job_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": len(hf.embed(\"get embed length\")),\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " }\n", + " ]\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create sample dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "data = [\n", + " {\n", + " 'user': 'john',\n", + " 'age': 34,\n", + " 'job': 'software engineer',\n", + " 'region': 'us-west',\n", + " 'job_description': 'Designs, develops, and maintains software applications and systems.'\n", + " },\n", + " {\n", + " 'user': 'bill',\n", + " 'age': 54,\n", + " 'job': 'engineer',\n", + " 'region': 'us-central',\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'\n", + " },\n", + " 
{\n", + " 'user': 'mary',\n", + " 'age': 24,\n", + " 'job': 'doctor',\n", + " 'region': 'us-central',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'\n", + " },\n", + " {\n", + " 'user': 'joe',\n", + " 'age': 27,\n", + " 'job': 'dentist',\n", + " 'region': 'us-east',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'\n", + " },\n", + " {\n", + " 'user': 'stacy',\n", + " 'age': 61,\n", + " 'job': 'project manager',\n", + " 'region': 'us-west',\n", + " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'\n", + " }\n", + "]\n", + "\n", + "data = [\n", + " { \n", + " **d,\n", + " \"job_embedding\": hf.embed(f\"{d['job_description']=} {d['job']=}\"),\n", + " } \n", + " for d in data\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a `SearchIndex`\n", + "\n", + "With the schema and sample dataset ready, create a `SearchIndex`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bring your own Redis connection instance\n", + "\n", + "This is ideal in scenarios where you have custom settings on the connection instance or if your application will share a connection pool:" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "from redis import Redis\n", + "\n", + "client = Redis.from_url(\"redis://localhost:6379\")\n", + "index = SearchIndex.from_dict(schema, redis_client=client, validate_on_load=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let the index manage the connection instance\n", + "\n", + "This is ideal for simple cases:" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the index\n", + "\n", + "Now that we are connected to Redis, we need to run the create command." + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "index.create(overwrite=True, drop=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data to `SearchIndex`\n", + "\n", + "Load the sample dataset to Redis.\n", + "\n", + "### Validate data entries on load\n", + "RedisVL uses pydantic validation under the hood to ensure loaded data is valid and confirms to your schema. This setting is optional and can be configured in the `SearchIndex` class." + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['user_simple_docs:01KGJDNVDAZ9A6XY69Q6BCRMX5', 'user_simple_docs:01KGJDNVE024SKZB3804R8B6C3', 'user_simple_docs:01KGJDNVEKW1BV836X4926K7S3', 'user_simple_docs:01KGJDNVF62TBDR9Y9V4WZ59ZG', 'user_simple_docs:01KGJDNVFS0BXM7GF6DA66JHSN']\n" + ] + } + ], + "source": [ + "keys = index.load(data)\n", + "\n", + "print(keys)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a `SQLQuery` Object\n", + "\n", + "First, let's test a simple select statement such as the one below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.query import SQLQuery\n", + "\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, age\n", + " FROM user_simple\n", + " WHERE age > 17\n", + " \"\"\"\n", + "\n", + "sql_query = SQLQuery(sql_str) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check the created query string" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'FT.SEARCH user_simple \"@age:[(17 +inf]\" RETURN 4 user region job age'" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Executing the query" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'user': 'john',\n", + " 'region': 'us-west',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'},\n", + " {'user': 'joe', 'region': 'us-east', 'job': 'dentist', 'age': '27'},\n", + " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional query support\n", + "\n", + "### Conditional operators" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@age:[(17 +inf] @region:{us\\-west}\" RETURN 4 user region job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john',\n", + " 'region': 'us-west',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'stacy', 'region': 'us-west', 'job': 'project manager', 'age': '61'}]" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, region, job, age\n", + " FROM user_simple\n", + " WHERE age > 17 and region = 'us-west'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"((@region:{us\\-west})|(@region:{us\\-central}))\" RETURN 4 user region job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john',\n", + " 'region': 'us-west',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'},\n", + " {'user': 'stacy', 'region': 'us-west', 'job': 'project 
manager', 'age': '61'},\n", + " {'user': 'mary', 'region': 'us-central', 'job': 'doctor', 'age': '24'}]" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, region, job, age\n", + " FROM user_simple\n", + " WHERE region = 'us-west' or region = 'us-central'\n", + " \"\"\"\n", + "\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job:{software engineer|engineer|pancake tester}\" RETURN 4 user region job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'john',\n", + " 'region': 'us-west',\n", + " 'job': 'software engineer',\n", + " 'age': '34'},\n", + " {'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# job is a tag field therefore this syntax works\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, age\n", + " FROM user_simple\n", + " WHERE job IN ('software engineer', 'engineer', 'pancake tester')\n", + " \"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text based searches\n", + "\n", + "See [the docs](https://redis.io/docs/latest/develop/ai/search-and-query/query/full-text/) for available text queries in Redis.\n", + "\n", + "For more on exact matching see [here](https://redis.io/docs/latest/develop/ai/search-and-query/query/exact-match/)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:sci*\" RETURN 5 user region job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'bill',\n", + " 'region': 'us-central',\n", + " 'job': 'engineer',\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.',\n", + " 'age': '54'}]" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Prefix\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'sci*'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:*care\" RETURN 5 user region job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'mary',\n", + " 'region': 'us-central',\n", + " 'job': 'doctor',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n", + " 'age': '24'},\n", + " {'user': 'joe',\n", + " 'region': 'us-east',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '27'}]" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Suffix\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = '*care'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:%diagnose%\" RETURN 5 user region job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'mary',\n", + " 'region': 'us-central',\n", + " 'job': 'doctor',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.',\n", + " 'age': '24'}]" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fuzzy\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = '%diagnose%'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"healthcare including\"\" RETURN 5 user region job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'joe',\n", + " 'region': 'us-east',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '27'}]" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Phrase no stop words\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'healthcare including'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class 
level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@job_description:\"diagnosing treating\"\" RETURN 5 user region job job_description age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'joe',\n", + " 'region': 'us-east',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.',\n", + " 'age': '27'}]" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Phrase with stop words currently limitation of core Redis\n", + "sql_str = \"\"\"\n", + " SELECT user, region, job, job_description, age\n", + " FROM user_simple\n", + " WHERE job_description = 'diagnosing and treating'\n", + "\"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"@age:[40 60]\" RETURN 4 user region job age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'user': 'bill', 'region': 'us-central', 'job': 'engineer', 'age': '54'}]" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, region, job, age\n", + " FROM user_simple\n", + " WHERE age BETWEEN 40 and 60\n", + " \"\"\"\n", + "\n", + "# could maybe be nice to set a connection string at the class level\n", + "# this would deviate from our other query like classes though so thinking on it\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aggregations\n", + "\n", + "See docs for redis supported reducer functions: [https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/aggregations/#supported-groupby-reducers](docs)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.AGGREGATE user_simple \"*\" LOAD 2 age region GROUPBY 1 @region REDUCE COUNT 0 AS count_age REDUCE COUNT_DISTINCT 1 @age AS count_distinct_age REDUCE MIN 1 @age AS min_age REDUCE MAX 1 @age AS max_age REDUCE AVG 1 @age AS avg_age REDUCE STDDEV 1 @age AS std_age REDUCE FIRST_VALUE 1 @age AS fist_value_age REDUCE TOLIST 1 @age AS to_list_age REDUCE QUANTILE 2 @age 0.99 AS quantile_age\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'region': 'us-west',\n", + " 'count_age': '2',\n", + " 'count_distinct_age': '2',\n", + " 'min_age': '34',\n", + " 'max_age': '61',\n", + " 'avg_age': '47.5',\n", + " 'std_age': '19.091883092',\n", + " 'fist_value_age': '34',\n", + " 'to_list_age': [b'34', b'61'],\n", + " 'quantile_age': '61'},\n", + " {'region': 'us-central',\n", + " 'count_age': '2',\n", + " 'count_distinct_age': '2',\n", + " 'min_age': '24',\n", + " 'max_age': '54',\n", + " 'avg_age': '39',\n", + " 'std_age': '21.2132034356',\n", + " 'fist_value_age': '54',\n", + " 'to_list_age': [b'24', b'54'],\n", + " 'quantile_age': '54'},\n", + " {'region': 'us-east',\n", + " 'count_age': '1',\n", + " 'count_distinct_age': '1',\n", + " 'min_age': '27',\n", + " 'max_age': '27',\n", + " 'avg_age': '27',\n", + " 'std_age': '0',\n", + " 'fist_value_age': '27',\n", + " 'to_list_age': [b'27'],\n", + " 'quantile_age': '27'}]" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT\n", + " user,\n", + " COUNT(age) as count_age,\n", + " COUNT_DISTINCT(age) as count_distinct_age,\n", + " MIN(age) as min_age,\n", + " MAX(age) as max_age,\n", + " AVG(age) as avg_age,\n", + " STDEV(age) as std_age,\n", + " FIRST_VALUE(age) as fist_value_age,\n", + " ARRAY_AGG(age) as to_list_age,\n", + " QUANTILE(age, 0.99) as quantile_age\n", + " FROM user_simple\n", + " GROUP BY region\n", + " \"\"\"\n", + "\n", + "sql_query = SQLQuery(sql_str)\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vector search" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"*=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 4 user job job_description vector_distance SORTBY vector_distance ASC\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'vector_distance': '0.823510587215',\n", + " 'user': 'bill',\n", + " 'job': 'engineer',\n", + " 'job_description': 'Applies scientific and mathematical principles to solve technical problems.'},\n", + " {'vector_distance': '0.965160429478',\n", + " 'user': 'john',\n", + " 'job': 'software engineer',\n", + " 'job_description': 'Designs, develops, and maintains software applications and systems.'},\n", + " {'vector_distance': '1.00401365757',\n", + " 'user': 'mary',\n", + " 'job': 'doctor',\n", + " 'job_description': 'Diagnoses and treats illnesses, injuries, and other medical conditions in the healthcare field.'},\n", + " {'vector_distance': '1.0062687397',\n", + " 'user': 'stacy',\n", + " 'job': 'project 
manager',\n", + " 'job_description': 'Plans, organizes, and oversees projects from inception to completion.'},\n", + " {'vector_distance': '1.01110625267',\n", + " 'user': 'joe',\n", + " 'job': 'dentist',\n", + " 'job_description': 'Provides oral healthcare including diagnosing and treating teeth and gum issues.'}]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, job, job_description, cosine_distance(job_embedding, :vec) AS vector_distance\n", + " FROM user_simple\n", + " ORDER BY vector_distance ASC\n", + " \"\"\"\n", + "\n", + "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n", + "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", + "\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resulting redis query: FT.SEARCH user_simple \"(@region:{us\\-central})=>[KNN 10 @job_embedding $vector AS vector_distance]\" PARAMS 2 vector $vector DIALECT 2 RETURN 3 user region vector_distance SORTBY vector_distance ASC\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'vector_distance': '0.823510587215', 'user': 'bill', 'region': 'us-central'},\n", + " {'vector_distance': '1.00401365757', 'user': 'mary', 'region': 'us-central'}]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_str = \"\"\"\n", + " SELECT user, region, cosine_distance(job_embedding, :vec) AS vector_distance\n", + " FROM user_simple\n", + " WHERE region = 'us-central'\n", + " ORDER BY vector_distance ASC\n", + " \"\"\"\n", + "\n", + "vec = hf.embed(\"looking for someone to use base principles to solve problems\", as_buffer=True)\n", + "sql_query = SQLQuery(sql_str, params={\"vec\": vec})\n", + "\n", + "redis_query = sql_query.redis_query_string(redis_url=\"redis://localhost:6379\")\n", + "print(\"Resulting redis query: \", redis_query)\n", + "results = index.query(sql_query)\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", + "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", + "\n", + "But if you want to clean up everything, including the index, just use `.delete()`\n", + "which will by default remove the index AND the underlying data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clear all data from Redis associated with the index\n", + "index.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# But the index is still in place\n", + "index.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove / delete the index in its entirety\n", + "index.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "redisvl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 602983b5..f89fe51e 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -23,4 +23,5 @@ User guides provide helpful resources for using RedisVL and its different compon 09_svs_vamana 10_embeddings_cache 11_advanced_queries +12_sql_to_redis_queries ``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 9286b066..4469c9f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "redisvl" -version = "0.13.2" +version = "0.14.0" description = "Python client library and CLI for using Redis as a vector database" authors = [{ name = "Redis Inc.", email = "applied.ai@redis.com" }] requires-python = ">=3.9.2,<3.14" @@ -51,6 +51,9 @@ bedrock = [ pillow = [ "pillow>=11.3.0", ] +sql-redis = [ + "sql-redis>=0.1.1", +] [project.urls] Homepage = "https://github.com/redis/redis-vl-python" @@ -64,6 +67,9 @@ rvl = "redisvl.cli.runner:main" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.metadata] +allow-direct-references = true + [dependency-groups] dev = [ "black>=25.1.0,<26", diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 4bc66f67..40ce8ae0 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -31,6 +31,7 @@ from redisvl.query.hybrid import HybridQuery from redisvl.query.query import VectorQuery +from redisvl.query.sql import SQLQuery from redisvl.redis.utils import ( _keys_share_hash_tag, async_cluster_create_index, @@ -917,6 +918,49 @@ def _aggregate(self, aggregation_query: AggregationQuery) -> List[Dict[str, Any] storage_type=self.schema.index.storage_type, ) + def _sql_query(self, sql_query: SQLQuery) -> List[Dict[str, Any]]: + """Execute a SQL query and return results. + + Args: + sql_query: The SQLQuery object containing the SQL statement. + + Returns: + List of dictionaries containing the query results. + + Raises: + ImportError: If sql-redis package is not installed. + """ + try: + from sql_redis.executor import Executor + from sql_redis.schema import SchemaRegistry + except ImportError: + raise ImportError( + "sql-redis is required for SQL query support. 
" + "Install it with: pip install redisvl[sql-redis]" + ) + + registry = SchemaRegistry(self._redis_client) + registry.load_all() # Loads index schemas from Redis + + executor = Executor(self._redis_client, registry) + + # Execute the query with any params + result = executor.execute(sql_query.sql, params=sql_query.params) + + # Decode bytes to strings in the results (Redis may return bytes) + decoded_rows = [] + for row in result.rows: + decoded_row = {} + for key, value in row.items(): + # Decode key if bytes + str_key = key.decode("utf-8") if isinstance(key, bytes) else key + # Decode value if bytes + str_value = value.decode("utf-8") if isinstance(value, bytes) else value + decoded_row[str_key] = str_value + decoded_rows.append(decoded_row) + + return decoded_rows + def aggregate(self, *args, **kwargs) -> "AggregateResult": """Perform an aggregation operation against the index. @@ -1118,7 +1162,7 @@ def _query(self, query: BaseQuery) -> List[Dict[str, Any]]: return process_results(results, query=query, schema=self.schema) def query( - self, query: Union[BaseQuery, AggregationQuery, HybridQuery] + self, query: Union[BaseQuery, AggregationQuery, HybridQuery, SQLQuery] ) -> List[Dict[str, Any]]: """Execute a query on the index. @@ -1146,6 +1190,8 @@ def query( """ if isinstance(query, AggregationQuery): return self._aggregate(query) + elif isinstance(query, SQLQuery): + return self._sql_query(query) elif isinstance(query, HybridQuery): return self._hybrid_search(query) else: diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py index aa84633e..3f78c755 100644 --- a/redisvl/query/__init__.py +++ b/redisvl/query/__init__.py @@ -15,6 +15,7 @@ VectorQuery, VectorRangeQuery, ) +from redisvl.query.sql import SQLQuery __all__ = [ "BaseQuery", @@ -29,4 +30,5 @@ "AggregateHybridQuery", "MultiVectorQuery", "Vector", + "SQLQuery", ] diff --git a/redisvl/query/sql.py b/redisvl/query/sql.py new file mode 100644 index 00000000..06dd2369 --- /dev/null +++ b/redisvl/query/sql.py @@ -0,0 +1,159 @@ +"""SQL Query class for executing SQL-like queries against Redis.""" + +import re +from typing import Any, Dict, Optional + + +class SQLQuery: + """A query class that translates SQL-like syntax into Redis queries. + + This class allows users to write SQL SELECT statements that are + automatically translated into Redis FT.SEARCH or FT.AGGREGATE commands. + + .. code-block:: python + + from redisvl.query import SQLQuery + from redisvl.index import SearchIndex + + index = SearchIndex.from_existing("products", redis_url="redis://localhost:6379") + + sql_query = SQLQuery(''' + SELECT title, price, category + FROM products + WHERE category = 'electronics' AND price < 100 + ''') + + results = index.query(sql_query) + + Note: + Requires the optional `sql-redis` package. Install with: + ``pip install redisvl[sql]`` + """ + + def __init__(self, sql: str, params: Optional[Dict[str, Any]] = None): + """Initialize a SQLQuery. + + Args: + sql: The SQL SELECT statement to execute. + params: Optional dictionary of parameters for parameterized queries. + Useful for passing vector data for similarity searches. + """ + self.sql = sql + self.params = params or {} + + def _substitute_params(self, sql: str, params: Dict[str, Any]) -> str: + """Substitute parameter placeholders in SQL with actual values. + + Uses token-based approach: splits SQL on :param patterns, then rebuilds + with substituted values. This prevents partial matching (e.g., :id + won't match inside :product_id) and is faster than regex at scale. 
+ + Args: + sql: The SQL string with :param placeholders. + params: Dictionary mapping parameter names to values. + + Returns: + SQL string with parameters substituted. + + Note: + - String values are wrapped in single quotes with proper escaping + - Numeric values are converted to strings + - Bytes values (e.g., vectors) are NOT substituted here + """ + if not params: + return sql + + # Split SQL on :param patterns, keeping the delimiters + # Pattern matches : followed by valid identifier (letter/underscore, then alphanumeric/underscore) + tokens = re.split(r"(:[a-zA-Z_][a-zA-Z0-9_]*)", sql) + + result = [] + for token in tokens: + if token.startswith(":"): + key = token[1:] # Remove leading : + if key in params: + value = params[key] + if isinstance(value, (int, float)): + result.append(str(value)) + elif isinstance(value, str): + # Escape single quotes using SQL standard: ' -> '' + escaped = value.replace("'", "''") + result.append(f"'{escaped}'") + else: + # Keep placeholder for bytes (vectors handled by Executor) + result.append(token) + else: + # Keep unmatched placeholders as-is + result.append(token) + else: + result.append(token) + + return "".join(result) + + def redis_query_string( + self, + redis_client: Optional[Any] = None, + redis_url: str = "redis://localhost:6379", + ) -> str: + """Translate the SQL query to a Redis command string. + + This method uses the sql-redis translator to convert the SQL statement + into the equivalent Redis FT.SEARCH or FT.AGGREGATE command. + + Args: + redis_client: A Redis client connection used to load index schemas. + If not provided, a connection will be created using redis_url. + redis_url: The Redis URL to connect to if redis_client is not provided. + Defaults to "redis://localhost:6379". + + Returns: + The Redis command string (e.g., 'FT.SEARCH products "@category:{electronics}"'). + + Raises: + ImportError: If sql-redis package is not installed. + + Example: + .. code-block:: python + + from redisvl.query import SQLQuery + + sql_query = SQLQuery("SELECT * FROM products WHERE category = 'electronics'") + + # Using redis_url + redis_cmd = sql_query.redis_query_string(redis_url="redis://localhost:6379") + + # Or using an existing client + from redis import Redis + client = Redis() + redis_cmd = sql_query.redis_query_string(redis_client=client) + + print(redis_cmd) + # Output: FT.SEARCH products "@category:{electronics}" + """ + try: + from sql_redis.schema import SchemaRegistry + from sql_redis.translator import Translator + except ImportError: + raise ImportError( + "sql-redis is required for SQL query support. 
" + "Install it with: pip install redisvl[sql]" + ) + + # Get or create Redis client + if redis_client is None: + from redis import Redis + + redis_client = Redis.from_url(redis_url) + + # Load schemas from Redis + registry = SchemaRegistry(redis_client) + registry.load_all() + + # Translate SQL to Redis command + translator = Translator(registry) + + # Substitute non-bytes params in SQL before translation + sql = self._substitute_params(self.sql, self.params) + + translated = translator.translate(sql) + return translated.to_command_string() diff --git a/tests/integration/test_redis_cluster_support.py b/tests/integration/test_redis_cluster_support.py index 80b82420..0d18dea3 100644 --- a/tests/integration/test_redis_cluster_support.py +++ b/tests/integration/test_redis_cluster_support.py @@ -89,6 +89,7 @@ def test_search_index_cluster_info(redis_cluster_url): finally: index.delete(drop=True) + @pytest.mark.requires_cluster @pytest.mark.asyncio async def test_async_search_index_cluster_info(redis_cluster_url): @@ -110,6 +111,7 @@ async def test_async_search_index_cluster_info(redis_cluster_url): await index.delete(drop=True) await client.aclose() + @pytest.mark.requires_cluster @pytest.mark.asyncio async def test_async_search_index_client(redis_cluster_url): diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index ae64a229..ebfedbe7 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -304,6 +304,7 @@ def test_search_index_delete(index): assert not index.exists() assert index.name not in convert_bytes(index.client.execute_command("FT._LIST")) + @pytest.mark.parametrize("num_docs", [0, 1, 5, 10, 2042]) def test_search_index_clear(index, num_docs): index.create(overwrite=True, drop=True) diff --git a/tests/integration/test_sql_redis_hash.py b/tests/integration/test_sql_redis_hash.py new file mode 100644 index 00000000..33560c35 --- /dev/null +++ b/tests/integration/test_sql_redis_hash.py @@ -0,0 +1,1134 @@ +"""Integration tests for SQLQuery class. + +These tests verify that SQLQuery can translate SQL-like syntax +into proper Redis queries and return expected results. 
+""" + +import uuid + +import pytest + +from redisvl.index import SearchIndex +from redisvl.query import SQLQuery + + +@pytest.fixture +def sql_index(redis_url, worker_id): + """Create a products index for SQL query testing.""" + unique_id = str(uuid.uuid4())[:8] + index_name = f"sql_products_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"product_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "name", "type": "text", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "stock", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "rating", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "category", "type": "tag", "attrs": {"sortable": True}}, + {"name": "tags", "type": "tag"}, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Load test data + products = [ + { + "title": "Gaming laptop Pro", + "name": "Gaming Laptop", + "price": 899, + "stock": 10, + "rating": 4.5, + "category": "electronics", + "tags": "sale,featured", + }, + { + "title": "Budget laptop Basic", + "name": "Budget Laptop", + "price": 499, + "stock": 25, + "rating": 3.8, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Premium laptop Ultra", + "name": "Premium Laptop", + "price": 1299, + "stock": 5, + "rating": 4.9, + "category": "electronics", + "tags": "featured", + }, + { + "title": "Python Programming", + "name": "Python Book", + "price": 45, + "stock": 100, + "rating": 4.7, + "category": "books", + "tags": "bestseller", + }, + { + "title": "Redis in Action", + "name": "Redis Book", + "price": 55, + "stock": 50, + "rating": 4.6, + "category": "books", + "tags": "featured", + }, + { + "title": "Data Science Guide", + "name": "DS Book", + "price": 65, + "stock": 30, + "rating": 4.4, + "category": "books", + "tags": "sale", + }, + { + "title": "Wireless Mouse", + "name": "Mouse", + "price": 29, + "stock": 200, + "rating": 4.2, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Mechanical Keyboard", + "name": "Keyboard", + "price": 149, + "stock": 75, + "rating": 4.6, + "category": "electronics", + "tags": "featured", + }, + { + "title": "USB Hub", + "name": "Hub", + "price": 25, + "stock": 150, + "rating": 3.9, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Monitor Stand", + "name": "Stand", + "price": 89, + "stock": 40, + "rating": 4.1, + "category": "accessories", + "tags": "sale,featured", + }, + { + "title": "Desk Lamp", + "name": "Lamp", + "price": 35, + "stock": 80, + "rating": 4.0, + "category": "accessories", + "tags": "sale", + }, + { + "title": "Notebook Set", + "name": "Notebooks", + "price": 15, + "stock": 300, + "rating": 4.3, + "category": "stationery", + "tags": "bestseller", + }, + { + "title": "Laptop and Keyboard Bundle", + "name": "Bundle Pack", + "price": 999, + "stock": 15, + "rating": 4.7, + "category": "electronics", + "tags": "featured,sale", + }, + ] + + index.load(products) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryBasic: + """Tests for basic SQL SELECT queries.""" + + def test_import_sql_query(self): + """Verify SQLQuery can be imported from redisvl.query.""" + from redisvl.query import SQLQuery + + assert SQLQuery is not None + + def test_select_all_fields(self, sql_index): + """Test SELECT * returns all fields.""" + sql_query = 
SQLQuery(f"SELECT * FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Verify results contain expected fields + assert "title" in results[0] + assert "price" in results[0] + + def test_select_specific_fields(self, sql_index): + """Test SELECT with specific field list.""" + sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Results should contain requested fields + assert "title" in results[0] + assert "price" in results[0] + + def test_redis_query_string_with_client(self, sql_index): + """Test redis_query_string() with redis_client returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_client + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_with_url(self, sql_index, redis_url): + """Test redis_query_string() with redis_url returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_url + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_aggregate(self, sql_index): + """Test redis_query_string() returns FT.AGGREGATE for aggregation queries.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.AGGREGATE command + assert redis_cmd.startswith("FT.AGGREGATE") + assert sql_index.name in redis_cmd + assert "GROUPBY" in redis_cmd + + +class TestSQLQueryWhere: + """Tests for SQL WHERE clause filtering.""" + + def test_where_tag_equals(self, sql_index): + """Test WHERE with tag field equality.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "electronics" + + def test_where_numeric_comparison(self, sql_index): + """Test WHERE with numeric field comparison.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price < 50 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) < 50 + + def test_where_combined_and(self, sql_index): + """Test WHERE with AND combining multiple conditions.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' AND price < 100 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + assert result["category"] == "electronics" + assert float(result["price"]) < 100 + + def test_where_numeric_range(self, sql_index): + """Test WHERE with numeric range (BETWEEN equivalent).""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE 
price >= 25 AND price <= 50 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + price = float(result["price"]) + assert 25 <= price <= 50 + + +class TestSQLQueryTagOperators: + """Tests for SQL tag field operators.""" + + def test_tag_not_equals(self, sql_index): + """Test tag != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category != 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] != "electronics" + + def test_tag_in(self, sql_index): + """Test tag IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category IN ('books', 'accessories') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] in ("books", "accessories") + + +class TestSQLQueryNumericOperators: + """Tests for SQL numeric field operators.""" + + def test_numeric_greater_than(self, sql_index): + """Test numeric > operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price > 100 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) > 100 + + def test_numeric_equals(self, sql_index): + """Test numeric = operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price = 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) == 45 + + def test_numeric_not_equals(self, sql_index): + """Test numeric != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price != 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) != 45 + + @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis") + def test_numeric_in(self, sql_index): + """Test numeric IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price IN (45, 55, 65) + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) in (45, 55, 65) + + def test_numeric_between(self, sql_index): + """Test numeric BETWEEN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price BETWEEN 40 AND 60 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + price = float(result["price"]) + assert 40 <= price <= 60 + + +class TestSQLQueryTextOperators: + """Tests for SQL text field operators.""" + + def test_text_equals(self, sql_index): + """Test text = operator (full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert "laptop" in result["title"].lower() + + def test_text_not_equals(self, sql_index): + """Test text != operator (negated full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title != 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + # Results should not contain 'laptop' as a primary match + assert 
"laptop" not in result["title"].lower() + + def test_text_prefix(self, sql_index): + """Test text prefix search with wildcard (term*).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'lap*' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match titles starting with "lap" (e.g., "laptop") + assert "lap" in result["title"].lower() + + def test_text_suffix(self, sql_index): + """Test text suffix search with wildcard (*term).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE name = '*book' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match names ending with "book" (e.g., "Python Book") + assert "book" in result["name"].lower() + + def test_text_fuzzy(self, sql_index): + """Test text fuzzy search with Levenshtein distance (%term%).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = '%laptap%' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should fuzzy match "laptop" even with typo "laptap" + assert "laptop" in result["title"].lower() + + def test_text_phrase(self, sql_index): + """Test text phrase search (multi-word exact phrase).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'gaming laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match exact phrase "gaming laptop" + title_lower = result["title"].lower() + assert "gaming" in title_lower and "laptop" in title_lower + + def test_text_phrase_with_stopword(self, sql_index): + """Test text phrase search containing stop words. + + Redis does not index stop words (like 'and', 'the', 'is') by default. + The sql-redis library works around this by automatically stripping + stop words from phrase searches and emitting a warning. 
+ See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/ + """ + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop and keyboard' + """ + ) + results = sql_index.query(sql_query) + + # Should find the "Laptop and Keyboard Bundle" product + assert len(results) >= 1 + # Verify at least one result contains both "laptop" and "keyboard" + found_match = False + for result in results: + title_lower = result["title"].lower() + if "laptop" in title_lower and "keyboard" in title_lower: + found_match = True + break + assert found_match, "Expected to find a result with 'laptop' and 'keyboard'" + + # Verify a warning was emitted about stopword removal + stopword_warnings = [ + warning + for warning in w + if "Stopwords" in str(warning.message) + and "and" in str(warning.message).lower() + ] + assert ( + len(stopword_warnings) >= 1 + ), "Expected a warning about stopword removal" + + @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis") + def test_text_in(self, sql_index): + """Test text IN operator (multiple term search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title IN ('Python', 'Redis') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + title_lower = result["title"].lower() + assert "python" in title_lower or "redis" in title_lower + + +class TestSQLQueryOrderBy: + """Tests for SQL ORDER BY clause.""" + + def test_order_by_asc(self, sql_index): + """Test ORDER BY ascending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price ASC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices) + + def test_order_by_desc(self, sql_index): + """Test ORDER BY descending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price DESC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices, reverse=True) + + +class TestSQLQueryLimit: + """Tests for SQL LIMIT and OFFSET clauses.""" + + def test_limit(self, sql_index): + """Test LIMIT clause.""" + sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3") + results = sql_index.query(sql_query) + + assert len(results) == 3 + + def test_limit_with_offset(self, sql_index): + """Test LIMIT with OFFSET for pagination.""" + # First page + sql_query1 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0" + ) + results1 = sql_index.query(sql_query1) + + # Second page + sql_query2 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3" + ) + results2 = sql_index.query(sql_query2) + + assert len(results1) == 3 + assert len(results2) == 3 + # Pages should have different results + titles1 = {r["title"] for r in results1} + titles2 = {r["title"] for r in results2} + assert titles1.isdisjoint(titles2) + + +class TestSQLQueryAggregation: + """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.).""" + + def test_count_all(self, sql_index): + """Test COUNT(*) aggregation.""" + sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert int(results[0]["total"]) == 13 # 13 products 
in test data + + def test_group_by_with_count(self, sql_index): + """Test GROUP BY with COUNT.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # Should have groups for electronics, books, accessories, stationery + categories = {r["category"] for r in results} + assert "electronics" in categories + assert "books" in categories + + def test_group_by_with_avg(self, sql_index): + """Test GROUP BY with AVG.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # All results should have category and avg_price + for result in results: + assert "category" in result + assert "avg_price" in result + assert float(result["avg_price"]) > 0 + + def test_group_by_with_filter(self, sql_index): + """Test GROUP BY with WHERE filter.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + WHERE stock > 50 + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "avg_price" in result + + def test_group_by_with_sum(self, sql_index): + """Test GROUP BY with SUM reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, SUM(price) as total_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "total_price" in result + assert float(result["total_price"]) > 0 + + def test_group_by_with_min(self, sql_index): + """Test GROUP BY with MIN reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MIN(price) as min_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "min_price" in result + assert float(result["min_price"]) > 0 + + def test_group_by_with_max(self, sql_index): + """Test GROUP BY with MAX reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "max_price" in result + assert float(result["max_price"]) > 0 + + def test_global_sum(self, sql_index): + """Test global SUM aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT SUM(price) as total + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "total" in results[0] + assert float(results[0]["total"]) > 0 + + def test_global_min(self, sql_index): + """Test global MIN aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MIN(price) as min_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "min_price" in results[0] + assert float(results[0]["min_price"]) > 0 + + def test_global_max(self, sql_index): + """Test global MAX aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MAX(price) as max_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "max_price" in results[0] + assert float(results[0]["max_price"]) > 0 + + def test_multiple_reducers(self, sql_index): 
+ """Test multiple reducers in a single query.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "count" in result + assert "total" in result + assert "avg_price" in result + assert "min_price" in result + assert "max_price" in result + + def test_count_distinct(self, sql_index): + """Test COUNT_DISTINCT reducer using Redis-specific syntax.""" + sql_query = SQLQuery( + f""" + SELECT COUNT_DISTINCT(category) as unique_categories + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "unique_categories" in results[0] + # Should have 4 unique categories: electronics, books, accessories, stationery + assert int(results[0]["unique_categories"]) == 4 + + def test_stddev(self, sql_index): + """Test STDDEV reducer.""" + sql_query = SQLQuery( + f""" + SELECT STDDEV(price) as price_stddev + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "price_stddev" in results[0] + # Verify it's a valid numeric value + stddev_value = float(results[0]["price_stddev"]) + assert stddev_value >= 0 # Standard deviation is always non-negative + + def test_quantile(self, sql_index): + """Test QUANTILE reducer.""" + sql_query = SQLQuery( + f""" + SELECT QUANTILE(price, 0.5) as median_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "median_price" in results[0] + # Verify it's a valid numeric value + median_value = float(results[0]["median_price"]) + assert median_value >= 0 + + def test_tolist(self, sql_index): + """Test TOLIST reducer via ARRAY_AGG SQL function.""" + sql_query = SQLQuery( + f""" + SELECT category, ARRAY_AGG(title) as titles + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "titles" in result + # TOLIST returns a comma-separated string or list of values + assert result["titles"] is not None + + def test_first_value(self, sql_index): + """Test FIRST_VALUE reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, FIRST_VALUE(title) as first_title + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "first_title" in result + # Verify it's a non-empty string + assert isinstance(result["first_title"], str) + assert len(result["first_title"]) > 0 + + +class TestSQLQueryIntegration: + """End-to-end integration tests matching proposal examples.""" + + def test_proposal_example_basic(self, sql_index): + """Test the basic example from the MLP proposal.""" + # Example from proposal doc (adapted for our test data) + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'books' + """ + ) + + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "books" + assert "title" in result + assert "price" in result + + +@pytest.fixture +def vector_index(redis_url, worker_id): + """Create a books index with vector embeddings for SQL query testing.""" + import numpy as np + + unique_id = str(uuid.uuid4())[:8] + index_name = 
f"sql_books_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"book_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "genre", "type": "tag", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "dims": 4, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Create test books with embeddings + books = [ + { + "title": "Dune", + "genre": "Science Fiction", + "price": 15, + "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(), + }, + { + "title": "Foundation", + "genre": "Science Fiction", + "price": 18, + "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(), + }, + { + "title": "Neuromancer", + "genre": "Science Fiction", + "price": 12, + "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(), + }, + { + "title": "The Hobbit", + "genre": "Fantasy", + "price": 14, + "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(), + }, + { + "title": "1984", + "genre": "Dystopian", + "price": 25, + "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(), + }, + ] + + index.load(books) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryVectorSearch: + """Tests for SQL vector similarity search using cosine_distance() and vector_distance().""" + + def test_vector_distance_function(self, vector_index): + """Test vector search with vector_distance() function.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, vector_distance(embedding, :vec) AS score + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + results = vector_index.query(sql_query) + + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert "title" in result + assert "score" in result + # Score should be a valid non-negative distance value + score = float(result["score"]) + assert score >= 0 + + def test_vector_cosine_similarity(self, vector_index): + """Test vector search with cosine_distance() function - pgvector style.""" + import numpy as np + + # Query vector similar to Science Fiction books + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT + title, + genre, + price, + cosine_distance(embedding, :query_vector) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + AND price <= 20 + ORDER BY cosine_distance(embedding, :query_vector) + LIMIT 3 + """, + params={"query_vector": query_vector}, + ) + + results = vector_index.query(sql_query) + + # Should return Science Fiction books under $20 + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert result["genre"] == "Science Fiction" + assert float(result["price"]) <= 20 + # Verify vector_distance is returned (like VectorQuery with return_score=True) + assert "vector_distance" in result + # Distance should be a valid non-negative value + distance = float(result["vector_distance"]) + assert distance >= 0 + + def test_vector_redis_query_string(self, vector_index, redis_url): + """Test redis_query_string() returns correct KNN 
query for vector search.""" + import numpy as np + + # Query vector + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + # Get the Redis command string + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH with KNN syntax + assert redis_cmd.startswith("FT.SEARCH") + assert vector_index.name in redis_cmd + assert "KNN 3" in redis_cmd + assert "@embedding" in redis_cmd + assert "$vector" in redis_cmd + assert "vector_distance" in redis_cmd + + def test_vector_search_with_prefilter_redis_query_string( + self, vector_index, redis_url + ): + """Test redis_query_string() returns correct prefiltered KNN query.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify prefilter syntax: (filter)=>[KNN ...] + assert redis_cmd.startswith("FT.SEARCH") + assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd + assert "=>[KNN" in redis_cmd + assert "KNN 3" in redis_cmd diff --git a/tests/integration/test_sql_redis_json.py b/tests/integration/test_sql_redis_json.py new file mode 100644 index 00000000..76191af0 --- /dev/null +++ b/tests/integration/test_sql_redis_json.py @@ -0,0 +1,1143 @@ +"""Integration tests for SQLQuery class. + +These tests verify that SQLQuery can translate SQL-like syntax +into proper Redis queries and return expected results. 
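+
+These tests mirror the HASH-based suite but use JSON storage, and likewise
+require the optional ``sql-redis`` package (``pip install redisvl[sql-redis]``).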
+""" + +import uuid + +import pytest + +from redisvl.index import SearchIndex +from redisvl.query import SQLQuery + + +@pytest.fixture +def sql_index(redis_url, worker_id): + """Create a products index for SQL query testing.""" + unique_id = str(uuid.uuid4())[:8] + index_name = f"sql_products_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"product_{worker_id}_{unique_id}", + "storage_type": "json", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "name", "type": "text", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "stock", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "rating", "type": "numeric", "attrs": {"sortable": True}}, + {"name": "category", "type": "tag", "attrs": {"sortable": True}}, + {"name": "tags", "type": "tag"}, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Load test data + products = [ + { + "title": "Gaming laptop Pro", + "name": "Gaming Laptop", + "price": 899, + "stock": 10, + "rating": 4.5, + "category": "electronics", + "tags": "sale,featured", + }, + { + "title": "Budget laptop Basic", + "name": "Budget Laptop", + "price": 499, + "stock": 25, + "rating": 3.8, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Premium laptop Ultra", + "name": "Premium Laptop", + "price": 1299, + "stock": 5, + "rating": 4.9, + "category": "electronics", + "tags": "featured", + }, + { + "title": "Python Programming", + "name": "Python Book", + "price": 45, + "stock": 100, + "rating": 4.7, + "category": "books", + "tags": "bestseller", + }, + { + "title": "Redis in Action", + "name": "Redis Book", + "price": 55, + "stock": 50, + "rating": 4.6, + "category": "books", + "tags": "featured", + }, + { + "title": "Data Science Guide", + "name": "DS Book", + "price": 65, + "stock": 30, + "rating": 4.4, + "category": "books", + "tags": "sale", + }, + { + "title": "Wireless Mouse", + "name": "Mouse", + "price": 29, + "stock": 200, + "rating": 4.2, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Mechanical Keyboard", + "name": "Keyboard", + "price": 149, + "stock": 75, + "rating": 4.6, + "category": "electronics", + "tags": "featured", + }, + { + "title": "USB Hub", + "name": "Hub", + "price": 25, + "stock": 150, + "rating": 3.9, + "category": "electronics", + "tags": "sale", + }, + { + "title": "Monitor Stand", + "name": "Stand", + "price": 89, + "stock": 40, + "rating": 4.1, + "category": "accessories", + "tags": "sale,featured", + }, + { + "title": "Desk Lamp", + "name": "Lamp", + "price": 35, + "stock": 80, + "rating": 4.0, + "category": "accessories", + "tags": "sale", + }, + { + "title": "Notebook Set", + "name": "Notebooks", + "price": 15, + "stock": 300, + "rating": 4.3, + "category": "stationery", + "tags": "bestseller", + }, + { + "title": "Laptop and Keyboard Bundle", + "name": "Bundle Pack", + "price": 999, + "stock": 15, + "rating": 4.7, + "category": "electronics", + "tags": "featured,sale", + }, + ] + + index.load(products) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryBasic: + """Tests for basic SQL SELECT queries.""" + + def test_import_sql_query(self): + """Verify SQLQuery can be imported from redisvl.query.""" + from redisvl.query import SQLQuery + + assert SQLQuery is not None + + def test_select_all_fields(self, sql_index): + """Test SELECT * returns all fields.""" + sql_query = 
SQLQuery(f"SELECT * FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # For JSON storage, results may contain '$' key with JSON string or parsed fields + first_result = results[0] + if "$" in first_result: + # JSON storage returns data under '$' key + import json + + data = json.loads(first_result["$"]) + assert "title" in data + assert "price" in data + else: + assert "title" in first_result + assert "price" in first_result + + def test_select_specific_fields(self, sql_index): + """Test SELECT with specific field list.""" + sql_query = SQLQuery(f"SELECT title, price FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) > 0 + # Results should contain requested fields + assert "title" in results[0] + assert "price" in results[0] + + def test_redis_query_string_with_client(self, sql_index): + """Test redis_query_string() with redis_client returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_client + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_with_url(self, sql_index, redis_url): + """Test redis_query_string() with redis_url returns the Redis command string.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + + # Get the Redis command string using redis_url + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH command + assert redis_cmd.startswith("FT.SEARCH") + assert sql_index.name in redis_cmd + assert "electronics" in redis_cmd + + def test_redis_query_string_aggregate(self, sql_index): + """Test redis_query_string() returns FT.AGGREGATE for aggregation queries.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + + redis_cmd = sql_query.redis_query_string(redis_client=sql_index._redis_client) + + # Verify it's a valid FT.AGGREGATE command + assert redis_cmd.startswith("FT.AGGREGATE") + assert sql_index.name in redis_cmd + assert "GROUPBY" in redis_cmd + + +class TestSQLQueryWhere: + """Tests for SQL WHERE clause filtering.""" + + def test_where_tag_equals(self, sql_index): + """Test WHERE with tag field equality.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "electronics" + + def test_where_numeric_comparison(self, sql_index): + """Test WHERE with numeric field comparison.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price < 50 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) < 50 + + def test_where_combined_and(self, sql_index): + """Test WHERE with AND combining multiple conditions.""" + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'electronics' AND price < 100 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + assert 
result["category"] == "electronics" + assert float(result["price"]) < 100 + + def test_where_numeric_range(self, sql_index): + """Test WHERE with numeric range (BETWEEN equivalent).""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price >= 25 AND price <= 50 + """ + ) + results = sql_index.query(sql_query) + + for result in results: + price = float(result["price"]) + assert 25 <= price <= 50 + + +class TestSQLQueryTagOperators: + """Tests for SQL tag field operators.""" + + def test_tag_not_equals(self, sql_index): + """Test tag != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category != 'electronics' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] != "electronics" + + def test_tag_in(self, sql_index): + """Test tag IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, category + FROM {sql_index.name} + WHERE category IN ('books', 'accessories') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] in ("books", "accessories") + + +class TestSQLQueryNumericOperators: + """Tests for SQL numeric field operators.""" + + def test_numeric_greater_than(self, sql_index): + """Test numeric > operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price > 100 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) > 100 + + def test_numeric_equals(self, sql_index): + """Test numeric = operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price = 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) == 45 + + def test_numeric_not_equals(self, sql_index): + """Test numeric != operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price != 45 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert float(result["price"]) != 45 + + @pytest.mark.xfail(reason="Numeric IN operator not yet supported in sql-redis") + def test_numeric_in(self, sql_index): + """Test numeric IN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price IN (45, 55, 65) + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert float(result["price"]) in (45, 55, 65) + + def test_numeric_between(self, sql_index): + """Test numeric BETWEEN operator.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + WHERE price BETWEEN 40 AND 60 + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + price = float(result["price"]) + assert 40 <= price <= 60 + + +class TestSQLQueryTextOperators: + """Tests for SQL text field operators.""" + + def test_text_equals(self, sql_index): + """Test text = operator (full-text search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + assert "laptop" in result["title"].lower() + + def test_text_not_equals(self, sql_index): + """Test text != operator (negated full-text search).""" + 
sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title != 'laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + # Results should not contain 'laptop' as a primary match + assert "laptop" not in result["title"].lower() + + def test_text_prefix(self, sql_index): + """Test text prefix search with wildcard (term*).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'lap*' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match titles starting with "lap" (e.g., "laptop") + assert "lap" in result["title"].lower() + + def test_text_suffix(self, sql_index): + """Test text suffix search with wildcard (*term).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE name = '*book' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match names ending with "book" (e.g., "Python Book") + assert "book" in result["name"].lower() + + def test_text_fuzzy(self, sql_index): + """Test text fuzzy search with Levenshtein distance (%term%).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = '%laptap%' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should fuzzy match "laptop" even with typo "laptap" + assert "laptop" in result["title"].lower() + + def test_text_phrase(self, sql_index): + """Test text phrase search (multi-word exact phrase).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'gaming laptop' + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + # Should match exact phrase "gaming laptop" + title_lower = result["title"].lower() + assert "gaming" in title_lower and "laptop" in title_lower + + def test_text_phrase_with_stopword(self, sql_index): + """Test text phrase search containing stop words. + + Redis does not index stop words (like 'and', 'the', 'is') by default. + The sql-redis library works around this by automatically stripping + stop words from phrase searches and emitting a warning. 
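+        For example, the phrase 'laptop and keyboard' effectively becomes the
+        phrase "laptop keyboard" after the stop word is stripped.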
+ See: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/stopwords/ + """ + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title = 'laptop and keyboard' + """ + ) + results = sql_index.query(sql_query) + + # Should find the "Laptop and Keyboard Bundle" product + assert len(results) >= 1 + # Verify at least one result contains both "laptop" and "keyboard" + found_match = False + for result in results: + title_lower = result["title"].lower() + if "laptop" in title_lower and "keyboard" in title_lower: + found_match = True + break + assert found_match, "Expected to find a result with 'laptop' and 'keyboard'" + + # Verify a warning was emitted about stopword removal + stopword_warnings = [ + warning + for warning in w + if "Stopwords" in str(warning.message) + and "and" in str(warning.message).lower() + ] + assert ( + len(stopword_warnings) >= 1 + ), "Expected a warning about stopword removal" + + @pytest.mark.xfail(reason="Text IN operator not yet supported in sql-redis") + def test_text_in(self, sql_index): + """Test text IN operator (multiple term search).""" + sql_query = SQLQuery( + f""" + SELECT title, name + FROM {sql_index.name} + WHERE title IN ('Python', 'Redis') + """ + ) + results = sql_index.query(sql_query) + + assert len(results) >= 1 + for result in results: + title_lower = result["title"].lower() + assert "python" in title_lower or "redis" in title_lower + + +class TestSQLQueryOrderBy: + """Tests for SQL ORDER BY clause.""" + + def test_order_by_asc(self, sql_index): + """Test ORDER BY ascending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price ASC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices) + + def test_order_by_desc(self, sql_index): + """Test ORDER BY descending.""" + sql_query = SQLQuery( + f""" + SELECT title, price + FROM {sql_index.name} + ORDER BY price DESC + """ + ) + results = sql_index.query(sql_query) + + prices = [float(r["price"]) for r in results] + assert prices == sorted(prices, reverse=True) + + +class TestSQLQueryLimit: + """Tests for SQL LIMIT and OFFSET clauses.""" + + def test_limit(self, sql_index): + """Test LIMIT clause.""" + sql_query = SQLQuery(f"SELECT title FROM {sql_index.name} LIMIT 3") + results = sql_index.query(sql_query) + + assert len(results) == 3 + + def test_limit_with_offset(self, sql_index): + """Test LIMIT with OFFSET for pagination.""" + # First page + sql_query1 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 0" + ) + results1 = sql_index.query(sql_query1) + + # Second page + sql_query2 = SQLQuery( + f"SELECT title FROM {sql_index.name} ORDER BY price ASC LIMIT 3 OFFSET 3" + ) + results2 = sql_index.query(sql_query2) + + assert len(results1) == 3 + assert len(results2) == 3 + # Pages should have different results + titles1 = {r["title"] for r in results1} + titles2 = {r["title"] for r in results2} + assert titles1.isdisjoint(titles2) + + +class TestSQLQueryAggregation: + """Tests for SQL aggregation (GROUP BY, COUNT, AVG, etc.).""" + + def test_count_all(self, sql_index): + """Test COUNT(*) aggregation.""" + sql_query = SQLQuery(f"SELECT COUNT(*) as total FROM {sql_index.name}") + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert int(results[0]["total"]) == 13 # 13 products 
in test data + + def test_group_by_with_count(self, sql_index): + """Test GROUP BY with COUNT.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # Should have groups for electronics, books, accessories, stationery + categories = {r["category"] for r in results} + assert "electronics" in categories + assert "books" in categories + + def test_group_by_with_avg(self, sql_index): + """Test GROUP BY with AVG.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + # All results should have category and avg_price + for result in results: + assert "category" in result + assert "avg_price" in result + assert float(result["avg_price"]) > 0 + + def test_group_by_with_filter(self, sql_index): + """Test GROUP BY with WHERE filter.""" + sql_query = SQLQuery( + f""" + SELECT category, AVG(price) as avg_price + FROM {sql_index.name} + WHERE stock > 50 + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "avg_price" in result + + def test_group_by_with_sum(self, sql_index): + """Test GROUP BY with SUM reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, SUM(price) as total_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "total_price" in result + assert float(result["total_price"]) > 0 + + def test_group_by_with_min(self, sql_index): + """Test GROUP BY with MIN reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MIN(price) as min_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "min_price" in result + assert float(result["min_price"]) > 0 + + def test_group_by_with_max(self, sql_index): + """Test GROUP BY with MAX reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "max_price" in result + assert float(result["max_price"]) > 0 + + def test_global_sum(self, sql_index): + """Test global SUM aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT SUM(price) as total + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "total" in results[0] + assert float(results[0]["total"]) > 0 + + def test_global_min(self, sql_index): + """Test global MIN aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MIN(price) as min_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "min_price" in results[0] + assert float(results[0]["min_price"]) > 0 + + def test_global_max(self, sql_index): + """Test global MAX aggregation (no GROUP BY).""" + sql_query = SQLQuery( + f""" + SELECT MAX(price) as max_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "max_price" in results[0] + assert float(results[0]["max_price"]) > 0 + + def test_multiple_reducers(self, sql_index): 
+ """Test multiple reducers in a single query.""" + sql_query = SQLQuery( + f""" + SELECT category, COUNT(*) as count, SUM(price) as total, AVG(price) as avg_price, MIN(price) as min_price, MAX(price) as max_price + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "category" in result + assert "count" in result + assert "total" in result + assert "avg_price" in result + assert "min_price" in result + assert "max_price" in result + + def test_count_distinct(self, sql_index): + """Test COUNT_DISTINCT reducer using Redis-specific syntax.""" + sql_query = SQLQuery( + f""" + SELECT COUNT_DISTINCT(category) as unique_categories + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "unique_categories" in results[0] + # Should have 4 unique categories: electronics, books, accessories, stationery + assert int(results[0]["unique_categories"]) == 4 + + def test_stddev(self, sql_index): + """Test STDDEV reducer.""" + sql_query = SQLQuery( + f""" + SELECT STDDEV(price) as price_stddev + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "price_stddev" in results[0] + # Verify it's a valid numeric value + stddev_value = float(results[0]["price_stddev"]) + assert stddev_value >= 0 # Standard deviation is always non-negative + + def test_quantile(self, sql_index): + """Test QUANTILE reducer.""" + sql_query = SQLQuery( + f""" + SELECT QUANTILE(price, 0.5) as median_price + FROM {sql_index.name} + """ + ) + results = sql_index.query(sql_query) + + assert len(results) == 1 + assert "median_price" in results[0] + # Verify it's a valid numeric value + median_value = float(results[0]["median_price"]) + assert median_value >= 0 + + def test_tolist(self, sql_index): + """Test TOLIST reducer via ARRAY_AGG SQL function.""" + sql_query = SQLQuery( + f""" + SELECT category, ARRAY_AGG(title) as titles + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "titles" in result + # TOLIST returns a comma-separated string or list of values + assert result["titles"] is not None + + def test_first_value(self, sql_index): + """Test FIRST_VALUE reducer.""" + sql_query = SQLQuery( + f""" + SELECT category, FIRST_VALUE(title) as first_title + FROM {sql_index.name} + GROUP BY category + """ + ) + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert "first_title" in result + # Verify it's a non-empty string + assert isinstance(result["first_title"], str) + assert len(result["first_title"]) > 0 + + +class TestSQLQueryIntegration: + """End-to-end integration tests matching proposal examples.""" + + def test_proposal_example_basic(self, sql_index): + """Test the basic example from the MLP proposal.""" + # Example from proposal doc (adapted for our test data) + sql_query = SQLQuery( + f""" + SELECT title, price, category + FROM {sql_index.name} + WHERE category = 'books' + """ + ) + + results = sql_index.query(sql_query) + + assert len(results) > 0 + for result in results: + assert result["category"] == "books" + assert "title" in result + assert "price" in result + + +@pytest.fixture +def vector_index(redis_url, worker_id): + """Create a books index with vector embeddings for SQL query testing.""" + import numpy as np + + unique_id = str(uuid.uuid4())[:8] + index_name = 
f"sql_books_{worker_id}_{unique_id}" + + index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": f"book_{worker_id}_{unique_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": True}}, + {"name": "genre", "type": "tag", "attrs": {"sortable": True}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "dims": 4, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + + index.create(overwrite=True) + + # Create test books with embeddings + books = [ + { + "title": "Dune", + "genre": "Science Fiction", + "price": 15, + "embedding": np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes(), + }, + { + "title": "Foundation", + "genre": "Science Fiction", + "price": 18, + "embedding": np.array([0.15, 0.25, 0.35, 0.45], dtype=np.float32).tobytes(), + }, + { + "title": "Neuromancer", + "genre": "Science Fiction", + "price": 12, + "embedding": np.array([0.2, 0.3, 0.4, 0.5], dtype=np.float32).tobytes(), + }, + { + "title": "The Hobbit", + "genre": "Fantasy", + "price": 14, + "embedding": np.array([0.9, 0.8, 0.7, 0.6], dtype=np.float32).tobytes(), + }, + { + "title": "1984", + "genre": "Dystopian", + "price": 25, + "embedding": np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32).tobytes(), + }, + ] + + index.load(books) + + yield index + + # Cleanup + index.delete(drop=True) + + +class TestSQLQueryVectorSearch: + """Tests for SQL vector similarity search using cosine_distance() and vector_distance().""" + + def test_vector_distance_function(self, vector_index): + """Test vector search with vector_distance() function.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, vector_distance(embedding, :vec) AS score + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + results = vector_index.query(sql_query) + + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert "title" in result + assert "score" in result + # Score should be a valid non-negative distance value + score = float(result["score"]) + assert score >= 0 + + def test_vector_cosine_similarity(self, vector_index): + """Test vector search with cosine_distance() function - pgvector style.""" + import numpy as np + + # Query vector similar to Science Fiction books + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT + title, + genre, + price, + cosine_distance(embedding, :query_vector) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + AND price <= 20 + ORDER BY cosine_distance(embedding, :query_vector) + LIMIT 3 + """, + params={"query_vector": query_vector}, + ) + + results = vector_index.query(sql_query) + + # Should return Science Fiction books under $20 + assert len(results) > 0 + assert len(results) <= 3 + for result in results: + assert result["genre"] == "Science Fiction" + assert float(result["price"]) <= 20 + # Verify vector_distance is returned (like VectorQuery with return_score=True) + assert "vector_distance" in result + # Distance should be a valid non-negative value + distance = float(result["vector_distance"]) + assert distance >= 0 + + def test_vector_redis_query_string(self, vector_index, redis_url): + """Test redis_query_string() returns correct KNN 
query for vector search.""" + import numpy as np + + # Query vector + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + # Get the Redis command string + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify it's a valid FT.SEARCH with KNN syntax + assert redis_cmd.startswith("FT.SEARCH") + assert vector_index.name in redis_cmd + assert "KNN 3" in redis_cmd + assert "@embedding" in redis_cmd + assert "$vector" in redis_cmd + assert "vector_distance" in redis_cmd + + def test_vector_search_with_prefilter_redis_query_string( + self, vector_index, redis_url + ): + """Test redis_query_string() returns correct prefiltered KNN query.""" + import numpy as np + + query_vector = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32).tobytes() + + sql_query = SQLQuery( + f""" + SELECT title, genre, cosine_distance(embedding, :vec) AS vector_distance + FROM {vector_index.name} + WHERE genre = 'Science Fiction' + LIMIT 3 + """, + params={"vec": query_vector}, + ) + + redis_cmd = sql_query.redis_query_string(redis_url=redis_url) + + # Verify prefilter syntax: (filter)=>[KNN ...] + assert redis_cmd.startswith("FT.SEARCH") + assert "Science Fiction" in redis_cmd or "Science\\ Fiction" in redis_cmd + assert "=>[KNN" in redis_cmd + assert "KNN 3" in redis_cmd diff --git a/tests/unit/test_sql_parameter_substitution.py b/tests/unit/test_sql_parameter_substitution.py new file mode 100644 index 00000000..3ec51f28 --- /dev/null +++ b/tests/unit/test_sql_parameter_substitution.py @@ -0,0 +1,224 @@ +"""Unit tests for SQL parameter substitution in SQLQuery. + +These tests verify that parameter substitution correctly handles: +1. Partial matching bug: :id should not replace inside :product_id +2. Quote escaping bug: Single quotes in values should be SQL-escaped +3. Edge cases: Multiple occurrences, similar names, special characters +""" + +import pytest + +from redisvl.query.sql import SQLQuery + + +def buggy_substitute_params(sql: str, params: dict) -> str: + """Simulate the CURRENT buggy implementation for comparison. + + This is the exact code from redisvl/query/sql.py lines 105-113. + """ + for key, value in params.items(): + placeholder = f":{key}" + if isinstance(value, (int, float)): + sql = sql.replace(placeholder, str(value)) + elif isinstance(value, str): + sql = sql.replace(placeholder, f"'{value}'") + return sql + + +class TestBuggyBehaviorDemonstration: + """Tests that DEMONSTRATE the bugs in the current implementation. + + These tests show what goes wrong with the naive str.replace() approach. + They should PASS (demonstrating the bug exists) before the fix, + and some assertions will need to change after the fix. 
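[Editor's note] For reference, here is a minimal sketch of the kind of hardened substitution these tests anticipate. It is an assumption (word-boundary matching plus SQL quote doubling), not the actual redisvl or sql-redis implementation:

    import re


    def substitute_params_sketch(sql: str, params: dict) -> str:
        """Hypothetical hardened substitution (illustrative only)."""
        # Longest names first, so :user_id is handled before :user.
        for key in sorted(params, key=len, reverse=True):
            value = params[key]
            if isinstance(value, bytes):
                # Vector blobs keep their :placeholder and are bound separately.
                continue
            if isinstance(value, (int, float)):
                literal = str(value)
            else:
                # SQL-standard escaping: each ' becomes ''
                literal = "'" + str(value).replace("'", "''") + "'"
            # \b stops :user from also matching inside :user_id or :user_name.
            sql = re.sub(rf":{re.escape(key)}\b", lambda _m, lit=literal: lit, sql)
        return sql

Either the word boundary or the longest-name-first ordering alone is enough to close the prefix-collision hole; bytes values are skipped so vector blobs keep their placeholders, matching test_bytes_param_not_substituted below.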
+    """
+
+    def test_partial_match_bug_exists(self):
+        """Demonstrate that :user incorrectly replaces inside :user_id."""
+        sql = "SELECT * FROM idx WHERE user = :user AND user_id = :user_id"
+        params = {"user": "alice", "user_id": 42}
+
+        result = buggy_substitute_params(sql, params)
+
+        # BUG: ":user" is a prefix of ":user_id", so the naive str.replace()
+        # corrupts the longer placeholder when "user" is processed first
+        # (params are iterated in insertion order).
+        assert "user = 'alice'" in result
+        assert "user_id = 'alice'_id" in result  # corrupted value demonstrates the bug
+
+    def test_quote_escaping_bug_exists(self):
+        """Demonstrate that quotes are NOT escaped in current implementation."""
+        sql = "SELECT * FROM idx WHERE name = :name"
+        params = {"name": "O'Brien"}
+
+        result = buggy_substitute_params(sql, params)
+
+        # BUG: The quote is NOT escaped - this produces invalid SQL
+        assert "O'Brien" in result  # Raw quote, not escaped
+        assert "O''Brien" not in result  # Proper escaping is missing
+
+
+class TestParameterSubstitutionPartialMatching:
+    """Tests for the partial string matching bug.
+
+    The bug: with naive str.replace(), a short placeholder such as ':user'
+    also matches the start of ':user_id', corrupting the longer parameter.
+    """
+
+    def test_similar_param_names_no_partial_match(self):
+        """Test that :id doesn't replace inside :product_id."""
+        sql_query = SQLQuery(
+            "SELECT * FROM idx WHERE id = :id AND product_id = :product_id",
+            params={"id": 123, "product_id": 456},
+        )
+
+        substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+        assert "id = 123" in substituted
+        assert "product_id = 456" in substituted
+        # Should NOT have "product_123"
+        assert "product_123" not in substituted
+
+    def test_prefix_param_names(self):
+        """Test params where one is a prefix of another: :user, :user_id, :user_name."""
+        sql_query = SQLQuery(
+            "SELECT * FROM idx WHERE user = :user AND user_id = :user_id AND user_name = :user_name",
+            params={"user": "alice", "user_id": 42, "user_name": "Alice Smith"},
+        )
+
+        substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+        assert "user = 'alice'" in substituted
+        assert "user_id = 42" in substituted
+        assert "user_name = 'Alice Smith'" in substituted
+        # Should NOT have corrupted values
+        assert "'alice'_id" not in substituted
+        assert "'alice'_name" not in substituted
+
+    def test_suffix_param_names(self):
+        """Test params where one is a suffix pattern: :vec, :query_vec."""
+        sql_query = SQLQuery(
+            "SELECT * FROM idx WHERE vec = :vec AND query_vec = :query_vec",
+            params={"vec": 1.0, "query_vec": 2.0},
+        )
+
+        substituted = sql_query._substitute_params(sql_query.sql, sql_query.params)
+
+        assert "vec = 1.0" in substituted or "vec = 1" in substituted
+        assert "query_vec = 2.0" in substituted or "query_vec = 2" in substituted
+
+
+class TestParameterSubstitutionQuoteEscaping:
+    """Tests for the quote escaping bug.
+
+    The bug: String values with single quotes like "O'Brien" would
+    produce invalid SQL: 'O'Brien' instead of 'O''Brien'.
+ """ + + def test_single_quote_in_value(self): + """Test that single quotes are properly escaped.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": "O'Brien"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + # SQL standard escaping: ' becomes '' + assert "name = 'O''Brien'" in substituted + + def test_multiple_quotes_in_value(self): + """Test multiple single quotes in a value.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE phrase = :phrase", + params={"phrase": "It's a 'test' string"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "phrase = 'It''s a ''test'' string'" in substituted + + def test_apostrophe_names(self): + """Test common names with apostrophes.""" + test_cases = [ + ("McDonald's", "'McDonald''s'"), + ("O'Reilly", "'O''Reilly'"), + ("D'Angelo", "'D''Angelo'"), + ] + + for name, expected in test_cases: + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": name}, + ) + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + assert f"name = {expected}" in substituted, f"Failed for {name}" + + +class TestParameterSubstitutionEdgeCases: + """Tests for edge cases in parameter substitution.""" + + def test_multiple_occurrences_same_param(self): + """Test that a parameter used multiple times is substituted everywhere.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE category = :cat OR subcategory = :cat", + params={"cat": "electronics"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert substituted.count("'electronics'") == 2 + + def test_empty_string_value(self): + """Test empty string parameter value.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE name = :name", + params={"name": ""}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "name = ''" in substituted + + def test_numeric_types(self): + """Test integer and float parameter values.""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE count = :count AND price = :price", + params={"count": 42, "price": 99.99}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + assert "count = 42" in substituted + assert "price = 99.99" in substituted + + def test_bytes_param_not_substituted(self): + """Test that bytes parameters are not substituted (handled separately).""" + sql_query = SQLQuery( + "SELECT * FROM idx WHERE embedding = :vec", + params={"vec": b"\x00\x01\x02\x03"}, + ) + + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + + # Bytes should remain as placeholder + assert ":vec" in substituted + + def test_special_characters_in_value(self): + """Test special characters that might interfere with regex.""" + special_values = [ + "hello@world.com", + "path/to/file", + "price: $100", + "regex.*pattern", + "back\\slash", + ] + + for value in special_values: + sql_query = SQLQuery( + "SELECT * FROM idx WHERE field = :field", + params={"field": value}, + ) + substituted = sql_query._substitute_params(sql_query.sql, sql_query.params) + # Should contain the value wrapped in quotes (with any necessary escaping) + assert ":field" not in substituted, f"Failed to substitute for value: {value}" + diff --git a/uv.lock b/uv.lock index ad61bd62..44e652aa 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.9.2, <3.14" resolution-markers = [ 
"python_full_version >= '3.13'", @@ -4255,7 +4255,7 @@ wheels = [ [[package]] name = "redisvl" -version = "0.13.2" +version = "0.14.0" source = { editable = "." } dependencies = [ { name = "jsonpath-ng" }, @@ -4299,6 +4299,9 @@ pillow = [ sentence-transformers = [ { name = "sentence-transformers" }, ] +sql-redis = [ + { name = "sql-redis" }, +] vertexai = [ { name = "google-cloud-aiplatform" }, { name = "protobuf" }, @@ -4355,11 +4358,12 @@ requires-dist = [ { name = "pyyaml", specifier = ">=5.4,<7.0" }, { name = "redis", specifier = ">=5.0,<7.2" }, { name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.4.0,<4" }, + { name = "sql-redis", marker = "extra == 'sql-redis'", specifier = ">=0.1.1" }, { name = "tenacity", specifier = ">=8.2.2" }, { name = "urllib3", marker = "extra == 'bedrock'", specifier = "<2.2.0" }, { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.2.2" }, ] -provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow"] +provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentence-transformers", "langcache", "vertexai", "bedrock", "pillow", "sql-redis"] [package.metadata.requires-dev] dev = [ @@ -5263,6 +5267,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, ] +[[package]] +name = "sql-redis" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "redis", version = "7.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "redis", version = "7.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sqlglot" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/ef/9ef69125be3b8a9906010f4bfd84d3b12fce86d9ecc9ed18443ff5fa9af6/sql_redis-0.1.1.tar.gz", hash = "sha256:1b763bd33e8963811a8c3d191506d5572f6584bfa5bbfa9c8af09a51f07baf02", size = 103713, upload-time = "2026-02-03T19:29:47.878Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/cf/c9e13d253acb3c08dc9113dc3e75962ebb69584d6286b931f364dfb9225d/sql_redis-0.1.1-py3-none-any.whl", hash = "sha256:8369e8c61990b0f9aa5ad1a9d4b03060f770af5a7b856b84e88e819efcacb1ed", size = 18716, upload-time = "2026-02-03T19:29:46.899Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.44" @@ -5316,6 +5334,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, ] +[[package]] +name = "sqlglot" +version = "28.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/b6/f188b9616bef49943353f3622d726af30fdb08acbd081deef28ba43ceb48/sqlglot-28.6.0.tar.gz", hash = "sha256:8c0a432a6745c6c7965bbe99a17667c5a3ca1d524a54b31997cf5422b1727f6a", size = 5676522, upload-time = "2026-01-13T17:39:24.389Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/8f/a6/21b1e19994296ba4a34bc7abaf4fcb40d7e7787477bdfde58cd843594459/sqlglot-28.6.0-py3-none-any.whl", hash = "sha256:8af76e825dc8456a49f8ce049d69bbfcd116655dda3e53051754789e2edf8eba", size = 575186, upload-time = "2026-01-13T17:39:22.327Z" }, +] + [[package]] name = "stack-data" version = "0.6.3"