From d931c2eeb10d3f8f61e26adc1f06772f50467669 Mon Sep 17 00:00:00 2001 From: shchepinova Date: Fri, 27 Feb 2026 20:29:28 +0100 Subject: [PATCH 1/2] fix(490): add question mark to escaped chars in TokenEscaper --- redisvl/utils/token_escaper.py | 6 +++--- tests/unit/test_token_escaper.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index 04e04cd2..2d50ca00 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -9,11 +9,11 @@ class TokenEscaper: """ # Characters that RediSearch requires us to escape during queries. - # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization - DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]" + # Source: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/escaping/#tokenization-rules-for-text-fields + DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ \?]" # Same as above but excludes * to allow wildcard patterns - ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ]" + ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ \?]" def __init__(self, escape_chars_re: Optional[Pattern] = None): if escape_chars_re: diff --git a/tests/unit/test_token_escaper.py b/tests/unit/test_token_escaper.py index 0adb2d11..c1d6fd89 100644 --- a/tests/unit/test_token_escaper.py +++ b/tests/unit/test_token_escaper.py @@ -19,8 +19,8 @@ def escaper(): ), ( r"& symbols, like * and ?", - r"\&\ symbols\,\ like\ \*\ and\ ?", - ), # TODO: question marks are not caught? + r"\&\ symbols\,\ like\ \*\ and\ \?", + ), # underscores are ignored (r"-dashes_and_underscores-", r"\-dashes_and_underscores\-"), ], @@ -57,7 +57,7 @@ def test_escape_text_chars(escaper, test_input, expected): ("(parentheses)", r"\(parentheses\)"), ("[brackets]", r"\[brackets\]"), ("{braces}", r"\{braces\}"), - # ("question?mark", r"question\?mark"), #TODO - question marks are not caught? + ("question?mark", r"question\?mark"), # Unicode characters in tags ("你好", r"你好"), # Assuming non-Latin characters don't need escaping ("emoji:😊", r"emoji\:😊"), @@ -81,6 +81,7 @@ def test_escape_text_chars(escaper, test_input, expected): "parentheses", "brackets", "braces", + "question", "non-latin", "emoji", ], From fa0316ff74bfc91146b4531a8d3a1684a6a88e2f Mon Sep 17 00:00:00 2001 From: shchepinova Date: Tue, 3 Mar 2026 20:31:38 +0100 Subject: [PATCH 2/2] fix(490): remove question mark from ESCAPED_CHARS_NO_WILDCARD --- redisvl/utils/token_escaper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index 2d50ca00..effa1c96 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -12,8 +12,8 @@ class TokenEscaper: # Source: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/escaping/#tokenization-rules-for-text-fields DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ \?]" - # Same as above but excludes * to allow wildcard patterns - ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ \?]" + # Same as above but excludes * and ? to allow wildcard patterns + ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ]" def __init__(self, escape_chars_re: Optional[Pattern] = None): if escape_chars_re: