From 74f6702d1ecd4bb172402e49329b29dc99cb21f4 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 11 Feb 2026 15:21:26 +0300 Subject: [PATCH 1/4] don't raise exception if comment goes after the semicolon; add exception message for multiple queries --- mindsdb_sql_parser/__init__.py | 9 +++++++++ tests/test_base_sql/test_base_sql.py | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/mindsdb_sql_parser/__init__.py b/mindsdb_sql_parser/__init__.py index 03ecb86..cc763a5 100644 --- a/mindsdb_sql_parser/__init__.py +++ b/mindsdb_sql_parser/__init__.py @@ -25,6 +25,11 @@ def process(self) -> str: # show error location msgs = self.error_location() + if self.bad_token is not None and self.bad_token.value == ';': + # unexpected semicolon in the middle of the query; it might be a delimiter between statements + msgs.append('Only a single sql statement is expected. Got multiple instead') + return '\n'.join(msgs) + # suggestion suggestions = self.make_suggestion() @@ -171,6 +176,10 @@ def parse_sql(sql, dialect=None): from mindsdb_sql_parser.parser import MindsDBParser lexer, parser = MindsDBLexer(), MindsDBParser() + # remove comments + sql = re.sub(r'--.*?$', '', sql, flags=re.MULTILINE) + sql = re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL) + # remove ending semicolon and spaces sql = re.sub(r'[\s;]+$', '', sql) diff --git a/tests/test_base_sql/test_base_sql.py b/tests/test_base_sql/test_base_sql.py index 7a3816c..d124ce1 100644 --- a/tests/test_base_sql/test_base_sql.py +++ b/tests/test_base_sql/test_base_sql.py @@ -1,5 +1,9 @@ from textwrap import dedent + +import pytest + from mindsdb_sql_parser import parse_sql +from mindsdb_sql_parser.exceptions import ParsingException from mindsdb_sql_parser.ast import * @@ -86,3 +90,24 @@ def test_quotes_identifier(self): assert str(ast).lower() == str(expected_ast).lower() assert ast.to_tree() == expected_ast.to_tree() + + def test_multy_statement(self): + sql = """ + select 1; + select 2 + """ + + with 
pytest.raises(ParsingException) as excinfo: + parse_sql(sql) + + assert "Only a single sql statement is expected" in str(excinfo.value) + + def test_comment_after_semicolon(self): + sql = """ + select 1; -- my query + """ + + query = parse_sql(sql) + assert query == Select(targets=[Constant(1)]) + + From 2fe9f7434cf931e0008b0a341456707cc6eb3ead Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 11 Feb 2026 15:21:45 +0300 Subject: [PATCH 2/4] bump version --- mindsdb_sql_parser/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb_sql_parser/__about__.py b/mindsdb_sql_parser/__about__.py index 58de06c..dd10ca9 100644 --- a/mindsdb_sql_parser/__about__.py +++ b/mindsdb_sql_parser/__about__.py @@ -1,6 +1,6 @@ __title__ = 'mindsdb_sql_parser' __package_name__ = 'mindsdb_sql_parser' -__version__ = '0.13.7' +__version__ = '0.13.8' __description__ = "Mindsdb SQL parser" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 7f677b99ff447a45cc1ef68f3c5aa8aa548fbb2c Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 11 Feb 2026 18:36:57 +0300 Subject: [PATCH 3/4] test_comment_symbols_in_string --- tests/test_base_sql/test_base_sql.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_base_sql/test_base_sql.py b/tests/test_base_sql/test_base_sql.py index d124ce1..13708d3 100644 --- a/tests/test_base_sql/test_base_sql.py +++ b/tests/test_base_sql/test_base_sql.py @@ -110,4 +110,22 @@ def test_comment_after_semicolon(self): query = parse_sql(sql) assert query == Select(targets=[Constant(1)]) + def test_comment_symbols_in_string(self): + expected_query = Select(targets=[Constant('--x')]) + + query = parse_sql("select '--x'") + assert query == expected_query + + query = parse_sql('select "--x"') + assert query == expected_query + + # multiline + expected_query = Select(targets=[Constant('/* x */')]) + + query = parse_sql("select '/* x */'") + assert query == expected_query + + query = parse_sql('select "/* x 
*/"') + assert query == expected_query + From 1bc1b70519c6f5bcec6b74df84ab04fe6a23820e Mon Sep 17 00:00:00 2001 From: andrew Date: Thu, 12 Feb 2026 12:09:03 +0300 Subject: [PATCH 4/4] omit ; in a different way --- mindsdb_sql_parser/__init__.py | 24 +++++++++++++++++------- mindsdb_sql_parser/parser.py | 1 + tests/test_base_sql/test_base_sql.py | 5 ++++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/mindsdb_sql_parser/__init__.py b/mindsdb_sql_parser/__init__.py index cc763a5..5d60f92 100644 --- a/mindsdb_sql_parser/__init__.py +++ b/mindsdb_sql_parser/__init__.py @@ -176,15 +176,25 @@ def parse_sql(sql, dialect=None): from mindsdb_sql_parser.parser import MindsDBParser lexer, parser = MindsDBLexer(), MindsDBParser() - # remove comments - sql = re.sub(r'--.*?$', '', sql, flags=re.MULTILINE) - sql = re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL) - - # remove ending semicolon and spaces - sql = re.sub(r'[\s;]+$', '', sql) + def semicolon_checker(generator): + """ + Repeat the same elements from generator except trailing SEMICOLON tokens. 
+ They are kept in a buffer until any other token appears + """ + + buffer = [] + for token in generator: + if token.type == 'SEMICOLON': + buffer.append(token) + continue + elif len(buffer) > 0: + for buf_token in buffer: + yield buf_token + buffer = [] + yield token tokens = lexer.tokenize(sql) - ast = parser.parse(tokens) + ast = parser.parse(semicolon_checker(tokens)) if ast is None: diff --git a/mindsdb_sql_parser/parser.py b/mindsdb_sql_parser/parser.py index f503f42..f8414ab 100644 --- a/mindsdb_sql_parser/parser.py +++ b/mindsdb_sql_parser/parser.py @@ -43,6 +43,7 @@ class MindsDBParser(Parser): log = ParserLogger() tokens = MindsDBLexer.tokens + start = "query" precedence = ( ('left', OR), diff --git a/tests/test_base_sql/test_base_sql.py b/tests/test_base_sql/test_base_sql.py index 13708d3..3241a6a 100644 --- a/tests/test_base_sql/test_base_sql.py +++ b/tests/test_base_sql/test_base_sql.py @@ -102,6 +102,10 @@ def test_multy_statement(self): assert "Only a single sql statement is expected" in str(excinfo.value) + def test_trailing_semicolon(self): + query = parse_sql("select 1;") + assert query == Select(targets=[Constant(1)]) + def test_comment_after_semicolon(self): sql = """ select 1; -- my query @@ -128,4 +132,3 @@ def test_comment_symbols_in_string(self): query = parse_sql('select "/* x */"') assert query == expected_query -