From dd347ac269722f97975d0d557d7f4abbafca2fec Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 10:19:39 +0800 Subject: [PATCH 01/10] Fix deterministic memory leak and dangling pointer in SQLParser::tokenize --- src/SQLParser.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index b3bf0dfe..5334b9fe 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,11 +59,16 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - token = hsql_lex(&yylval, &yylloc, scanner); - + if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); + yylval.sval = nullptr; + } + + token = hsql_lex(&yylval, &yylloc, scanner); + + } hsql__delete_buffer(state, scanner); From 2b254f355652de187c312f984c4964eb05f53855 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 16:20:19 +0800 Subject: [PATCH 02/10] Add regression test for tokenize memory leak --- test/sql_parser.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/sql_parser.cpp b/test/sql_parser.cpp index 31b9be1f..c94f74c8 100644 --- a/test/sql_parser.cpp +++ b/test/sql_parser.cpp @@ -42,3 +42,17 @@ TEST(SQLParserTokenizeStringifyTest) { ASSERT(query == cache[token_string]); ASSERT(&query != &cache[token_string]); } + +TEST(SQLParserTokenizeLeakRegressionTest) { + + const std::string query = "'string_1' 'string_2' 'string_3';"; + std::vector tokens; + + ASSERT(SQLParser::tokenize(query, &tokens)); + + ASSERT_EQ(tokens.size(), 4); + ASSERT_EQ(tokens[0], SQL_STRING); + ASSERT_EQ(tokens[1], SQL_STRING); + ASSERT_EQ(tokens[2], SQL_STRING); + ASSERT_EQ(tokens[3], ';'); +} \ No newline at end of file From 571dc227c7ee23fd13b13d6a184b302706dda40c Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 20:21:53 +0800 Subject: [PATCH 03/10] update CI with Clang sanitizer builds --- .github/workflows/ci.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d9122e3b..c505d6d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,19 @@ jobs: cxx: clang++ os: macos-latest + - name: clang-sanitizer-ubuntu + cc: clang-19 + cxx: clang++-19 + os: ubuntu-latest + container: ubuntu:24.04 + build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + + - name: clang-sanitizer-macOS + cc: clang + cxx: clang++ + os: macos-latest + build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + steps: - name: Checkout uses: actions/checkout@v4 From 14c56213ecd5eef2efde6e5f6ca6e137882efe81 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 21:51:19 +0800 Subject: [PATCH 04/10] Fix CI OS detection --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c505d6d1..c52e9ba5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,14 +80,14 @@ jobs: git checkout $GITHUB_HEAD_REF - name: Setup (macOS) - if: matrix.name == 'clang-macOS' + if: matrix.os == 'macos-latest' run: | brew install bison flex echo "BISON=$(brew --prefix bison)/bin/bison" >> $GITHUB_ENV echo "FLEX=$(brew --prefix flex)/bin/flex" >> $GITHUB_ENV - name: Setup (Ubuntu) - if: matrix.name != 'clang-macOS' + if: matrix.os == 'ubuntu-latest' run: | apt-get update apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind From 23e894df727ab4d37a71a55814bf14748672c357 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 09:22:14 +0800 Subject: [PATCH 05/10] Temporarily remove fix to verify sanitizer failure --- .github/workflows/ci.yml | 2 +- src/SQLParser.cpp | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c52e9ba5..37e52eee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 if: matrix.name != 'gcc-6' - name: Checkout (Ubuntu 18.04) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index 5334b9fe..22459e85 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,16 +59,11 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - + token = hsql_lex(&yylval, &yylloc, scanner); + if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); - yylval.sval = nullptr; - } - - token = hsql_lex(&yylval, &yylloc, scanner); - - } hsql__delete_buffer(state, scanner); @@ -76,4 +71,4 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { return true; } -} // namespace hsql +} // namespace hsql \ No newline at end of file From 20603d2ce09176004a1f40ebc3497cedb10f2d91 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 14:21:32 +0800 Subject: [PATCH 06/10] Fix newline EOF and keep fix removed for verification --- src/SQLParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index 22459e85..b3bf0dfe 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -71,4 +71,4 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { return true; } -} // namespace hsql \ No newline at end of file +} // namespace hsql From 79f3c33ff55bd82473aecda058e11b4ab6704f67 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 15:34:23 +0800 Subject: [PATCH 07/10] Re-apply the fix: Tests now pass and memory leaks are resolved --- src/SQLParser.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index b3bf0dfe..c4806b69 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,11 +59,13 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - token = hsql_lex(&yylval, &yylloc, scanner); if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); + yylval.sval = nullptr; } + token = hsql_lex(&yylval, &yylloc, scanner); + } hsql__delete_buffer(state, scanner); From 644e35a32df0f0d2ef313888a77b24848198ad28 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Fri, 10 Apr 2026 16:36:57 +0800 Subject: [PATCH 08/10] Implement sanitizer builds, update Makefile patch, and add issue #261 reference --- .github/workflows/ci.yml | 8 ++++++-- Makefile | 8 +++++--- test/sql_parser.cpp | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37e52eee..5fd2d781 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,8 @@ jobs: env: CC: ${{matrix.cc}} CXX: ${{matrix.cxx}} + CXXFLAGS: ${{matrix.env_cxxflags}} + LDFLAGS: ${{matrix.env_ldflags}} defaults: run: shell: bash @@ -51,13 +53,15 @@ jobs: cxx: clang++-19 os: ubuntu-latest container: ubuntu:24.04 - build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + env_cxxflags: "-fsanitize=address,undefined" + env_ldflags: "-fsanitize=address,undefined" - name: clang-sanitizer-macOS cc: clang cxx: clang++ os: macos-latest - build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + env_cxxflags: "-fsanitize=address,undefined" + env_ldflags: "-fsanitize=address,undefined" steps: - name: Checkout diff --git a/Makefile b/Makefile index 4b036045..cab55e1c 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,9 @@ GMAKE = make mode=$(mode) NAME := sqlparser PARSER_CPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp PARSER_H = $(SRCPARSER)/bison_parser.h $(SRCPARSER)/flex_lexer.h -LIB_CFLAGS = -std=c++17 $(OPT_FLAG) +LIB_CFLAGS = -std=c++17 $(OPT_FLAG) $(CXXFLAGS) +LIB_LFLAGS = $(LDFLAGS) + relaxed_build ?= "off" ifeq ($(relaxed_build), on) @@ -54,12 +56,12 @@ static ?= no ifeq ($(static), yes) LIB_BUILD = lib$(NAME).a LIBLINKER = $(AR) - LIB_LFLAGS = rs + LIB_LFLAGS += rs else LIB_BUILD = lib$(NAME).so LIBLINKER = $(CXX) LIB_CFLAGS += -fPIC - LIB_LFLAGS = -shared -o + LIB_LFLAGS += -shared -o endif LIB_CPP = $(sort $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSER_CPP)) LIB_H = $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") $(PARSER_H) diff --git a/test/sql_parser.cpp b/test/sql_parser.cpp index c94f74c8..92a439e1 100644 --- a/test/sql_parser.cpp +++ b/test/sql_parser.cpp @@ -43,6 +43,7 @@ TEST(SQLParserTokenizeStringifyTest) { ASSERT(&query != &cache[token_string]); } +// Regression test for the memory leak reported in issue #261. TEST(SQLParserTokenizeLeakRegressionTest) { const std::string query = "'string_1' 'string_2' 'string_3';"; @@ -55,4 +56,4 @@ TEST(SQLParserTokenizeLeakRegressionTest) { ASSERT_EQ(tokens[1], SQL_STRING); ASSERT_EQ(tokens[2], SQL_STRING); ASSERT_EQ(tokens[3], ';'); -} \ No newline at end of file +} From 745c9300ea5c42778ff3f8600522bae1d4178a66 Mon Sep 17 00:00:00 2001 From: Daniel Lindner <27929897+dey4ss@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:28:10 +0200 Subject: [PATCH 09/10] Apply suggestion from @dey4ss --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5fd2d781..b630ba4d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,7 +94,7 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | apt-get update - apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind + apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind libclang-rt-dev echo "BISON=bison" >> $GITHUB_ENV echo "FLEX=flex" >> $GITHUB_ENV From c67194c7ac18076f7750bd6dd2591f6e539e9345 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Fri, 10 Apr 2026 20:05:28 +0800 Subject: [PATCH 10/10] Fix sanitizer link error and maintain gcc-6 compatibility using conditional pkg install --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5fd2d781..0cde8d31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,7 +94,7 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | apt-get update - apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind + apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind ${{ matrix.name == 'clang-sanitizer-ubuntu' && 'libclang-rt-19-dev' || '' }} echo "BISON=bison" >> $GITHUB_ENV echo "FLEX=flex" >> $GITHUB_ENV