From 7110f63a7c1a05ec8b6a6d06eba8194816a6f78b Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 26 Mar 2026 11:47:11 -0700 Subject: [PATCH] Add UnifiedQueryParser with language-specific implementations Extract parsing logic from UnifiedQueryPlanner into a UnifiedQueryParser interface with language-specific implementations: PPLQueryParser (returns UnresolvedPlan) and CalciteSqlQueryParser (returns SqlNode). UnifiedQueryContext owns the parser instance, created eagerly by the builder which has direct access to query type and future SQL config. Each implementation receives only its required dependencies: PPLQueryParser takes Settings, CalciteSqlQueryParser takes CalcitePlanContext. UnifiedQueryPlanner.CustomVisitorStrategy now obtains the parser from the context via the interface type. Signed-off-by: Chen Dai --- api/README.md | 17 ++- .../sql/api/UnifiedQueryContext.java | 25 +++- .../sql/api/UnifiedQueryPlanner.java | 35 ++---- .../sql/api/parser/CalciteSqlQueryParser.java | 31 +++++ .../sql/api/parser/PPLQueryParser.java | 43 +++++++ .../sql/api/parser/UnifiedQueryParser.java | 23 ++++ .../api/parser/UnifiedQueryParserSqlTest.java | 115 ++++++++++++++++++ .../api/parser/UnifiedQueryParserTest.java | 87 +++++++++++++ 8 files changed, 345 insertions(+), 31 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java create mode 100644 api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java create mode 100644 api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java create mode 100644 api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java create mode 100644 api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java diff --git a/api/README.md b/api/README.md index eec4868432..ee45a8c2dc 100644 --- a/api/README.md +++ b/api/README.md @@ -8,7 +8,8 @@ This module provides components organized into two main areas aligned with the [ ### Unified Language Specification -- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation. +- **`UnifiedQueryParser`**: Parses PPL (Piped Processing Language) or SQL queries and returns the native parse result (`UnresolvedPlan` for PPL, `SqlNode` for Calcite SQL). +- **`UnifiedQueryPlanner`**: Accepts PPL or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation. - **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects. ### Unified Execution Runtime @@ -42,6 +43,20 @@ UnifiedQueryContext context = UnifiedQueryContext.builder() .build(); ``` +### UnifiedQueryParser + +Use `UnifiedQueryParser` to parse queries into their native parse tree. The parser is owned by `UnifiedQueryContext` and returns the native parse result for each language. + +```java +// PPL parsing +UnresolvedPlan ast = (UnresolvedPlan) context.getParser().parse("source = logs | where status = 200"); + +// SQL parsing (with QueryType.SQL context) +SqlNode sqlNode = (SqlNode) sqlContext.getParser().parse("SELECT * FROM logs WHERE status = 200"); +``` + +Callers can then use each language's native visitor infrastructure (`AbstractNodeVisitor` for PPL, `SqlBasicVisitor` for Calcite SQL) on the typed result for further analysis. + ### UnifiedQueryPlanner Use `UnifiedQueryPlanner` to parse and analyze PPL or SQL queries into Calcite logical plans. The planner accepts a `UnifiedQueryContext` and can be reused for multiple queries. diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 0b4218a549..4332ff1766 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -15,7 +15,8 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.Callable; -import lombok.Value; +import lombok.AllArgsConstructor; +import lombok.Getter; import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; @@ -26,6 +27,9 @@ import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; +import org.opensearch.sql.api.parser.CalciteSqlQueryParser; +import org.opensearch.sql.api.parser.PPLQueryParser; +import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; @@ -40,14 +44,18 @@ * centralizes configuration for catalog schemas, query type, execution limits, and other settings, * enabling consistent behavior across all unified query operations. */ -@Value +@AllArgsConstructor +@Getter public class UnifiedQueryContext implements AutoCloseable { /** CalcitePlanContext containing Calcite framework configuration and query type. */ - CalcitePlanContext planContext; + private final CalcitePlanContext planContext; /** Settings containing execution limits and feature flags used by parsers and planners. */ - Settings settings; + private final Settings settings; + + /** Query parser created eagerly from this context's configuration. */ + private final UnifiedQueryParser parser; /** * Returns the profiling result. Call after query execution to retrieve collected metrics. Returns @@ -202,7 +210,14 @@ public UnifiedQueryContext build() { CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); QueryProfiling.activate(profiling); - return new UnifiedQueryContext(planContext, settings); + return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings)); + } + + private UnifiedQueryParser createParser(CalcitePlanContext planContext, Settings settings) { + return switch (queryType) { + case PPL -> new PPLQueryParser(settings); + case SQL -> new CalciteSqlQueryParser(planContext); + }; } private Settings buildSettings() { diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index 2d1c04a243..af4d9f518a 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.monitor.profile.MetricName.ANALYZE; import lombok.RequiredArgsConstructor; -import org.antlr.v4.runtime.tree.ParseTree; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; @@ -18,15 +17,11 @@ import org.apache.calcite.sql.SqlNode; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Planner; -import org.opensearch.sql.ast.statement.Query; -import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.calcite.CalciteRelNodeVisitor; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.executor.QueryType; -import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; -import org.opensearch.sql.ppl.parser.AstBuilder; -import org.opensearch.sql.ppl.parser.AstStatementBuilder; /** * {@code UnifiedQueryPlanner} provides a high-level API for parsing and analyzing queries using the @@ -93,36 +88,26 @@ public RelNode plan(String query) throws Exception { } } - /** AST-based planning via ANTLR parser → UnresolvedPlan → CalciteRelNodeVisitor. */ - @RequiredArgsConstructor + /** AST-based planning via context-owned parser → UnresolvedPlan → CalciteRelNodeVisitor. */ private static class CustomVisitorStrategy implements PlanningStrategy { private final UnifiedQueryContext context; - private final PPLSyntaxParser parser = new PPLSyntaxParser(); + private final UnifiedQueryParser parser; private final CalciteRelNodeVisitor relNodeVisitor = new CalciteRelNodeVisitor(new EmptyDataSourceService()); + @SuppressWarnings("unchecked") + CustomVisitorStrategy(UnifiedQueryContext context) { + this.context = context; + this.parser = (UnifiedQueryParser) context.getParser(); + } + @Override public RelNode plan(String query) { - UnresolvedPlan ast = parse(query); + UnresolvedPlan ast = parser.parse(query); RelNode logical = relNodeVisitor.analyze(ast, context.getPlanContext()); return preserveCollation(logical); } - private UnresolvedPlan parse(String query) { - ParseTree cst = parser.parse(query); - AstStatementBuilder astStmtBuilder = - new AstStatementBuilder( - new AstBuilder(query, context.getSettings()), - AstStatementBuilder.StatementBuilderContext.builder().build()); - Statement statement = cst.accept(astStmtBuilder); - - if (statement instanceof Query) { - return ((Query) statement).getPlan(); - } - throw new UnsupportedOperationException( - "Only query statements are supported but got " + statement.getClass().getSimpleName()); - } - private RelNode preserveCollation(RelNode logical) { RelCollation collation = logical.getTraitSet().getCollation(); if (!(logical instanceof Sort) && collation != RelCollations.EMPTY) { diff --git a/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java new file mode 100644 index 0000000000..b92e75bf34 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +/** Calcite SQL query parser that produces {@link SqlNode} as the native parse result. */ +@RequiredArgsConstructor +public class CalciteSqlQueryParser implements UnifiedQueryParser { + + /** Calcite plan context providing parser configuration (e.g., case sensitivity, conformance). */ + private final CalcitePlanContext planContext; + + @Override + public SqlNode parse(String query) { + try { + SqlParser parser = SqlParser.create(query, planContext.config.getParserConfig()); + return parser.parseQuery(); + } catch (SqlParseException e) { + throw new SyntaxCheckException("Failed to parse SQL query: " + e.getMessage()); + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java new file mode 100644 index 0000000000..af094404d7 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.statement.Query; +import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; +import org.opensearch.sql.ppl.parser.AstStatementBuilder; + +/** PPL query parser that produces {@link UnresolvedPlan} as the native parse result. */ +@RequiredArgsConstructor +public class PPLQueryParser implements UnifiedQueryParser { + + /** Settings containing execution limits and feature flags used by AST builders. */ + private final Settings settings; + + /** Reusable ANTLR-based PPL syntax parser. Stateless and thread-safe. */ + private final PPLSyntaxParser syntaxParser = new PPLSyntaxParser(); + + @Override + public UnresolvedPlan parse(String query) { + ParseTree cst = syntaxParser.parse(query); + AstStatementBuilder astStmtBuilder = + new AstStatementBuilder( + new AstBuilder(query, settings), + AstStatementBuilder.StatementBuilderContext.builder().build()); + Statement statement = cst.accept(astStmtBuilder); + + if (statement instanceof Query) { + return ((Query) statement).getPlan(); + } + throw new UnsupportedOperationException( + "Only query statements are supported but got " + statement.getClass().getSimpleName()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java new file mode 100644 index 0000000000..76918f93ea --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +/** + * Language-neutral query parser interface. Returns the native parse result for the language (e.g., + * {@code UnresolvedPlan} for PPL, {@code SqlNode} for Calcite SQL). + * + * @param the native parse result type for this language + */ +public interface UnifiedQueryParser { + + /** + * Parses the query and returns the native parse result. + * + * @param query the raw query string + * @return the native parse result + */ + T parse(String query); +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java new file mode 100644 index 0000000000..0cba9f7b71 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import org.apache.calcite.sql.parser.SqlParserFixture; +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.executor.QueryType; + +/** + * SQL parser tests using Calcite's {@link SqlParserFixture} for idiomatic parse-unparse assertions. + * Parser config is read from {@link org.opensearch.sql.api.UnifiedQueryContext} to stay in sync + * with production. + */ +public class UnifiedQueryParserSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testParseSelectStar() { + sql("SELECT * FROM catalog.employees") + .ok( + """ + SELECT * + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseSelectColumns() { + sql("SELECT id, name FROM catalog.employees") + .ok( + """ + SELECT `id`, `name` + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseFilter() { + sql(""" + SELECT name + FROM catalog.employees + WHERE age > 30\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + WHERE (`age` > 30)\ + """); + } + + @Test + public void testParseAggregate() { + sql(""" + SELECT department, count(*) AS cnt + FROM catalog.employees + GROUP BY department\ + """) + .ok( + """ + SELECT `department`, COUNT(*) AS `cnt` + FROM `catalog`.`employees` + GROUP BY `department`\ + """); + } + + @Test + public void testParseOrderBy() { + sql(""" + SELECT name + FROM catalog.employees + ORDER BY age DESC\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + ORDER BY `age` DESC\ + """); + } + + @Test + public void testParseJoin() { + sql(""" + SELECT a.id, b.name + FROM catalog.employees a + JOIN catalog.employees b ON a.id = b.age\ + """) + .ok( + """ + SELECT `a`.`id`, `b`.`name` + FROM `catalog`.`employees` AS `a` + INNER JOIN `catalog`.`employees` AS `b` ON (`a`.`id` = `b`.`age`)\ + """); + } + + @Test + public void testSyntaxErrorFails() { + sql("SELECT ^FROM^").fails("(?s).*Incorrect syntax near the keyword 'FROM'.*"); + } + + private SqlParserFixture sql(String sql) { + return SqlParserFixture.DEFAULT + .withConfig(c -> context.getPlanContext().config.getParserConfig()) + .sql(sql); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java new file mode 100644 index 0000000000..1b6b5181ae --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java @@ -0,0 +1,87 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import static java.util.Collections.emptyList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.opensearch.sql.ast.dsl.AstDSL.agg; +import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; +import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.allFields; +import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; +import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.exprList; +import static org.opensearch.sql.ast.dsl.AstDSL.field; +import static org.opensearch.sql.ast.dsl.AstDSL.filter; +import static org.opensearch.sql.ast.dsl.AstDSL.function; +import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.let; +import static org.opensearch.sql.ast.dsl.AstDSL.project; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.relation; + +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +public class UnifiedQueryParserTest extends UnifiedQueryTestBase { + + @Test + public void testParseSource() { + assertEqual( + "source = catalog.employees", + project(relation(qualifiedName("catalog", "employees")), allFields())); + } + + @Test + public void testParseFilter() { + assertEqual( + "source = catalog.employees | where age > 30", + project( + filter( + relation(qualifiedName("catalog", "employees")), + compare(">", field("age"), intLiteral(30))), + allFields())); + } + + @Test + public void testParseEval() { + assertEqual( + "source = catalog.employees | eval f = abs(id)", + project( + eval( + relation(qualifiedName("catalog", "employees")), + let(field("f"), function("abs", field("id")))), + allFields())); + } + + @Test + public void testParseStats() { + assertEqual( + "source = catalog.employees | stats count(age) by department", + project( + agg( + relation(qualifiedName("catalog", "employees")), + exprList(alias("count(age)", aggregate("count", field("age")))), + emptyList(), + exprList(alias("department", field("department"))), + defaultStatsArgs()), + allFields())); + } + + @Test + public void testSyntaxErrorThrows() { + assertThrows(SyntaxCheckException.class, () -> context.getParser().parse("not a valid query")); + } + + private void assertEqual(String query, UnresolvedPlan expected) { + UnresolvedPlan actual = (UnresolvedPlan) context.getParser().parse(query); + assertEquals(expected, actual); + } +}