diff --git a/api/README.md b/api/README.md index eec4868432..ee45a8c2dc 100644 --- a/api/README.md +++ b/api/README.md @@ -8,7 +8,8 @@ This module provides components organized into two main areas aligned with the [ ### Unified Language Specification -- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation. +- **`UnifiedQueryParser`**: Parses PPL (Piped Processing Language) or SQL queries and returns the native parse result (`UnresolvedPlan` for PPL, `SqlNode` for Calcite SQL). +- **`UnifiedQueryPlanner`**: Accepts PPL or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation. - **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects. ### Unified Execution Runtime @@ -42,6 +43,20 @@ UnifiedQueryContext context = UnifiedQueryContext.builder() .build(); ``` +### UnifiedQueryParser + +Use `UnifiedQueryParser` to parse queries into their native parse tree. The parser is owned by `UnifiedQueryContext` and returns the native parse result for each language. + +```java +// PPL parsing +UnresolvedPlan ast = (UnresolvedPlan) context.getParser().parse("source = logs | where status = 200"); + +// SQL parsing (with QueryType.SQL context) +SqlNode sqlNode = (SqlNode) sqlContext.getParser().parse("SELECT * FROM logs WHERE status = 200"); +``` + +Callers can then use each language's native visitor infrastructure (`AbstractNodeVisitor` for PPL, `SqlBasicVisitor` for Calcite SQL) on the typed result for further analysis. + ### UnifiedQueryPlanner Use `UnifiedQueryPlanner` to parse and analyze PPL or SQL queries into Calcite logical plans. The planner accepts a `UnifiedQueryContext` and can be reused for multiple queries. diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 0b4218a549..4332ff1766 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -15,7 +15,8 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.Callable; -import lombok.Value; +import lombok.AllArgsConstructor; +import lombok.Getter; import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; @@ -26,6 +27,9 @@ import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; +import org.opensearch.sql.api.parser.CalciteSqlQueryParser; +import org.opensearch.sql.api.parser.PPLQueryParser; +import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; @@ -40,14 +44,18 @@ * centralizes configuration for catalog schemas, query type, execution limits, and other settings, * enabling consistent behavior across all unified query operations. */ -@Value +@AllArgsConstructor +@Getter public class UnifiedQueryContext implements AutoCloseable { /** CalcitePlanContext containing Calcite framework configuration and query type. */ - CalcitePlanContext planContext; + private final CalcitePlanContext planContext; /** Settings containing execution limits and feature flags used by parsers and planners. */ - Settings settings; + private final Settings settings; + + /** Query parser created eagerly from this context's configuration. */ + private final UnifiedQueryParser parser; /** * Returns the profiling result. Call after query execution to retrieve collected metrics. Returns @@ -202,7 +210,14 @@ public UnifiedQueryContext build() { CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); QueryProfiling.activate(profiling); - return new UnifiedQueryContext(planContext, settings); + return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings)); + } + + private UnifiedQueryParser createParser(CalcitePlanContext planContext, Settings settings) { + return switch (queryType) { + case PPL -> new PPLQueryParser(settings); + case SQL -> new CalciteSqlQueryParser(planContext); + }; } private Settings buildSettings() { diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index 2d1c04a243..af4d9f518a 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.monitor.profile.MetricName.ANALYZE; import lombok.RequiredArgsConstructor; -import org.antlr.v4.runtime.tree.ParseTree; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; @@ -18,15 +17,11 @@ import org.apache.calcite.sql.SqlNode; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Planner; -import org.opensearch.sql.ast.statement.Query; -import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.calcite.CalciteRelNodeVisitor; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.executor.QueryType; -import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; -import org.opensearch.sql.ppl.parser.AstBuilder; -import org.opensearch.sql.ppl.parser.AstStatementBuilder; /** * {@code UnifiedQueryPlanner} provides a high-level API for parsing and analyzing queries using the @@ -93,36 +88,26 @@ public RelNode plan(String query) throws Exception { } } - /** AST-based planning via ANTLR parser → UnresolvedPlan → CalciteRelNodeVisitor. */ - @RequiredArgsConstructor + /** AST-based planning via context-owned parser → UnresolvedPlan → CalciteRelNodeVisitor. */ private static class CustomVisitorStrategy implements PlanningStrategy { private final UnifiedQueryContext context; - private final PPLSyntaxParser parser = new PPLSyntaxParser(); + private final UnifiedQueryParser parser; private final CalciteRelNodeVisitor relNodeVisitor = new CalciteRelNodeVisitor(new EmptyDataSourceService()); + @SuppressWarnings("unchecked") + CustomVisitorStrategy(UnifiedQueryContext context) { + this.context = context; + this.parser = (UnifiedQueryParser) context.getParser(); + } + @Override public RelNode plan(String query) { - UnresolvedPlan ast = parse(query); + UnresolvedPlan ast = parser.parse(query); RelNode logical = relNodeVisitor.analyze(ast, context.getPlanContext()); return preserveCollation(logical); } - private UnresolvedPlan parse(String query) { - ParseTree cst = parser.parse(query); - AstStatementBuilder astStmtBuilder = - new AstStatementBuilder( - new AstBuilder(query, context.getSettings()), - AstStatementBuilder.StatementBuilderContext.builder().build()); - Statement statement = cst.accept(astStmtBuilder); - - if (statement instanceof Query) { - return ((Query) statement).getPlan(); - } - throw new UnsupportedOperationException( - "Only query statements are supported but got " + statement.getClass().getSimpleName()); - } - private RelNode preserveCollation(RelNode logical) { RelCollation collation = logical.getTraitSet().getCollation(); if (!(logical instanceof Sort) && collation != RelCollations.EMPTY) { diff --git a/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java new file mode 100644 index 0000000000..b92e75bf34 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +/** Calcite SQL query parser that produces {@link SqlNode} as the native parse result. */ +@RequiredArgsConstructor +public class CalciteSqlQueryParser implements UnifiedQueryParser { + + /** Calcite plan context providing parser configuration (e.g., case sensitivity, conformance). */ + private final CalcitePlanContext planContext; + + @Override + public SqlNode parse(String query) { + try { + SqlParser parser = SqlParser.create(query, planContext.config.getParserConfig()); + return parser.parseQuery(); + } catch (SqlParseException e) { + throw new SyntaxCheckException("Failed to parse SQL query: " + e.getMessage()); + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java new file mode 100644 index 0000000000..af094404d7 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.statement.Query; +import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; +import org.opensearch.sql.ppl.parser.AstStatementBuilder; + +/** PPL query parser that produces {@link UnresolvedPlan} as the native parse result. */ +@RequiredArgsConstructor +public class PPLQueryParser implements UnifiedQueryParser { + + /** Settings containing execution limits and feature flags used by AST builders. */ + private final Settings settings; + + /** Reusable ANTLR-based PPL syntax parser. Stateless and thread-safe. */ + private final PPLSyntaxParser syntaxParser = new PPLSyntaxParser(); + + @Override + public UnresolvedPlan parse(String query) { + ParseTree cst = syntaxParser.parse(query); + AstStatementBuilder astStmtBuilder = + new AstStatementBuilder( + new AstBuilder(query, settings), + AstStatementBuilder.StatementBuilderContext.builder().build()); + Statement statement = cst.accept(astStmtBuilder); + + if (statement instanceof Query) { + return ((Query) statement).getPlan(); + } + throw new UnsupportedOperationException( + "Only query statements are supported but got " + statement.getClass().getSimpleName()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java new file mode 100644 index 0000000000..76918f93ea --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +/** + * Language-neutral query parser interface. Returns the native parse result for the language (e.g., + * {@code UnresolvedPlan} for PPL, {@code SqlNode} for Calcite SQL). + * + * @param the native parse result type for this language + */ +public interface UnifiedQueryParser { + + /** + * Parses the query and returns the native parse result. + * + * @param query the raw query string + * @return the native parse result + */ + T parse(String query); +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java new file mode 100644 index 0000000000..0cba9f7b71 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import org.apache.calcite.sql.parser.SqlParserFixture; +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.executor.QueryType; + +/** + * SQL parser tests using Calcite's {@link SqlParserFixture} for idiomatic parse-unparse assertions. + * Parser config is read from {@link org.opensearch.sql.api.UnifiedQueryContext} to stay in sync + * with production. + */ +public class UnifiedQueryParserSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testParseSelectStar() { + sql("SELECT * FROM catalog.employees") + .ok( + """ + SELECT * + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseSelectColumns() { + sql("SELECT id, name FROM catalog.employees") + .ok( + """ + SELECT `id`, `name` + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseFilter() { + sql(""" + SELECT name + FROM catalog.employees + WHERE age > 30\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + WHERE (`age` > 30)\ + """); + } + + @Test + public void testParseAggregate() { + sql(""" + SELECT department, count(*) AS cnt + FROM catalog.employees + GROUP BY department\ + """) + .ok( + """ + SELECT `department`, COUNT(*) AS `cnt` + FROM `catalog`.`employees` + GROUP BY `department`\ + """); + } + + @Test + public void testParseOrderBy() { + sql(""" + SELECT name + FROM catalog.employees + ORDER BY age DESC\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + ORDER BY `age` DESC\ + """); + } + + @Test + public void testParseJoin() { + sql(""" + SELECT a.id, b.name + FROM catalog.employees a + JOIN catalog.employees b ON a.id = b.age\ + """) + .ok( + """ + SELECT `a`.`id`, `b`.`name` + FROM `catalog`.`employees` AS `a` + INNER JOIN `catalog`.`employees` AS `b` ON (`a`.`id` = `b`.`age`)\ + """); + } + + @Test + public void testSyntaxErrorFails() { + sql("SELECT ^FROM^").fails("(?s).*Incorrect syntax near the keyword 'FROM'.*"); + } + + private SqlParserFixture sql(String sql) { + return SqlParserFixture.DEFAULT + .withConfig(c -> context.getPlanContext().config.getParserConfig()) + .sql(sql); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java new file mode 100644 index 0000000000..1b6b5181ae --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java @@ -0,0 +1,87 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import static java.util.Collections.emptyList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.opensearch.sql.ast.dsl.AstDSL.agg; +import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; +import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.allFields; +import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; +import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.exprList; +import static org.opensearch.sql.ast.dsl.AstDSL.field; +import static org.opensearch.sql.ast.dsl.AstDSL.filter; +import static org.opensearch.sql.ast.dsl.AstDSL.function; +import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.let; +import static org.opensearch.sql.ast.dsl.AstDSL.project; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.relation; + +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +public class UnifiedQueryParserTest extends UnifiedQueryTestBase { + + @Test + public void testParseSource() { + assertEqual( + "source = catalog.employees", + project(relation(qualifiedName("catalog", "employees")), allFields())); + } + + @Test + public void testParseFilter() { + assertEqual( + "source = catalog.employees | where age > 30", + project( + filter( + relation(qualifiedName("catalog", "employees")), + compare(">", field("age"), intLiteral(30))), + allFields())); + } + + @Test + public void testParseEval() { + assertEqual( + "source = catalog.employees | eval f = abs(id)", + project( + eval( + relation(qualifiedName("catalog", "employees")), + let(field("f"), function("abs", field("id")))), + allFields())); + } + + @Test + public void testParseStats() { + assertEqual( + "source = catalog.employees | stats count(age) by department", + project( + agg( + relation(qualifiedName("catalog", "employees")), + exprList(alias("count(age)", aggregate("count", field("age")))), + emptyList(), + exprList(alias("department", field("department"))), + defaultStatsArgs()), + allFields())); + } + + @Test + public void testSyntaxErrorThrows() { + assertThrows(SyntaxCheckException.class, () -> context.getParser().parse("not a valid query")); + } + + private void assertEqual(String query, UnresolvedPlan expected) { + UnresolvedPlan actual = (UnresolvedPlan) context.getParser().parse(query); + assertEquals(expected, actual); + } +}