From 4dd757389907f3a065d54cd32dd4dc154f11c37b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 01:22:21 +0000 Subject: [PATCH 01/47] Add kql() function transformation to view() in EXPLAIN AST output Implements KQL (Kusto Query Language) parsing for the kql() table function. The kql() function is transformed to view() with the KQL content parsed into equivalent SQL. Supports: - Table names as first pipe segment - project operator for column selection - filter operator for WHERE conditions with comparison operators Fixes 02366_kql_create_table test (3 statements: stmt5, stmt7, stmt11). --- internal/explain/functions.go | 207 ++++++++++++++++++ .../02366_kql_create_table/metadata.json | 8 +- 2 files changed, 208 insertions(+), 7 deletions(-) diff --git a/internal/explain/functions.go b/internal/explain/functions.go index dba21ee756..e89e319c6a 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -211,6 +211,11 @@ func windowSpecHasContent(w *ast.WindowSpec) bool { func handleSpecialFunction(sb *strings.Builder, n *ast.FunctionCall, alias string, indent string, depth int) bool { fnName := strings.ToUpper(n.Name) + // Handle kql() function - transforms KQL (Kusto Query Language) to SQL + if fnName == "KQL" { + return handleKQLFunction(sb, n, alias, indent, depth) + } + // Handle quantified comparison operators (ANY/ALL with comparison operators) if handled := handleQuantifiedComparison(sb, n, alias, indent, depth); handled { return true @@ -1672,3 +1677,205 @@ func explainWindowSpec(sb *strings.Builder, n *ast.WindowSpec, indent string, de fmt.Fprintf(sb, "%sWindowDefinition\n", indent) } } + +// handleKQLFunction handles the kql() table function. +// kql() transforms Kusto Query Language (KQL) into SQL and wraps it in a view() function. 
+// Example: kql($$Customers|project FirstName$$) -> view(SELECT FirstName FROM Customers) +func handleKQLFunction(sb *strings.Builder, n *ast.FunctionCall, alias string, indent string, depth int) bool { + if len(n.Arguments) != 1 { + return false + } + + // Get the KQL string from the argument + lit, ok := n.Arguments[0].(*ast.Literal) + if !ok || lit.Type != ast.LiteralString { + return false + } + + kqlStr, ok := lit.Value.(string) + if !ok { + return false + } + + // Parse the KQL string + parsed := parseKQL(kqlStr) + if parsed == nil { + return false + } + + // Output as Function view + fmt.Fprintf(sb, "%sFunction view (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s SelectWithUnionQuery (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + + // Calculate children count for SelectQuery + // Always have: TablesInSelectQuery, ExpressionList (columns) + // Optionally: WHERE clause (Function equals/etc) + selectChildren := 2 + if parsed.filter != nil { + selectChildren = 3 + } + + fmt.Fprintf(sb, "%s SelectQuery (children %d)\n", indent, selectChildren) + + // Output TablesInSelectQuery first + fmt.Fprintf(sb, "%s TablesInSelectQuery (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s TableExpression (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s TableIdentifier %s\n", indent, parsed.tableName) + + // Output WHERE clause if present (before columns in the order shown in expected output) + if parsed.filter != nil { + explainKQLFilter(sb, parsed.filter, indent+" ", depth+5) + } + + // Output columns (ExpressionList) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(parsed.columns)) + for _, col := range parsed.columns { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, col) + } + + return true +} + +// kqlParsed represents a parsed KQL query +type kqlParsed struct { + tableName string + columns []string + filter *kqlFilter +} + +// kqlFilter represents a KQL filter condition +type kqlFilter struct { + left string + operator string + right string +} + +// parseKQL parses a KQL string into its components +// Supports: TableName | project col1, col2, ... | filter condition +func parseKQL(kql string) *kqlParsed { + // Split by pipe operator + parts := splitKQLPipes(kql) + if len(parts) == 0 { + return nil + } + + result := &kqlParsed{} + + // First part is always the table name + result.tableName = strings.TrimSpace(parts[0]) + + // Process remaining operators + for i := 1; i < len(parts); i++ { + part := strings.TrimSpace(parts[i]) + + if strings.HasPrefix(strings.ToLower(part), "project ") { + // project col1, col2, ... 
+ columnsStr := strings.TrimPrefix(part, "project ") + columnsStr = strings.TrimPrefix(columnsStr, "PROJECT ") + cols := strings.Split(columnsStr, ",") + for _, col := range cols { + result.columns = append(result.columns, strings.TrimSpace(col)) + } + } else if strings.HasPrefix(strings.ToLower(part), "filter ") { + // filter condition + conditionStr := strings.TrimPrefix(part, "filter ") + conditionStr = strings.TrimPrefix(conditionStr, "FILTER ") + result.filter = parseKQLCondition(conditionStr) + } + } + + return result +} + +// splitKQLPipes splits a KQL string by pipe operators +func splitKQLPipes(kql string) []string { + var parts []string + var current strings.Builder + inQuote := false + quoteChar := byte(0) + + for i := 0; i < len(kql); i++ { + c := kql[i] + + if !inQuote && (c == '\'' || c == '"') { + inQuote = true + quoteChar = c + current.WriteByte(c) + } else if inQuote && c == quoteChar { + inQuote = false + current.WriteByte(c) + } else if !inQuote && c == '|' { + parts = append(parts, current.String()) + current.Reset() + } else { + current.WriteByte(c) + } + } + + if current.Len() > 0 { + parts = append(parts, current.String()) + } + + return parts +} + +// parseKQLCondition parses a KQL condition like "LastName=='Diaz'" +func parseKQLCondition(cond string) *kqlFilter { + cond = strings.TrimSpace(cond) + + // Try to match comparison operators + // KQL uses == for equality + operators := []string{"==", "!=", ">=", "<=", ">", "<"} + for _, op := range operators { + if idx := strings.Index(cond, op); idx > 0 { + left := strings.TrimSpace(cond[:idx]) + right := strings.TrimSpace(cond[idx+len(op):]) + return &kqlFilter{ + left: left, + operator: op, + right: right, + } + } + } + + return nil +} + +// explainKQLFilter outputs the EXPLAIN AST for a KQL filter condition +func explainKQLFilter(sb *strings.Builder, filter *kqlFilter, indent string, depth int) { + // Map KQL operators to ClickHouse function names + fnName := "equals" + switch filter.operator { + case "==": + fnName = "equals" + case "!=": + fnName = "notEquals" + case ">": + fnName = "greater" + case "<": + fnName = "less" + case ">=": + fnName = "greaterOrEquals" + case "<=": + fnName = "lessOrEquals" + } + + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, filter.left) + + // Output the right side - could be a string literal or identifier + rightVal := filter.right + if (strings.HasPrefix(rightVal, "'") && strings.HasSuffix(rightVal, "'")) || + (strings.HasPrefix(rightVal, "\"") && strings.HasSuffix(rightVal, "\"")) { + // String literal - remove quotes and escape for output + rightVal = rightVal[1 : len(rightVal)-1] + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, rightVal) + } else { + // Identifier + fmt.Fprintf(sb, "%s Identifier %s\n", indent, rightVal) + } +} diff --git a/parser/testdata/02366_kql_create_table/metadata.json b/parser/testdata/02366_kql_create_table/metadata.json index 2021dee830..0967ef424b 100644 --- a/parser/testdata/02366_kql_create_table/metadata.json +++ b/parser/testdata/02366_kql_create_table/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt11": true, - "stmt5": true, - "stmt7": true - } -} +{} From 2e961f19e8b5b39d31d8e6e6d86ac928696c17df Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 01:27:56 +0000 Subject: [PATCH 02/47] Support dotted identifiers in ALTER TABLE RENAME COLUMN Add parseDottedIdentifier helper to handle nested 
column names like n.x in RENAME COLUMN statements. Previously only single-part identifiers were captured, causing statements like "RENAME COLUMN n.x TO n.renamed_x" to lose the dot-separated suffix. Fixes tests: - 01213_alter_table_rename_nested (3 statements) - 01213_alter_rename_nested (2 statements) - 01278_alter_rename_combination (2 statements) - 03526_columns_substreams_in_wide_parts (2 statements) --- parser/parser.go | 35 +++++++++++++++---- .../01213_alter_rename_nested/metadata.json | 8 +---- .../metadata.json | 8 +---- .../metadata.json | 7 +--- .../metadata.json | 7 +--- 5 files changed, 33 insertions(+), 32 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 0c3bae6c16..e61b526782 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5657,15 +5657,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { if p.currentIs(token.COLUMN) { cmd.Type = ast.AlterRenameColumn p.nextToken() - if p.currentIs(token.IDENT) { - cmd.ColumnName = p.current.Value - p.nextToken() + // Parse column name (can be dotted like n.x for nested columns) + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + cmd.ColumnName = p.parseDottedIdentifier() } if p.currentIs(token.TO) { p.nextToken() - if p.currentIs(token.IDENT) { - cmd.NewName = p.current.Value - p.nextToken() + // Parse new column name (can also be dotted) + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + cmd.NewName = p.parseDottedIdentifier() } } } @@ -7248,6 +7248,29 @@ func (p *Parser) parseIdentifierName() string { return "" } +// parseDottedIdentifier parses an identifier that may contain dots (e.g., n.x for nested columns) +func (p *Parser) parseDottedIdentifier() string { + if !p.currentIs(token.IDENT) && !p.current.Token.IsKeyword() { + return "" + } + + parts := []string{p.current.Value} + p.nextToken() + + // Continue parsing if followed by dots + for p.currentIs(token.DOT) { + p.nextToken() // skip . + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + parts = append(parts, p.current.Value) + p.nextToken() + } else { + break + } + } + + return strings.Join(parts, ".") +} + // parseFromSelectSyntax handles ClickHouse's FROM ... 
SELECT syntax // e.g., FROM numbers(1) SELECT number func (p *Parser) parseFromSelectSyntax() *ast.SelectWithUnionQuery { diff --git a/parser/testdata/01213_alter_rename_nested/metadata.json b/parser/testdata/01213_alter_rename_nested/metadata.json index dca5d7811f..0967ef424b 100644 --- a/parser/testdata/01213_alter_rename_nested/metadata.json +++ b/parser/testdata/01213_alter_rename_nested/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt12": true, - "stmt7": true, - "stmt8": true - } -} +{} diff --git a/parser/testdata/01213_alter_table_rename_nested/metadata.json b/parser/testdata/01213_alter_table_rename_nested/metadata.json index dca5d7811f..0967ef424b 100644 --- a/parser/testdata/01213_alter_table_rename_nested/metadata.json +++ b/parser/testdata/01213_alter_table_rename_nested/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt12": true, - "stmt7": true, - "stmt8": true - } -} +{} diff --git a/parser/testdata/01278_alter_rename_combination/metadata.json b/parser/testdata/01278_alter_rename_combination/metadata.json index 089f308ee8..0967ef424b 100644 --- a/parser/testdata/01278_alter_rename_combination/metadata.json +++ b/parser/testdata/01278_alter_rename_combination/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt20": true, - "stmt8": true - } -} +{} diff --git a/parser/testdata/03526_columns_substreams_in_wide_parts/metadata.json b/parser/testdata/03526_columns_substreams_in_wide_parts/metadata.json index 3a47899007..0967ef424b 100644 --- a/parser/testdata/03526_columns_substreams_in_wide_parts/metadata.json +++ b/parser/testdata/03526_columns_substreams_in_wide_parts/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt46": true, - "stmt50": true - } -} +{} From 5b10dde26ad65888cde2cfba666b517b5faee629 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 01:32:01 +0000 Subject: [PATCH 03/47] Add support for multi-word data types in parser Handle SQL standard multi-word type names: - DOUBLE PRECISION - INT/INTEGER/TINYINT/SMALLINT/BIGINT/INT1 with SIGNED/UNSIGNED - CHAR/CHARACTER/NCHAR with VARYING or LARGE OBJECT - BINARY with VARYING or LARGE OBJECT - NATIONAL CHAR/CHARACTER with optional VARYING or LARGE OBJECT Fixes 01144_multiword_data_types test (3 statements). 
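As a rough, self-contained illustration of the keyword lookahead this patch adds (joinMultiWordType and the string-slice stand-in for the token stream are invented for this sketch, and the NATIONAL ... forms are omitted):

package main

import (
	"fmt"
	"strings"
)

// joinMultiWordType glues SQL-standard multi-word type names into one name,
// mirroring the lookahead described above on a plain word list.
func joinMultiWordType(first string, words []string) string {
	name := first
	i := 0
	peek := func() string {
		if i < len(words) {
			return strings.ToUpper(words[i])
		}
		return ""
	}
	take := func() { name += " " + words[i]; i++ }

	switch strings.ToUpper(first) {
	case "DOUBLE":
		if peek() == "PRECISION" {
			take()
		}
	case "CHAR", "CHARACTER", "NCHAR", "BINARY":
		if peek() == "VARYING" {
			take()
		} else if peek() == "LARGE" {
			take()
			if peek() == "OBJECT" {
				take()
			}
		}
	case "INT", "INTEGER", "INT1", "TINYINT", "SMALLINT", "BIGINT":
		if peek() == "SIGNED" || peek() == "UNSIGNED" {
			take()
		}
	}
	return name
}

func main() {
	fmt.Println(joinMultiWordType("DOUBLE", []string{"PRECISION"}))     // DOUBLE PRECISION
	fmt.Println(joinMultiWordType("CHAR", []string{"LARGE", "OBJECT"})) // CHAR LARGE OBJECT
	fmt.Println(joinMultiWordType("BIGINT", []string{"UNSIGNED"}))      // BIGINT UNSIGNED
}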
--- parser/parser.go | 78 ++++++++++++++++++- .../01144_multiword_data_types/metadata.json | 8 +- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index e61b526782..528b678ad8 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -4320,8 +4320,8 @@ func (p *Parser) parseDataType() *ast.DataType { // For MySQL-compatible INT types, handle display width and UNSIGNED/SIGNED upperName := strings.ToUpper(dt.Name) - isMySQLIntType := upperName == "INT" || upperName == "TINYINT" || upperName == "SMALLINT" || - upperName == "MEDIUMINT" || upperName == "BIGINT" + isMySQLIntType := upperName == "INT" || upperName == "INT1" || upperName == "TINYINT" || upperName == "SMALLINT" || + upperName == "MEDIUMINT" || upperName == "BIGINT" || upperName == "INTEGER" if isMySQLIntType && p.currentIs(token.LPAREN) { // Skip the display width parameter (e.g., INT(11)) @@ -4333,7 +4333,7 @@ func (p *Parser) parseDataType() *ast.DataType { } // Handle UNSIGNED/SIGNED modifiers for MySQL INT types - if isMySQLIntType && p.currentIs(token.IDENT) { + if isMySQLIntType && (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) { modifier := strings.ToUpper(p.current.Value) if modifier == "UNSIGNED" || modifier == "SIGNED" { dt.Name = dt.Name + " " + p.current.Value @@ -4341,6 +4341,78 @@ func (p *Parser) parseDataType() *ast.DataType { } } + // Handle DOUBLE PRECISION + if upperName == "DOUBLE" && (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) { + if strings.ToUpper(p.current.Value) == "PRECISION" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } + } + + // Handle multi-word character types + // CHAR VARYING, CHAR LARGE OBJECT, CHARACTER VARYING, CHARACTER LARGE OBJECT + // NCHAR VARYING, NCHAR LARGE OBJECT + if upperName == "CHAR" || upperName == "CHARACTER" || upperName == "NCHAR" { + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + modifier := strings.ToUpper(p.current.Value) + if modifier == "VARYING" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } else if modifier == "LARGE" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + // Expect OBJECT + if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && strings.ToUpper(p.current.Value) == "OBJECT" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } + } + } + } + + // Handle BINARY VARYING, BINARY LARGE OBJECT + if upperName == "BINARY" && (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) { + modifier := strings.ToUpper(p.current.Value) + if modifier == "VARYING" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } else if modifier == "LARGE" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + // Expect OBJECT + if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && strings.ToUpper(p.current.Value) == "OBJECT" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } + } + } + + // Handle NATIONAL CHAR, NATIONAL CHARACTER, etc. 
+ if upperName == "NATIONAL" && (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) { + // NATIONAL can be followed by CHAR or CHARACTER + nextWord := strings.ToUpper(p.current.Value) + if nextWord == "CHAR" || nextWord == "CHARACTER" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + // Can be followed by VARYING or LARGE OBJECT + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + modifier := strings.ToUpper(p.current.Value) + if modifier == "VARYING" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } else if modifier == "LARGE" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + // Expect OBJECT + if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && strings.ToUpper(p.current.Value) == "OBJECT" { + dt.Name = dt.Name + " " + p.current.Value + p.nextToken() + } + } + } + } + } + // Parse type parameters if p.currentIs(token.LPAREN) { dt.HasParentheses = true diff --git a/parser/testdata/01144_multiword_data_types/metadata.json b/parser/testdata/01144_multiword_data_types/metadata.json index 22ffe6f866..0967ef424b 100644 --- a/parser/testdata/01144_multiword_data_types/metadata.json +++ b/parser/testdata/01144_multiword_data_types/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt11": true, - "stmt3": true, - "stmt7": true - } -} +{} From 350e9c71f10a2c76d837964e1970da929f649b3c Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 01:35:16 +0000 Subject: [PATCH 04/47] Escape single quotes in alias names in EXPLAIN output When an alias contains single quotes (e.g., "'String'" as an alias), they need to be escaped as \' in the EXPLAIN AST output to match ClickHouse behavior. Fixes tests: - 01101_literal_column_clash (3 statements) - 01950_aliases_bad_cast (1 statement) --- internal/explain/expressions.go | 7 +++++-- parser/testdata/01101_literal_column_clash/metadata.json | 8 +------- parser/testdata/01950_aliases_bad_cast/metadata.json | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 7cb700fdae..4bb38fe5f1 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -8,9 +8,12 @@ import ( "github.com/sqlc-dev/doubleclick/ast" ) -// escapeAlias escapes backslashes in alias names for EXPLAIN output +// escapeAlias escapes backslashes and single quotes in alias names for EXPLAIN output func escapeAlias(alias string) string { - return strings.ReplaceAll(alias, "\\", "\\\\") + // Escape backslashes first, then single quotes + result := strings.ReplaceAll(alias, "\\", "\\\\") + result = strings.ReplaceAll(result, "'", "\\'") + return result } func explainIdentifier(sb *strings.Builder, n *ast.Identifier, indent string) { diff --git a/parser/testdata/01101_literal_column_clash/metadata.json b/parser/testdata/01101_literal_column_clash/metadata.json index fffcb7d38b..0967ef424b 100644 --- a/parser/testdata/01101_literal_column_clash/metadata.json +++ b/parser/testdata/01101_literal_column_clash/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt2": true, - "stmt3": true, - "stmt4": true - } -} +{} diff --git a/parser/testdata/01950_aliases_bad_cast/metadata.json b/parser/testdata/01950_aliases_bad_cast/metadata.json index af48d4c110..0967ef424b 100644 --- a/parser/testdata/01950_aliases_bad_cast/metadata.json +++ b/parser/testdata/01950_aliases_bad_cast/metadata.json @@ -1 +1 @@ -{"explain_todo":{"stmt2":true}} +{} From cceec3bcb20d10e3f6a8ddadb9ee6c79cc1b4128 Mon Sep 17 00:00:00 2001 From: 
Claude Date: Sat, 3 Jan 2026 01:41:38 +0000 Subject: [PATCH 05/47] Fix handling of INT64_MIN and very large negative integers in CAST Two fixes: 1. In formatExprAsString, avoid overflow when formatting INT64_MIN by checking if the value is already negative before trying to negate it 2. In parseUnaryMinus, properly handle negative numbers larger than int64 can hold (like -9223372036854775809 for Int128) by storing them as strings when strconv.ParseInt fails Fixes 02887_byteswap test (3 statements: stmt27, stmt29, stmt31). --- internal/explain/format.go | 21 +++++++++++--------- parser/expression.go | 12 +++++++++-- parser/testdata/02887_byteswap/metadata.json | 8 +------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/internal/explain/format.go b/internal/explain/format.go index 4c266ed03c..5317dabe21 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -458,19 +458,19 @@ func formatExprAsString(expr ast.Expression) string { switch e := expr.(type) { case *ast.Literal: // Handle explicitly negative literals (like -0 in -0::Int16) - prefix := "" - if e.Negative { - prefix = "-" - } switch e.Type { case ast.LiteralInteger: - // For explicitly negative literals, show the absolute value with prefix + // For explicitly negative literals, we need to format with the negative sign if e.Negative { switch v := e.Value.(type) { case int64: - if v <= 0 { - return fmt.Sprintf("-%d", -v) + // If the value is already negative (including INT64_MIN), just print it directly + // This avoids overflow when trying to negate INT64_MIN + if v < 0 { + return fmt.Sprintf("%d", v) } + // Value is positive or zero, add the negative sign + return fmt.Sprintf("-%d", v) case uint64: return fmt.Sprintf("-%d", v) } @@ -484,9 +484,12 @@ func formatExprAsString(expr ast.Expression) string { if e.Negative { switch v := e.Value.(type) { case float64: - if v <= 0 { - return fmt.Sprintf("%s%v", prefix, -v) + // If the value is already negative, just print it directly + if v < 0 { + return fmt.Sprintf("%v", v) } + // Value is positive or zero, add the negative sign + return fmt.Sprintf("-%v", v) } } return fmt.Sprintf("%v", e.Value) diff --git a/parser/expression.go b/parser/expression.go index 1aeac9e127..908f87af9d 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -1068,8 +1068,16 @@ func (p *Parser) parseUnaryMinus() ast.Expression { lit.Value = f lit.Source = numVal // Preserve original source text } else { - i, _ := strconv.ParseInt(numVal, 10, 64) - lit.Value = i + // Try to parse as int64 + i, err := strconv.ParseInt(numVal, 10, 64) + if err != nil { + // Number is too large for int64, store as string (for Int128/Int256) + lit.Type = ast.LiteralString + lit.Value = numVal + lit.IsBigInt = true + } else { + lit.Value = i + } } p.nextToken() // move past number // Apply postfix operators like :: using the expression parsing loop diff --git a/parser/testdata/02887_byteswap/metadata.json b/parser/testdata/02887_byteswap/metadata.json index aa054347dc..0967ef424b 100644 --- a/parser/testdata/02887_byteswap/metadata.json +++ b/parser/testdata/02887_byteswap/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt27": true, - "stmt29": true, - "stmt31": true - } -} +{} From 367bb0db26499b1f1271fe65256d4b064b0d0265 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 01:49:33 +0000 Subject: [PATCH 06/47] Add tuple expansion and regex pattern EXCEPT support - Handle expression.* syntax for tuple expansion in parseDotAccess() - Add Pattern field to ColumnTransformer for 
regex-based EXCEPT - Parse string patterns in EXCEPT clauses (e.g., EXCEPT('hello|world')) - Output pattern-based EXCEPT as ColumnsExceptTransformer with String node Fixes test 03101_analyzer_identifiers_4 (stmt7, stmt9, stmt14) Also fixes 01470_columns_transformers2 (stmt4) --- ast/ast.go | 1 + internal/explain/expressions.go | 11 ++++++-- parser/expression.go | 28 ++++++++++++++++++- .../01470_columns_transformers2/metadata.json | 6 +--- .../metadata.json | 8 +----- 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 4a6ae9ff1f..53d12fa6c4 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1360,6 +1360,7 @@ type ColumnTransformer struct { Apply string `json:"apply,omitempty"` // function name for APPLY ApplyLambda Expression `json:"apply_lambda,omitempty"` // lambda expression for APPLY x -> expr Except []string `json:"except,omitempty"` // column names for EXCEPT + Pattern string `json:"pattern,omitempty"` // regex pattern for EXCEPT('pattern') Replaces []*ReplaceExpr `json:"replaces,omitempty"` // replacement expressions for REPLACE } diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 4bb38fe5f1..a62bcb38e8 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -792,9 +792,14 @@ func explainSingleTransformer(sb *strings.Builder, t *ast.ColumnTransformer, ind case "apply": fmt.Fprintf(sb, "%s ColumnsApplyTransformer\n", indent) case "except": - fmt.Fprintf(sb, "%s ColumnsExceptTransformer (children %d)\n", indent, len(t.Except)) - for _, col := range t.Except { - fmt.Fprintf(sb, "%s Identifier %s\n", indent, col) + // If it's a regex pattern, output without children + if t.Pattern != "" { + fmt.Fprintf(sb, "%s ColumnsExceptTransformer\n", indent) + } else { + fmt.Fprintf(sb, "%s ColumnsExceptTransformer (children %d)\n", indent, len(t.Except)) + for _, col := range t.Except { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, col) + } } case "replace": fmt.Fprintf(sb, "%s ColumnsReplaceTransformer (children %d)\n", indent, len(t.Replaces)) diff --git a/parser/expression.go b/parser/expression.go index 908f87af9d..c43ef299db 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -2285,6 +2285,17 @@ func (p *Parser) parseDotAccess(left ast.Expression) ast.Expression { } } + // Check for expression.* (tuple expansion) where left is not an identifier + // This handles cases like tuple(1, 'a').* or CAST(...).* + if p.currentIs(token.ASTERISK) { + // This is a tuple expansion - it becomes an Asterisk with the expression as context + // In ClickHouse EXPLAIN AST, this is shown simply as Asterisk + p.nextToken() // skip * + return &ast.Asterisk{ + Position: left.Pos(), + } + } + // Check for tuple access with number if p.currentIs(token.NUMBER) { expr := &ast.TupleAccess{ @@ -2731,12 +2742,27 @@ func (p *Parser) parseAsteriskExcept(asterisk *ast.Asterisk) ast.Expression { p.nextToken() } - // EXCEPT can have optional parentheses: * EXCEPT (col1, col2) or * EXCEPT col + // EXCEPT can have optional parentheses: * EXCEPT (col1, col2) or * EXCEPT col or * EXCEPT('pattern') hasParens := p.currentIs(token.LPAREN) if hasParens { p.nextToken() // skip ( } + // Check for regex pattern (string literal) + if p.currentIs(token.STRING) { + pattern := p.current.Value + p.nextToken() + asterisk.Transformers = append(asterisk.Transformers, &ast.ColumnTransformer{ + Position: pos, + Type: "except", + Pattern: pattern, + }) + if hasParens { + p.expect(token.RPAREN) + } + return asterisk + } + var exceptCols 
[]string // Parse column names (can be IDENT or keywords) for { diff --git a/parser/testdata/01470_columns_transformers2/metadata.json b/parser/testdata/01470_columns_transformers2/metadata.json index b65b07d7a6..0967ef424b 100644 --- a/parser/testdata/01470_columns_transformers2/metadata.json +++ b/parser/testdata/01470_columns_transformers2/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt4": true - } -} +{} diff --git a/parser/testdata/03101_analyzer_identifiers_4/metadata.json b/parser/testdata/03101_analyzer_identifiers_4/metadata.json index 16cecbd2f7..0967ef424b 100644 --- a/parser/testdata/03101_analyzer_identifiers_4/metadata.json +++ b/parser/testdata/03101_analyzer_identifiers_4/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt14": true, - "stmt7": true, - "stmt9": true - } -} +{} From 51ba8dbf2fabc7e6fbdae7781df3a6b6eb873aea Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:07:03 +0000 Subject: [PATCH 07/47] Add INDEX and SETTINGS support for ATTACH TABLE statements - Add Indexes field to AttachQuery struct in ast.go - Add Settings field to AttachQuery struct in ast.go - Parse INDEX definitions in ATTACH TABLE column lists - Parse SETTINGS clause in ATTACH TABLE statements - Update explainAttachQuery to output indexes and settings correctly - Handle engine parentheses with empty argument list Fixes test 01249_bad_arguments_for_bloom_filter (stmt10, stmt13, stmt16) Also fixes 01601_detach_permanently and 02990_rmt_replica_path_uuid --- ast/ast.go | 2 + internal/explain/statements.go | 51 +++++++++++++++++-- parser/parser.go | 10 ++++ .../metadata.json | 8 +-- .../01601_detach_permanently/metadata.json | 7 +-- .../02990_rmt_replica_path_uuid/metadata.json | 6 +-- 6 files changed, 62 insertions(+), 22 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 53d12fa6c4..68c6eccb32 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -748,6 +748,7 @@ type AttachQuery struct { Dictionary string `json:"dictionary,omitempty"` Columns []*ColumnDeclaration `json:"columns,omitempty"` ColumnsPrimaryKey []Expression `json:"columns_primary_key,omitempty"` // PRIMARY KEY in column list + Indexes []*IndexDefinition `json:"indexes,omitempty"` // INDEX definitions in column list Engine *EngineClause `json:"engine,omitempty"` OrderBy []Expression `json:"order_by,omitempty"` PrimaryKey []Expression `json:"primary_key,omitempty"` @@ -756,6 +757,7 @@ type AttachQuery struct { InnerUUID string `json:"inner_uuid,omitempty"` // TO INNER UUID clause PartitionBy Expression `json:"partition_by,omitempty"` SelectQuery Statement `json:"select_query,omitempty"` // AS SELECT clause + Settings []*SettingExpr `json:"settings,omitempty"` // SETTINGS clause } func (a *AttachQuery) Pos() token.Position { return a.Position } diff --git a/internal/explain/statements.go b/internal/explain/statements.go index c9010bb659..64fb1b1ecf 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -1283,7 +1283,7 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, if n.Database != "" && n.Table != "" { children++ // extra identifier for database } - hasColumns := len(n.Columns) > 0 || len(n.ColumnsPrimaryKey) > 0 + hasColumns := len(n.Columns) > 0 || len(n.ColumnsPrimaryKey) > 0 || len(n.Indexes) > 0 if hasColumns { children++ } @@ -1291,7 +1291,7 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, if hasSelectQuery { children++ } - hasStorage := n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || 
n.PartitionBy != nil + hasStorage := n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || n.PartitionBy != nil || len(n.Settings) > 0 if hasStorage { children++ // ViewTargets or Storage definition } @@ -1322,6 +1322,9 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, if len(n.Columns) > 0 { columnsChildren++ } + if len(n.Indexes) > 0 { + columnsChildren++ + } if len(n.ColumnsPrimaryKey) > 0 { columnsChildren++ } @@ -1332,6 +1335,13 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, Column(sb, col, depth+3) } } + // Output indexes + if len(n.Indexes) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Indexes)) + for _, idx := range n.Indexes { + Index(sb, idx, depth+3) + } + } // Output inline PRIMARY KEY (from column list) if len(n.ColumnsPrimaryKey) > 0 { if len(n.ColumnsPrimaryKey) > 1 { @@ -1370,13 +1380,28 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, if len(n.PrimaryKey) > 0 { storageChildren++ } + if len(n.Settings) > 0 { + storageChildren++ + } // For materialized views, wrap in ViewTargets if n.IsMaterializedView { fmt.Fprintf(sb, "%s ViewTargets (children 1)\n", indent) fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) if n.Engine != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + if n.Engine.HasParentheses { + fmt.Fprintf(sb, "%s Function %s (children 1)\n", indent, n.Engine.Name) + if len(n.Engine.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Engine.Parameters)) + for _, param := range n.Engine.Parameters { + Node(sb, param, depth+5) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } } if n.PartitionBy != nil { Node(sb, n.PartitionBy, depth+3) @@ -1391,10 +1416,25 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, Node(sb, expr, depth+3) } } + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } } else { fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) if n.Engine != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + if n.Engine.HasParentheses { + fmt.Fprintf(sb, "%s Function %s (children 1)\n", indent, n.Engine.Name) + if len(n.Engine.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Engine.Parameters)) + for _, param := range n.Engine.Parameters { + Node(sb, param, depth+4) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } } if n.PartitionBy != nil { Node(sb, n.PartitionBy, depth+2) @@ -1409,6 +1449,9 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, Node(sb, expr, depth+2) } } + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } } } } diff --git a/parser/parser.go b/parser/parser.go index 528b678ad8..9ac1600074 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -7045,6 +7045,12 @@ func (p *Parser) parseAttach() *ast.AttachQuery { attach.ColumnsPrimaryKey = append(attach.ColumnsPrimaryKey, expr) } } + } else if p.currentIs(token.INDEX) { + // Handle INDEX definition + idx := p.parseIndexDefinition() + if idx != nil { + attach.Indexes = append(attach.Indexes, idx) + } } else { col := p.parseColumnDeclaration() if col != nil { @@ -7125,6 +7131,10 @@ func (p *Parser) parseAttach() 
*ast.AttachQuery { if p.currentIs(token.SELECT) { attach.SelectQuery = p.parseSelectWithUnion() } + case p.currentIs(token.SETTINGS): + // SETTINGS clause + p.nextToken() + attach.Settings = p.parseSettingsList() default: return attach } diff --git a/parser/testdata/01249_bad_arguments_for_bloom_filter/metadata.json b/parser/testdata/01249_bad_arguments_for_bloom_filter/metadata.json index 0bb678734a..0967ef424b 100644 --- a/parser/testdata/01249_bad_arguments_for_bloom_filter/metadata.json +++ b/parser/testdata/01249_bad_arguments_for_bloom_filter/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt10": true, - "stmt13": true, - "stmt16": true - } -} +{} diff --git a/parser/testdata/01601_detach_permanently/metadata.json b/parser/testdata/01601_detach_permanently/metadata.json index 89f4a30fbd..0967ef424b 100644 --- a/parser/testdata/01601_detach_permanently/metadata.json +++ b/parser/testdata/01601_detach_permanently/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt22": true, - "stmt67": true - } -} +{} diff --git a/parser/testdata/02990_rmt_replica_path_uuid/metadata.json b/parser/testdata/02990_rmt_replica_path_uuid/metadata.json index 1295a45747..0967ef424b 100644 --- a/parser/testdata/02990_rmt_replica_path_uuid/metadata.json +++ b/parser/testdata/02990_rmt_replica_path_uuid/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt3": true - } -} +{} From 744ac7b566a2a8568938116ee0946d4f122dc1a0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:11:23 +0000 Subject: [PATCH 08/47] Handle COMMENT in MODIFY COLUMN without data type When parsing MODIFY COLUMN col_name COMMENT 'comment', the COMMENT keyword should not be parsed as a data type. Add COMMENT to the list of tokens that indicate the type is omitted in parseColumnDeclaration. 
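A toy version of the check (typeIsOmitted and the plain-string token are invented for illustration; the parser itself works on token constants):

package main

import "fmt"

// typeIsOmitted reports whether the token after the column name means the
// data type was left out, so the parser should not try to read a type.
func typeIsOmitted(next string) bool {
	switch next {
	case "DEFAULT", "MATERIALIZED", "ALIAS", "CODEC", "COMMENT":
		return true
	}
	return false
}

func main() {
	// ALTER TABLE t MODIFY COLUMN c COMMENT 'note' -- no type before COMMENT
	fmt.Println(typeIsOmitted("COMMENT")) // true
	fmt.Println(typeIsOmitted("String"))  // false
}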
Fixes test 00725_comment_columns_long (stmt9, stmt19, stmt21) --- parser/parser.go | 3 ++- parser/testdata/00725_comment_columns_long/metadata.json | 8 +------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 9ac1600074..6513a97ed3 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -4199,8 +4199,9 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { // Check if next token indicates type is omitted // DEFAULT/MATERIALIZED/ALIAS indicate we go straight to default expression // CODEC indicates we go straight to codec specification (no type) + // COMMENT indicates we go straight to comment (no type) isCodec := p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "CODEC" - if p.currentIs(token.DEFAULT) || p.currentIs(token.MATERIALIZED) || p.currentIs(token.ALIAS) || isCodec { + if p.currentIs(token.DEFAULT) || p.currentIs(token.MATERIALIZED) || p.currentIs(token.ALIAS) || isCodec || p.currentIs(token.COMMENT) { // Type is omitted, skip to parsing below } else { // Parse data type diff --git a/parser/testdata/00725_comment_columns_long/metadata.json b/parser/testdata/00725_comment_columns_long/metadata.json index 2da0074a29..0967ef424b 100644 --- a/parser/testdata/00725_comment_columns_long/metadata.json +++ b/parser/testdata/00725_comment_columns_long/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt19": true, - "stmt21": true, - "stmt9": true - } -} +{} From 98d87e571cd5ae37f825e5ce4cf5f37e33b8eee9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:37:39 +0000 Subject: [PATCH 09/47] Fix ANY/ALL keyword conflict with any()/all() function calls in expressions The ANY/ALL subquery modifier check in parseBinaryExpression was incorrectly triggering for all binary operators. When parsing expressions like `any(x) >= 1 AND any(y) >= 2`, the parser would see the `any` keyword after the AND operator and attempt to parse it as `expr >= ANY(subquery)` pattern, causing incorrect AST structure. This fix restricts the ANY/ALL check to only comparison operators (=, ==, !=, <>, <, <=, >, >=) where this pattern is valid, preventing conflicts with any()/all() function calls in AND/OR expressions. Also flatten both sides of AND/OR chains in collectLogicalOperands for correct EXPLAIN output matching ClickHouse format. --- internal/explain/expressions.go | 9 +++++++-- parser/expression.go | 5 ++++- .../testdata/02402_external_disk_metrics/metadata.json | 8 +------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index a62bcb38e8..490f15ba53 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -398,8 +398,13 @@ func collectLogicalOperands(n *ast.BinaryExpr) []ast.Expression { operands = append(operands, n.Left) } - // Don't flatten right side - explicit parentheses would be on the left in left-associative parsing - operands = append(operands, n.Right) + // Also flatten right side if it's the same operator and not parenthesized + // This handles both left-associative and right-associative parsing + if right, ok := n.Right.(*ast.BinaryExpr); ok && right.Op == n.Op && !right.Parenthesized { + operands = append(operands, collectLogicalOperands(right)...) 
+ } else { + operands = append(operands, n.Right) + } return operands } diff --git a/parser/expression.go b/parser/expression.go index c43ef299db..3913b04dd0 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -1922,7 +1922,10 @@ func (p *Parser) parseBinaryExpression(left ast.Expression) ast.Expression { p.nextToken() // Check for ANY/ALL subquery comparison modifier: expr >= ANY(subquery) - if p.currentIs(token.ANY) || p.currentIs(token.ALL) { + // Only apply for comparison operators, not for AND/OR which might be followed by any() function calls + isComparisonOp := expr.Op == "=" || expr.Op == "==" || expr.Op == "!=" || expr.Op == "<>" || + expr.Op == "<" || expr.Op == "<=" || expr.Op == ">" || expr.Op == ">=" + if isComparisonOp && (p.currentIs(token.ANY) || p.currentIs(token.ALL)) { modifier := strings.ToLower(p.current.Value) p.nextToken() if p.currentIs(token.LPAREN) { diff --git a/parser/testdata/02402_external_disk_metrics/metadata.json b/parser/testdata/02402_external_disk_metrics/metadata.json index 685bb7b242..0967ef424b 100644 --- a/parser/testdata/02402_external_disk_metrics/metadata.json +++ b/parser/testdata/02402_external_disk_metrics/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt16": true, - "stmt17": true, - "stmt18": true - } -} +{} From 27043784274798d46fa8f893f024edf050be9b05 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:41:09 +0000 Subject: [PATCH 10/47] Remove incorrect ln->log function name normalization ClickHouse's EXPLAIN AST outputs 'ln' for the natural logarithm function, not 'log'. The previous incorrect mapping was causing test failures. --- internal/explain/format.go | 1 - parser/testdata/00087_math_functions/metadata.json | 8 +------- parser/testdata/03595_funcs_on_zero/metadata.json | 6 +++++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/internal/explain/format.go b/internal/explain/format.go index 5317dabe21..1e0aac7062 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -378,7 +378,6 @@ func NormalizeFunctionName(name string) string { "ltrim": "trimLeft", "rtrim": "trimRight", "ceiling": "ceil", - "ln": "log", "log10": "log10", "log2": "log2", "rand": "rand", diff --git a/parser/testdata/00087_math_functions/metadata.json b/parser/testdata/00087_math_functions/metadata.json index 436f0e52ef..0967ef424b 100644 --- a/parser/testdata/00087_math_functions/metadata.json +++ b/parser/testdata/00087_math_functions/metadata.json @@ -1,7 +1 @@ -{ - "explain_todo": { - "stmt103": true, - "stmt107": true, - "stmt113": true - } -} +{} diff --git a/parser/testdata/03595_funcs_on_zero/metadata.json b/parser/testdata/03595_funcs_on_zero/metadata.json index fcf5ba3c19..28a683eda9 100644 --- a/parser/testdata/03595_funcs_on_zero/metadata.json +++ b/parser/testdata/03595_funcs_on_zero/metadata.json @@ -1 +1,5 @@ -{"explain_todo":{"stmt151":true,"stmt58":true}} +{ + "explain_todo": { + "stmt58": true + } +} From 202b3729793f23bc72c11491a95067f6d415050d Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:46:08 +0000 Subject: [PATCH 11/47] Handle boolean literals correctly in CAST expressions Boolean literals in :: cast syntax should output as Bool_1/Bool_0 format instead of string 'true'/'false' to match ClickHouse EXPLAIN. 
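A minimal stand-alone sketch of the output shape (explainBoolCast is a made-up helper, not the explain package's API; the real code renders AST nodes, escapes the type literal, and its exact indentation may differ):

package main

import (
	"fmt"
	"strings"
)

// explainBoolCast prints a CAST node whose argument is a boolean literal,
// rendered as Bool_1/Bool_0 rather than the strings 'true'/'false'.
func explainBoolCast(sb *strings.Builder, value bool, typeName, indent string) {
	fmt.Fprintf(sb, "%sFunction CAST (children 1)\n", indent)
	fmt.Fprintf(sb, "%s ExpressionList (children 2)\n", indent)
	if value {
		fmt.Fprintf(sb, "%s  Literal Bool_1\n", indent)
	} else {
		fmt.Fprintf(sb, "%s  Literal Bool_0\n", indent)
	}
	fmt.Fprintf(sb, "%s  Literal '%s'\n", indent, typeName)
}

func main() {
	var sb strings.Builder
	explainBoolCast(&sb, true, "Bool", " ") // roughly what true::Bool produces
	fmt.Print(sb.String())
}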
--- internal/explain/functions.go | 7 +++++++ parser/testdata/00727_concat/metadata.json | 2 -- parser/testdata/02495_concat_with_separator/metadata.json | 7 +------ .../02935_format_with_arbitrary_types/metadata.json | 2 -- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/internal/explain/functions.go b/internal/explain/functions.go index e89e319c6a..5fbb09b9e0 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -614,6 +614,13 @@ func explainCastExprWithAlias(sb *strings.Builder, n *ast.CastExpr, alias string } else if lit.Type == ast.LiteralNull { // NULL stays as Literal NULL, not formatted as a string fmt.Fprintf(sb, "%s Literal NULL\n", indent) + } else if lit.Type == ast.LiteralBoolean { + // Booleans use Bool_1/Bool_0 format + if lit.Value.(bool) { + fmt.Fprintf(sb, "%s Literal Bool_1\n", indent) + } else { + fmt.Fprintf(sb, "%s Literal Bool_0\n", indent) + } } else { // Simple literal - format as string (escape special chars for string literals) exprStr := formatExprAsString(lit) diff --git a/parser/testdata/00727_concat/metadata.json b/parser/testdata/00727_concat/metadata.json index ab953e9485..127dc52ed4 100644 --- a/parser/testdata/00727_concat/metadata.json +++ b/parser/testdata/00727_concat/metadata.json @@ -1,7 +1,5 @@ { "explain_todo": { - "stmt19": true, - "stmt20": true, "stmt44": true } } diff --git a/parser/testdata/02495_concat_with_separator/metadata.json b/parser/testdata/02495_concat_with_separator/metadata.json index d689cec729..0967ef424b 100644 --- a/parser/testdata/02495_concat_with_separator/metadata.json +++ b/parser/testdata/02495_concat_with_separator/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt40": true, - "stmt41": true - } -} +{} diff --git a/parser/testdata/02935_format_with_arbitrary_types/metadata.json b/parser/testdata/02935_format_with_arbitrary_types/metadata.json index ab953e9485..127dc52ed4 100644 --- a/parser/testdata/02935_format_with_arbitrary_types/metadata.json +++ b/parser/testdata/02935_format_with_arbitrary_types/metadata.json @@ -1,7 +1,5 @@ { "explain_todo": { - "stmt19": true, - "stmt20": true, "stmt44": true } } From aa3f58e8b5180f8a52757c0814ec807f037ca80e Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:51:26 +0000 Subject: [PATCH 12/47] Parse column definitions after TO target in MATERIALIZED VIEW For MATERIALIZED VIEW ... TO target (columns) AS SELECT syntax, column definitions can appear after the TO clause. Added parsing support for this variant. 
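The accepted shape is CREATE MATERIALIZED VIEW mv TO target (a UInt32, b String) AS SELECT ...; a toy sketch of the optional list over plain string tokens (consumeColumnList is invented for illustration, not the parser API):

package main

import "fmt"

// consumeColumnList reads an optional "( entry, entry, ... )" prefix and
// returns the entries plus the remaining tokens; with no "(" it is a no-op.
func consumeColumnList(toks []string) (cols []string, rest []string) {
	if len(toks) == 0 || toks[0] != "(" {
		return nil, toks // e.g. "... TO target AS SELECT ..." with no column list
	}
	i := 1
	for i < len(toks) && toks[i] != ")" {
		cols = append(cols, toks[i]) // each entry stands for "name type"
		i++
		if i < len(toks) && toks[i] == "," {
			i++
		}
	}
	if i < len(toks) && toks[i] == ")" {
		i++
	}
	return cols, toks[i:]
}

func main() {
	cols, rest := consumeColumnList([]string{"(", "a UInt32", ",", "b String", ")", "AS", "SELECT"})
	fmt.Println(cols, rest) // [a UInt32 b String] [AS SELECT]
}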
--- parser/parser.go | 18 ++++++++++++++++++ .../metadata.json | 6 +----- .../metadata.json | 1 - .../metadata.json | 7 +------ .../metadata.json | 6 +----- .../metadata.json | 7 +------ .../02346_additional_filters/metadata.json | 6 +----- .../metadata.json | 6 +----- .../metadata.json | 6 +----- .../metadata.json | 6 +----- .../testdata/03230_subcolumns_mv/metadata.json | 6 +----- .../metadata.json | 4 +--- .../metadata.json | 6 +----- .../metadata.json | 6 +----- .../03275_matview_with_union/metadata.json | 6 +----- .../metadata.json | 6 +----- .../metadata.json | 6 +----- .../03512_cast_logical_error/metadata.json | 7 +------ .../metadata.json | 6 +----- .../03561_two_mvs_bad_select/metadata.json | 7 +------ .../metadata.json | 1 - 21 files changed, 36 insertions(+), 94 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 6513a97ed3..0e2083eef2 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -2821,6 +2821,24 @@ func (p *Parser) parseCreateView(create *ast.CreateQuery) { } else { create.To = toName } + + // For MATERIALIZED VIEW ... TO target (columns) syntax, + // column definitions can come after the TO target + if p.currentIs(token.LPAREN) && len(create.Columns) == 0 { + p.nextToken() + for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + col := p.parseColumnDeclaration() + if col != nil { + create.Columns = append(create.Columns, col) + } + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + p.expect(token.RPAREN) + } } // Parse ENGINE (for materialized views) diff --git a/parser/testdata/00984_materialized_view_to_columns/metadata.json b/parser/testdata/00984_materialized_view_to_columns/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/00984_materialized_view_to_columns/metadata.json +++ b/parser/testdata/00984_materialized_view_to_columns/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/01155_rename_move_materialized_view/metadata.json b/parser/testdata/01155_rename_move_materialized_view/metadata.json index aaf916093d..072340a6e5 100644 --- a/parser/testdata/01155_rename_move_materialized_view/metadata.json +++ b/parser/testdata/01155_rename_move_materialized_view/metadata.json @@ -1,6 +1,5 @@ { "explain_todo": { - "stmt13": true, "stmt44": true, "stmt52": true } diff --git a/parser/testdata/01182_materialized_view_different_structure/metadata.json b/parser/testdata/01182_materialized_view_different_structure/metadata.json index 99353c8390..0967ef424b 100644 --- a/parser/testdata/01182_materialized_view_different_structure/metadata.json +++ b/parser/testdata/01182_materialized_view_different_structure/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt11": true, - "stmt17": true - } -} +{} diff --git a/parser/testdata/01515_mv_and_array_join_optimisation_bag/metadata.json b/parser/testdata/01515_mv_and_array_join_optimisation_bag/metadata.json index 1295a45747..0967ef424b 100644 --- a/parser/testdata/01515_mv_and_array_join_optimisation_bag/metadata.json +++ b/parser/testdata/01515_mv_and_array_join_optimisation_bag/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt3": true - } -} +{} diff --git a/parser/testdata/01880_materialized_view_to_table_type_check/metadata.json b/parser/testdata/01880_materialized_view_to_table_type_check/metadata.json index cc8d69ec3f..0967ef424b 100644 --- a/parser/testdata/01880_materialized_view_to_table_type_check/metadata.json +++ 
b/parser/testdata/01880_materialized_view_to_table_type_check/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt10": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/02346_additional_filters/metadata.json b/parser/testdata/02346_additional_filters/metadata.json index 8d8c3756f7..0967ef424b 100644 --- a/parser/testdata/02346_additional_filters/metadata.json +++ b/parser/testdata/02346_additional_filters/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt45": true - } -} +{} diff --git a/parser/testdata/03033_scalars_context_data_race/metadata.json b/parser/testdata/03033_scalars_context_data_race/metadata.json index 342b3ff5b4..0967ef424b 100644 --- a/parser/testdata/03033_scalars_context_data_race/metadata.json +++ b/parser/testdata/03033_scalars_context_data_race/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt8": true - } -} +{} diff --git a/parser/testdata/03166_mv_prewhere_duplicating_name_bug/metadata.json b/parser/testdata/03166_mv_prewhere_duplicating_name_bug/metadata.json index 1295a45747..0967ef424b 100644 --- a/parser/testdata/03166_mv_prewhere_duplicating_name_bug/metadata.json +++ b/parser/testdata/03166_mv_prewhere_duplicating_name_bug/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt3": true - } -} +{} diff --git a/parser/testdata/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug/metadata.json b/parser/testdata/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug/metadata.json index c45b7602ba..0967ef424b 100644 --- a/parser/testdata/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug/metadata.json +++ b/parser/testdata/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt12": true - } -} +{} diff --git a/parser/testdata/03230_subcolumns_mv/metadata.json b/parser/testdata/03230_subcolumns_mv/metadata.json index 342b3ff5b4..0967ef424b 100644 --- a/parser/testdata/03230_subcolumns_mv/metadata.json +++ b/parser/testdata/03230_subcolumns_mv/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt8": true - } -} +{} diff --git a/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json b/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json index 79ec96362c..7b4455cd5f 100644 --- a/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json +++ b/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json @@ -1,7 +1,5 @@ { "explain_todo": { - "stmt16": true, - "stmt4": true, - "stmt6": true + "stmt16": true } } diff --git a/parser/testdata/03254_last_2_samples_aggregate_function_simple/metadata.json b/parser/testdata/03254_last_2_samples_aggregate_function_simple/metadata.json index b65b07d7a6..0967ef424b 100644 --- a/parser/testdata/03254_last_2_samples_aggregate_function_simple/metadata.json +++ b/parser/testdata/03254_last_2_samples_aggregate_function_simple/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt4": true - } -} +{} diff --git a/parser/testdata/03262_analyzer_materialized_view_in_with_cte/metadata.json b/parser/testdata/03262_analyzer_materialized_view_in_with_cte/metadata.json index b563327205..0967ef424b 100644 --- a/parser/testdata/03262_analyzer_materialized_view_in_with_cte/metadata.json +++ b/parser/testdata/03262_analyzer_materialized_view_in_with_cte/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt7": true - } -} +{} diff --git a/parser/testdata/03275_matview_with_union/metadata.json 
b/parser/testdata/03275_matview_with_union/metadata.json index b563327205..0967ef424b 100644 --- a/parser/testdata/03275_matview_with_union/metadata.json +++ b/parser/testdata/03275_matview_with_union/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt7": true - } -} +{} diff --git a/parser/testdata/03306_materialized_vew_prewhere_supported_columns/metadata.json b/parser/testdata/03306_materialized_vew_prewhere_supported_columns/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03306_materialized_vew_prewhere_supported_columns/metadata.json +++ b/parser/testdata/03306_materialized_vew_prewhere_supported_columns/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03412_materialized_view_to_distributed_different_headers/metadata.json b/parser/testdata/03412_materialized_view_to_distributed_different_headers/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03412_materialized_view_to_distributed_different_headers/metadata.json +++ b/parser/testdata/03412_materialized_view_to_distributed_different_headers/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03512_cast_logical_error/metadata.json b/parser/testdata/03512_cast_logical_error/metadata.json index 5e06643b76..0967ef424b 100644 --- a/parser/testdata/03512_cast_logical_error/metadata.json +++ b/parser/testdata/03512_cast_logical_error/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt12": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/03548_array_group_last_serialization/metadata.json b/parser/testdata/03548_array_group_last_serialization/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03548_array_group_last_serialization/metadata.json +++ b/parser/testdata/03548_array_group_last_serialization/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03561_two_mvs_bad_select/metadata.json b/parser/testdata/03561_two_mvs_bad_select/metadata.json index 0f293987f1..0967ef424b 100644 --- a/parser/testdata/03561_two_mvs_bad_select/metadata.json +++ b/parser/testdata/03561_two_mvs_bad_select/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt5": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/03749_materialized_view_not_supports_parallel_write/metadata.json b/parser/testdata/03749_materialized_view_not_supports_parallel_write/metadata.json index 05aa6dfc72..342b3ff5b4 100644 --- a/parser/testdata/03749_materialized_view_not_supports_parallel_write/metadata.json +++ b/parser/testdata/03749_materialized_view_not_supports_parallel_write/metadata.json @@ -1,6 +1,5 @@ { "explain_todo": { - "stmt4": true, "stmt8": true } } From 3e6620116a9e4271797a8f24921979c17015b80f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 02:55:44 +0000 Subject: [PATCH 13/47] Fix IN expression to include :: cast on right side without parentheses When parsing `expr IN value::Type` (without parentheses around the IN list), the :: cast was being applied to the entire IN expression instead of just the value. Changed precedence from CALL to MUL_PREC to ensure :: is consumed as part of the right-hand expression. 
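The reasoning, sketched with made-up precedence values (the parser's real CALL/MUL_PREC constants and loop condition may differ): an operator is only consumed while its precedence exceeds the minimum requested, so asking for CALL stops before '::' while MUL_PREC still lets it bind to the IN element.

package main

import "fmt"

// Stand-in precedence levels; higher binds tighter. '::' sits at the CALL level.
const (
	precMul  = 60 // stand-in for MUL_PREC
	precCall = 80 // stand-in for CALL
)

// rightBindsCast reports whether parsing the right side of IN with the given
// minimum precedence would also consume a trailing '::Type' cast.
func rightBindsCast(minPrec int) bool {
	return precCall > minPrec
}

func main() {
	fmt.Println(rightBindsCast(precCall)) // false: cast escapes, attaches to the whole IN
	fmt.Println(rightBindsCast(precMul))  // true:  x IN (d :: Date), as intended
}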
--- parser/expression.go | 5 +++-- parser/testdata/02714_date_date32_in/metadata.json | 7 +------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/parser/expression.go b/parser/expression.go index 3913b04dd0..3eff4f3716 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -2065,8 +2065,9 @@ func (p *Parser) parseInExpression(left ast.Expression, not bool) ast.Expression expr.List = []ast.Expression{arr} } else { // Could be identifier, tuple function, or other expression - // Parse as expression - innerExpr := p.parseExpression(CALL) + // Parse as expression with MUL_PREC to include :: cast operator + // (which has CALL precedence, so using MUL_PREC ensures it's consumed) + innerExpr := p.parseExpression(MUL_PREC) if innerExpr != nil { expr.List = []ast.Expression{innerExpr} } diff --git a/parser/testdata/02714_date_date32_in/metadata.json b/parser/testdata/02714_date_date32_in/metadata.json index 7b4ddafa53..0967ef424b 100644 --- a/parser/testdata/02714_date_date32_in/metadata.json +++ b/parser/testdata/02714_date_date32_in/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt4": true - } -} +{} From 28417cb8caeae6aadf359d57a0b8917979c142cb Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:00:38 +0000 Subject: [PATCH 14/47] Add IF NOT EXISTS support for ATTACH TABLE statement The parser was treating IF as the table name instead of handling IF NOT EXISTS as a modifier. Added IfNotExists field to AttachQuery and parsing logic to handle the IF NOT EXISTS clause. --- ast/ast.go | 1 + parser/parser.go | 12 ++++++++++++ .../01073_attach_if_not_exists/metadata.json | 7 +------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 68c6eccb32..8b072c3e86 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -743,6 +743,7 @@ func (d *DetachQuery) statementNode() {} // AttachQuery represents an ATTACH statement. type AttachQuery struct { Position token.Position `json:"-"` + IfNotExists bool `json:"if_not_exists,omitempty"` Database string `json:"database,omitempty"` Table string `json:"table,omitempty"` Dictionary string `json:"dictionary,omitempty"` diff --git a/parser/parser.go b/parser/parser.go index 0e2083eef2..a5b8bdeb3f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6994,6 +6994,18 @@ func (p *Parser) parseAttach() *ast.AttachQuery { } } + // Handle IF NOT EXISTS + if p.currentIs(token.IF) { + p.nextToken() + if p.currentIs(token.NOT) { + p.nextToken() + if p.currentIs(token.EXISTS) { + attach.IfNotExists = true + p.nextToken() + } + } + } + // Parse name (can be qualified: database.table for TABLE, not for DATABASE/DICTIONARY) name := p.parseIdentifierName() if p.currentIs(token.DOT) && !isDatabase && !isDictionary { diff --git a/parser/testdata/01073_attach_if_not_exists/metadata.json b/parser/testdata/01073_attach_if_not_exists/metadata.json index bc5c6edb66..0967ef424b 100644 --- a/parser/testdata/01073_attach_if_not_exists/metadata.json +++ b/parser/testdata/01073_attach_if_not_exists/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt5": true - } -} +{} From 3b54762826d72647d6c9403dbd051bd71cdaa155 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:05:05 +0000 Subject: [PATCH 15/47] Add implicit NULL for caseWithExpression without ELSE clause When a CASE x WHEN form has no ELSE clause, ClickHouse implicitly uses NULL as the else value. The explain output was missing this implicit NULL for caseWithExpression (it was already correct for multiIf form). 
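A simplified stand-in for the change (explainCaseWithExpression here is not the real function; it only shows the child count and the implicit NULL child):

package main

import (
	"fmt"
	"strings"
)

// explainCaseWithExpression prints the caseWithExpression wrapper for
// CASE x WHEN ... END; the ELSE slot is always counted, and when the query
// has no ELSE an implicit "Literal NULL" child is emitted in its place.
func explainCaseWithExpression(sb *strings.Builder, whens int, hasElse bool, indent string) {
	args := 1 + whens*2 + 1 // operand + (condition, result) pairs + else
	fmt.Fprintf(sb, "%sFunction caseWithExpression (children 1)\n", indent)
	fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, args)
	// ... operand and WHEN/THEN children would be printed here ...
	if !hasElse {
		fmt.Fprintf(sb, "%s  Literal NULL\n", indent)
	}
}

func main() {
	var sb strings.Builder
	explainCaseWithExpression(&sb, 2, false, "")
	fmt.Print(sb.String())
}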
--- internal/explain/functions.go | 9 +++++---- parser/testdata/02542_case_no_else/metadata.json | 7 +------ parser/testdata/02787_transform_null/metadata.json | 6 +----- .../testdata/03094_transform_return_first/metadata.json | 2 +- .../metadata.json | 2 +- .../testdata/03654_case_non_constant_null/metadata.json | 2 +- 6 files changed, 10 insertions(+), 18 deletions(-) diff --git a/internal/explain/functions.go b/internal/explain/functions.go index 5fbb09b9e0..c8d51603ec 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -1433,10 +1433,8 @@ func explainCaseExprWithAlias(sb *strings.Builder, n *ast.CaseExpr, alias string // CASE is represented as Function multiIf or caseWithExpression if n.Operand != nil { // CASE x WHEN ... form - argCount := 1 + len(n.Whens)*2 // operand + (condition, result) pairs - if n.Else != nil { - argCount++ - } + // Always has ELSE (explicit or implicit NULL) + argCount := 1 + len(n.Whens)*2 + 1 // operand + (condition, result) pairs + else if alias != "" { fmt.Fprintf(sb, "%sFunction caseWithExpression (alias %s) (children %d)\n", indent, alias, 1) } else { @@ -1450,6 +1448,9 @@ func explainCaseExprWithAlias(sb *strings.Builder, n *ast.CaseExpr, alias string } if n.Else != nil { Node(sb, n.Else, depth+2) + } else { + // Implicit NULL when no ELSE clause + fmt.Fprintf(sb, "%s Literal NULL\n", indent) } } else { // CASE WHEN ... form diff --git a/parser/testdata/02542_case_no_else/metadata.json b/parser/testdata/02542_case_no_else/metadata.json index 682bda1cbc..0967ef424b 100644 --- a/parser/testdata/02542_case_no_else/metadata.json +++ b/parser/testdata/02542_case_no_else/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt1": true, - "stmt2": true - } -} +{} diff --git a/parser/testdata/02787_transform_null/metadata.json b/parser/testdata/02787_transform_null/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/02787_transform_null/metadata.json +++ b/parser/testdata/02787_transform_null/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03094_transform_return_first/metadata.json b/parser/testdata/03094_transform_return_first/metadata.json index cc2f3624ef..0967ef424b 100644 --- a/parser/testdata/03094_transform_return_first/metadata.json +++ b/parser/testdata/03094_transform_return_first/metadata.json @@ -1 +1 @@ -{"explain_todo":{"stmt4":true}} +{} diff --git a/parser/testdata/03626_case_function_with_dynamic_argument/metadata.json b/parser/testdata/03626_case_function_with_dynamic_argument/metadata.json index cc2f3624ef..0967ef424b 100644 --- a/parser/testdata/03626_case_function_with_dynamic_argument/metadata.json +++ b/parser/testdata/03626_case_function_with_dynamic_argument/metadata.json @@ -1 +1 @@ -{"explain_todo":{"stmt4":true}} +{} diff --git a/parser/testdata/03654_case_non_constant_null/metadata.json b/parser/testdata/03654_case_non_constant_null/metadata.json index cc2f3624ef..0967ef424b 100644 --- a/parser/testdata/03654_case_non_constant_null/metadata.json +++ b/parser/testdata/03654_case_non_constant_null/metadata.json @@ -1 +1 @@ -{"explain_todo":{"stmt4":true}} +{} From a7ea9a03a25b49aecc03f5547c67c04e608422d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:15:00 +0000 Subject: [PATCH 16/47] Add BACKUP and RESTORE statement support Implement parsing and explain output for BACKUP and RESTORE statements: - Add BACKUP and RESTORE tokens - Add BackupQuery and RestoreQuery AST types - Add parseBackup() and parseRestore() 
parser functions - Add explain handlers for both query types Fixes tests: 03286_backup_to_null and 03593_backup_with_broken_projection --- ast/ast.go | 34 ++++ internal/explain/explain.go | 4 + internal/explain/statements.go | 64 ++++++++ parser/parser.go | 148 ++++++++++++++++++ .../03286_backup_to_null/metadata.json | 7 +- .../metadata.json | 6 +- token/token.go | 4 + 7 files changed, 256 insertions(+), 11 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 8b072c3e86..d9b99a9724 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -765,6 +765,40 @@ func (a *AttachQuery) Pos() token.Position { return a.Position } func (a *AttachQuery) End() token.Position { return a.Position } func (a *AttachQuery) statementNode() {} +// BackupQuery represents a BACKUP statement. +type BackupQuery struct { + Position token.Position `json:"-"` + Database string `json:"database,omitempty"` + Table string `json:"table,omitempty"` + Dictionary string `json:"dictionary,omitempty"` + All bool `json:"all,omitempty"` // BACKUP ALL + Temporary bool `json:"temporary,omitempty"` + Target *FunctionCall `json:"target,omitempty"` // Disk('path') or Null + Settings []*SettingExpr `json:"settings,omitempty"` + Format string `json:"format,omitempty"` +} + +func (b *BackupQuery) Pos() token.Position { return b.Position } +func (b *BackupQuery) End() token.Position { return b.Position } +func (b *BackupQuery) statementNode() {} + +// RestoreQuery represents a RESTORE statement. +type RestoreQuery struct { + Position token.Position `json:"-"` + Database string `json:"database,omitempty"` + Table string `json:"table,omitempty"` + Dictionary string `json:"dictionary,omitempty"` + All bool `json:"all,omitempty"` // RESTORE ALL + Temporary bool `json:"temporary,omitempty"` + Source *FunctionCall `json:"source,omitempty"` // Disk('path') or Null + Settings []*SettingExpr `json:"settings,omitempty"` + Format string `json:"format,omitempty"` +} + +func (r *RestoreQuery) Pos() token.Position { return r.Position } +func (r *RestoreQuery) End() token.Position { return r.Position } +func (r *RestoreQuery) statementNode() {} + // DescribeQuery represents a DESCRIBE statement. 
type DescribeQuery struct { Position token.Position `json:"-"` diff --git a/internal/explain/explain.go b/internal/explain/explain.go index 947ec23f84..9afcbf264d 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -238,6 +238,10 @@ func Node(sb *strings.Builder, node interface{}, depth int) { explainDetachQuery(sb, n, indent) case *ast.AttachQuery: explainAttachQuery(sb, n, indent, depth) + case *ast.BackupQuery: + explainBackupQuery(sb, n, indent) + case *ast.RestoreQuery: + explainRestoreQuery(sb, n, indent) case *ast.AlterQuery: explainAlterQuery(sb, n, indent, depth) case *ast.OptimizeQuery: diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 64fb1b1ecf..3cd61a9941 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -1456,6 +1456,70 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, } } +func explainBackupQuery(sb *strings.Builder, n *ast.BackupQuery, indent string) { + if n == nil { + fmt.Fprintf(sb, "%s*ast.BackupQuery\n", indent) + return + } + + // Count children: function target + format identifier + children := 0 + if n.Target != nil { + children++ + } + if n.Format != "" { + children++ + } + + if children > 0 { + fmt.Fprintf(sb, "%sBackupQuery (children %d)\n", indent, children) + } else { + fmt.Fprintf(sb, "%sBackupQuery\n", indent) + } + + // Output target function (e.g., Null, Disk('path')) + if n.Target != nil { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Target.Name) + } + + // Output format identifier + if n.Format != "" { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Format) + } +} + +func explainRestoreQuery(sb *strings.Builder, n *ast.RestoreQuery, indent string) { + if n == nil { + fmt.Fprintf(sb, "%s*ast.RestoreQuery\n", indent) + return + } + + // Count children: function source + format identifier + children := 0 + if n.Source != nil { + children++ + } + if n.Format != "" { + children++ + } + + if children > 0 { + fmt.Fprintf(sb, "%sRestoreQuery (children %d)\n", indent, children) + } else { + fmt.Fprintf(sb, "%sRestoreQuery\n", indent) + } + + // Output source function (e.g., Null, Disk('path')) + if n.Source != nil { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Source.Name) + } + + // Output format identifier + if n.Format != "" { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Format) + } +} + func explainAlterQuery(sb *strings.Builder, n *ast.AlterQuery, indent string, depth int) { if n == nil { fmt.Fprintf(sb, "%s*ast.AlterQuery\n", indent) diff --git a/parser/parser.go b/parser/parser.go index a5b8bdeb3f..a06ac9770a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -332,6 +332,10 @@ func (p *Parser) parseStatement() ast.Statement { return p.parseTransactionControl() case token.ROLLBACK: return p.parseTransactionControl() + case token.BACKUP: + return p.parseBackup() + case token.RESTORE: + return p.parseRestore() default: p.errors = append(p.errors, fmt.Errorf("unexpected token %s at line %d, column %d", p.current.Token, p.current.Pos.Line, p.current.Pos.Column)) @@ -7815,6 +7819,150 @@ func (p *Parser) parseRevoke() *ast.GrantQuery { return grant } +func (p *Parser) parseBackup() *ast.BackupQuery { + backup := &ast.BackupQuery{ + Position: p.current.Pos, + } + + p.nextToken() // skip BACKUP + + // Parse what to backup: TABLE, DATABASE, DICTIONARY, ALL, TEMPORARY + if p.currentIs(token.TABLE) { + p.nextToken() + name := p.parseIdentifierName() + if p.currentIs(token.DOT) { + p.nextToken() + backup.Database = name + 
backup.Table = p.parseIdentifierName() + } else { + backup.Table = name + } + } else if p.currentIs(token.DATABASE) { + p.nextToken() + backup.Database = p.parseIdentifierName() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "DICTIONARY" { + p.nextToken() + backup.Dictionary = p.parseIdentifierName() + } else if p.currentIs(token.ALL) { + backup.All = true + p.nextToken() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TEMPORARY" { + backup.Temporary = true + p.nextToken() + if p.currentIs(token.TABLE) { + p.nextToken() + } + } + + // Parse TO clause + if p.currentIs(token.TO) { + p.nextToken() + // Parse target - it's a function call like Null or Disk('path') + if p.currentIs(token.NULL) || p.currentIs(token.IDENT) { + name := p.current.Value + p.nextToken() + fn := &ast.FunctionCall{ + Position: backup.Position, + Name: name, + } + if p.currentIs(token.LPAREN) { + p.nextToken() + if !p.currentIs(token.RPAREN) { + fn.Arguments = p.parseExpressionList() + } + p.expect(token.RPAREN) + } + backup.Target = fn + } + } + + // Parse SETTINGS clause + if p.currentIs(token.SETTINGS) { + p.nextToken() + backup.Settings = p.parseSettingsList() + } + + // Parse FORMAT clause + if p.currentIs(token.FORMAT) { + p.nextToken() + backup.Format = p.parseIdentifierName() + } + + return backup +} + +func (p *Parser) parseRestore() *ast.RestoreQuery { + restore := &ast.RestoreQuery{ + Position: p.current.Pos, + } + + p.nextToken() // skip RESTORE + + // Parse what to restore: TABLE, DATABASE, DICTIONARY, ALL, TEMPORARY + if p.currentIs(token.TABLE) { + p.nextToken() + name := p.parseIdentifierName() + if p.currentIs(token.DOT) { + p.nextToken() + restore.Database = name + restore.Table = p.parseIdentifierName() + } else { + restore.Table = name + } + } else if p.currentIs(token.DATABASE) { + p.nextToken() + restore.Database = p.parseIdentifierName() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "DICTIONARY" { + p.nextToken() + restore.Dictionary = p.parseIdentifierName() + } else if p.currentIs(token.ALL) { + restore.All = true + p.nextToken() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TEMPORARY" { + restore.Temporary = true + p.nextToken() + if p.currentIs(token.TABLE) { + p.nextToken() + } + } + + // Parse FROM clause + if p.currentIs(token.FROM) { + p.nextToken() + // Parse source - it's a function call like Null or Disk('path') + if p.currentIs(token.NULL) || p.currentIs(token.IDENT) { + name := p.current.Value + p.nextToken() + fn := &ast.FunctionCall{ + Position: restore.Position, + Name: name, + } + if p.currentIs(token.LPAREN) { + p.nextToken() + if !p.currentIs(token.RPAREN) { + fn.Arguments = p.parseExpressionList() + } + p.expect(token.RPAREN) + } + restore.Source = fn + } + } + + // Parse SETTINGS clause + if p.currentIs(token.SETTINGS) { + p.nextToken() + restore.Settings = p.parseSettingsList() + } + + // Parse FORMAT clause + if p.currentIs(token.FORMAT) { + p.nextToken() + restore.Format = p.parseIdentifierName() + } + + return restore +} + // parseTransactionControl handles BEGIN, COMMIT, ROLLBACK, and SET TRANSACTION SNAPSHOT statements func (p *Parser) parseTransactionControl() *ast.TransactionControlQuery { query := &ast.TransactionControlQuery{ diff --git a/parser/testdata/03286_backup_to_null/metadata.json b/parser/testdata/03286_backup_to_null/metadata.json index 943b275814..0967ef424b 100644 --- a/parser/testdata/03286_backup_to_null/metadata.json +++ 
b/parser/testdata/03286_backup_to_null/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt4": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/03593_backup_with_broken_projection/metadata.json b/parser/testdata/03593_backup_with_broken_projection/metadata.json index 3a06a4a1ac..0967ef424b 100644 --- a/parser/testdata/03593_backup_with_broken_projection/metadata.json +++ b/parser/testdata/03593_backup_with_broken_projection/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt5": true - } -} +{} diff --git a/token/token.go b/token/token.go index 6649302277..88e2cc1584 100644 --- a/token/token.go +++ b/token/token.go @@ -63,6 +63,7 @@ const ( ASC ASOF ATTACH + BACKUP BEGIN BETWEEN BOTH @@ -163,6 +164,7 @@ const ( REGEXP RENAME REPLACE + RESTORE REVOKE RIGHT ROLLBACK @@ -262,6 +264,7 @@ var tokens = [...]string{ ASC: "ASC", ASOF: "ASOF", ATTACH: "ATTACH", + BACKUP: "BACKUP", BEGIN: "BEGIN", BETWEEN: "BETWEEN", BOTH: "BOTH", @@ -362,6 +365,7 @@ var tokens = [...]string{ REGEXP: "REGEXP", RENAME: "RENAME", REPLACE: "REPLACE", + RESTORE: "RESTORE", REVOKE: "REVOKE", RIGHT: "RIGHT", ROLLBACK: "ROLLBACK", From f5ddaca2e77a161c4f4a383bcbe3b126837f9f9c Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:23:18 +0000 Subject: [PATCH 17/47] Distinguish EXCEPT set operation from column exclusion When parsing expressions, check if EXCEPT is followed by SELECT to determine if it's a set operation (SELECT (*) EXCEPT SELECT 1) vs column exclusion (SELECT * EXCEPT (col1, col2)). Fixes test: 03457_inconsistent_formatting_except --- parser/expression.go | 6 ++++++ .../03457_inconsistent_formatting_except/metadata.json | 7 +------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/parser/expression.go b/parser/expression.go index 3eff4f3716..c9928c0a35 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -552,6 +552,12 @@ func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression { return p.parseLambda(left) case token.EXCEPT: // Handle * EXCEPT (col1, col2) or COLUMNS(...) EXCEPT (col1, col2) + // But NOT "SELECT (*) EXCEPT SELECT 1" which is a set operation + // Check if EXCEPT is followed by SELECT - if so, it's a set operation + if p.peekIs(token.SELECT) { + // This is EXCEPT as set operation, not column exclusion + return left + } if asterisk, ok := left.(*ast.Asterisk); ok { return p.parseAsteriskExcept(asterisk) } diff --git a/parser/testdata/03457_inconsistent_formatting_except/metadata.json b/parser/testdata/03457_inconsistent_formatting_except/metadata.json index 323c7c4c53..0967ef424b 100644 --- a/parser/testdata/03457_inconsistent_formatting_except/metadata.json +++ b/parser/testdata/03457_inconsistent_formatting_except/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt2": true, - "stmt6": true - } -} +{} From db52b3a7bccad95699d52afce43063c65aedd130 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:27:06 +0000 Subject: [PATCH 18/47] Include function arguments in BACKUP/RESTORE explain output Update explainBackupQuery and explainRestoreQuery to output function arguments (e.g., Memory('b1') shows the ExpressionList with 'b1'). 
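For illustration (the table name t is assumed, not taken from the tests), statements of this shape now show the engine's argument list under the Function node:

    BACKUP TABLE t TO Memory('b1');
    RESTORE TABLE t FROM Memory('b1');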
Fixes tests: 03286_backup_to_memory, 03276_database_backup_merge_tree_table_file_engine, 03278_database_backup_merge_tree_table_disk_engine, 03279_database_backup_database_disk_engine --- internal/explain/statements.go | 24 +++++++++++++++---- .../metadata.json | 6 +---- .../metadata.json | 6 +---- .../metadata.json | 6 +---- .../03286_backup_to_memory/metadata.json | 7 +----- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 3cd61a9941..008aded2e5 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -1477,9 +1477,17 @@ func explainBackupQuery(sb *strings.Builder, n *ast.BackupQuery, indent string) fmt.Fprintf(sb, "%sBackupQuery\n", indent) } - // Output target function (e.g., Null, Disk('path')) + // Output target function (e.g., Null, Disk('path'), Memory('b1')) if n.Target != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Target.Name) + if len(n.Target.Arguments) > 0 { + fmt.Fprintf(sb, "%s Function %s (children 1)\n", indent, n.Target.Name) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Target.Arguments)) + for _, arg := range n.Target.Arguments { + Node(sb, arg, 3) + } + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Target.Name) + } } // Output format identifier @@ -1509,9 +1517,17 @@ func explainRestoreQuery(sb *strings.Builder, n *ast.RestoreQuery, indent string fmt.Fprintf(sb, "%sRestoreQuery\n", indent) } - // Output source function (e.g., Null, Disk('path')) + // Output source function (e.g., Null, Disk('path'), Memory('b1')) if n.Source != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Source.Name) + if len(n.Source.Arguments) > 0 { + fmt.Fprintf(sb, "%s Function %s (children 1)\n", indent, n.Source.Name) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Source.Arguments)) + for _, arg := range n.Source.Arguments { + Node(sb, arg, 3) + } + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Source.Name) + } } // Output format identifier diff --git a/parser/testdata/03276_database_backup_merge_tree_table_file_engine/metadata.json b/parser/testdata/03276_database_backup_merge_tree_table_file_engine/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03276_database_backup_merge_tree_table_file_engine/metadata.json +++ b/parser/testdata/03276_database_backup_merge_tree_table_file_engine/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03278_database_backup_merge_tree_table_disk_engine/metadata.json b/parser/testdata/03278_database_backup_merge_tree_table_disk_engine/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03278_database_backup_merge_tree_table_disk_engine/metadata.json +++ b/parser/testdata/03278_database_backup_merge_tree_table_disk_engine/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03279_database_backup_database_disk_engine/metadata.json b/parser/testdata/03279_database_backup_database_disk_engine/metadata.json index f4c74e32be..0967ef424b 100644 --- a/parser/testdata/03279_database_backup_database_disk_engine/metadata.json +++ b/parser/testdata/03279_database_backup_database_disk_engine/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt10": true - } -} +{} diff --git a/parser/testdata/03286_backup_to_memory/metadata.json b/parser/testdata/03286_backup_to_memory/metadata.json index 943b275814..0967ef424b 100644 --- 
a/parser/testdata/03286_backup_to_memory/metadata.json +++ b/parser/testdata/03286_backup_to_memory/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt4": true, - "stmt6": true - } -} +{} From e2dfe9c415884b9c5d52db80809d51f28b733e33 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:33:59 +0000 Subject: [PATCH 19/47] Allow keywords as CTE names in WITH clause Support using keywords like 'table' as CTE names (e.g., WITH table AS (SELECT 1 AS key)). Exclude NULL/TRUE/FALSE since they have special literal meanings. Fixes test: 03518_left_to_cross_incorrect --- parser/parser.go | 6 +++++- .../testdata/03518_left_to_cross_incorrect/metadata.json | 7 +------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index a06ac9770a..78cbce75ae 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1390,7 +1390,11 @@ func (p *Parser) parseWithClause() []ast.Expression { // Check if it's the "name AS (subquery)" syntax (standard SQL CTE) // or "expr AS name" syntax (ClickHouse scalar) - if p.currentIs(token.IDENT) && p.peekIs(token.AS) { + // Allow keywords as CTE names (e.g., WITH table AS (SELECT 1)) + // But exclude NULL/TRUE/FALSE which have special literal meanings + isNameToken := p.currentIs(token.IDENT) || + (p.current.Token.IsKeyword() && !p.currentIs(token.NULL) && !p.currentIs(token.TRUE) && !p.currentIs(token.FALSE)) + if isNameToken && p.peekIs(token.AS) { // This could be "name AS (subquery)" or "ident AS alias" for scalar // Need to look ahead to determine: if IDENT AS LPAREN (SELECT...) -> CTE // If IDENT AS IDENT -> scalar WITH (first ident is expression, second is alias) diff --git a/parser/testdata/03518_left_to_cross_incorrect/metadata.json b/parser/testdata/03518_left_to_cross_incorrect/metadata.json index bc141058a4..0967ef424b 100644 --- a/parser/testdata/03518_left_to_cross_incorrect/metadata.json +++ b/parser/testdata/03518_left_to_cross_incorrect/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt2": true, - "stmt3": true - } -} +{} From 244b71f9ca7d203e702cc9ede3740c7cea38ca99 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:38:38 +0000 Subject: [PATCH 20/47] Support WITH TIES modifier after TOP clause Handle `SELECT TOP n WITH TIES *` syntax by consuming the WITH TIES tokens after parsing the TOP expression. 
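An assumed example of the accepted shape (not copied from the test; WITH TIES is paired with an ORDER BY here):

    SELECT TOP 3 WITH TIES * FROM numbers(10) ORDER BY number;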
Fixes test: 03725_empty_tuple_some_limit_with_ties_distinct --- parser/parser.go | 5 +++++ .../metadata.json | 7 +------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 78cbce75ae..c45de3d441 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1080,6 +1080,11 @@ func (p *Parser) parseSelectInternal(preParsedWith []ast.Expression) *ast.Select p.nextToken() // Use MUL_PREC to stop at * (which would be parsed as column selector, not multiplication) sel.Top = p.parseExpression(MUL_PREC) + // WITH TIES modifier after TOP + if p.currentIs(token.WITH) && p.peekIs(token.TIES) { + p.nextToken() // skip WITH + p.nextToken() // skip TIES + } } // Parse column list diff --git a/parser/testdata/03725_empty_tuple_some_limit_with_ties_distinct/metadata.json b/parser/testdata/03725_empty_tuple_some_limit_with_ties_distinct/metadata.json index 943b275814..0967ef424b 100644 --- a/parser/testdata/03725_empty_tuple_some_limit_with_ties_distinct/metadata.json +++ b/parser/testdata/03725_empty_tuple_some_limit_with_ties_distinct/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt4": true, - "stmt6": true - } -} +{} From 6417e6ef7089f3ef8e404ae75a63a456570f154f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:42:16 +0000 Subject: [PATCH 21/47] Support SHOW TABLE and SHOW DATABASE as aliases Treat `SHOW TABLE tablename` as equivalent to `SHOW CREATE TABLE tablename` and `SHOW DATABASE dbname` as equivalent to `SHOW CREATE DATABASE dbname`. Fixes tests: 02710_show_table, 03663_parameterized_views_formatting_of_substitutions_excessive_backticks --- parser/parser.go | 8 ++++++++ parser/testdata/02710_show_table/metadata.json | 7 +------ .../metadata.json | 6 +----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index c45de3d441..ab24736a32 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6278,9 +6278,17 @@ func (p *Parser) parseShow() ast.Statement { case token.TABLES: show.ShowType = ast.ShowTables p.nextToken() + case token.TABLE: + // SHOW TABLE is equivalent to SHOW CREATE TABLE + show.ShowType = ast.ShowCreate + p.nextToken() case token.DATABASES: show.ShowType = ast.ShowDatabases p.nextToken() + case token.DATABASE: + // SHOW DATABASE is equivalent to SHOW CREATE DATABASE + show.ShowType = ast.ShowCreateDB + p.nextToken() case token.COLUMNS: show.ShowType = ast.ShowColumns p.nextToken() diff --git a/parser/testdata/02710_show_table/metadata.json b/parser/testdata/02710_show_table/metadata.json index 60106a3b25..0967ef424b 100644 --- a/parser/testdata/02710_show_table/metadata.json +++ b/parser/testdata/02710_show_table/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt9": true - } -} +{} diff --git a/parser/testdata/03663_parameterized_views_formatting_of_substitutions_excessive_backticks/metadata.json b/parser/testdata/03663_parameterized_views_formatting_of_substitutions_excessive_backticks/metadata.json index 3a06a4a1ac..0967ef424b 100644 --- a/parser/testdata/03663_parameterized_views_formatting_of_substitutions_excessive_backticks/metadata.json +++ b/parser/testdata/03663_parameterized_views_formatting_of_substitutions_excessive_backticks/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt5": true - } -} +{} From c32258190aacd61ca6947f2a48c5b3714fd976a6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:46:00 +0000 Subject: [PATCH 22/47] Strip session/global prefix from MySQL system variables For @@session.varname or 
@@global.varname syntax, strip the session/global scope qualifier since ClickHouse treats them as just @@varname in EXPLAIN. Fixes test: 01337_mysql_global_variables --- parser/expression.go | 16 ++++++++++------ .../01337_mysql_global_variables/metadata.json | 7 +------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/parser/expression.go b/parser/expression.go index c9928c0a35..4ec412c559 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -626,13 +626,17 @@ func (p *Parser) parseIdentifierOrFunction() ast.Expression { // Convert to globalVariable('varname') function call with alias @@varname if strings.HasPrefix(name, "@@") { varName := name[2:] // Strip @@ - // Handle @@session.var or @@global.var + // Handle @@session.var or @@global.var - strip the session/global prefix if p.currentIs(token.DOT) { - p.nextToken() - if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { - varName = varName + "." + p.current.Value - name = name + "." + p.current.Value - p.nextToken() + upper := strings.ToUpper(varName) + if upper == "SESSION" || upper == "GLOBAL" { + // Skip the session/global qualifier + p.nextToken() // skip DOT + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + varName = p.current.Value + name = "@@" + p.current.Value + p.nextToken() + } } } return &ast.FunctionCall{ diff --git a/parser/testdata/01337_mysql_global_variables/metadata.json b/parser/testdata/01337_mysql_global_variables/metadata.json index 0f293987f1..0967ef424b 100644 --- a/parser/testdata/01337_mysql_global_variables/metadata.json +++ b/parser/testdata/01337_mysql_global_variables/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt5": true, - "stmt6": true - } -} +{} From 8219118b0105c46fb758107f451e526d59c324f8 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:50:22 +0000 Subject: [PATCH 23/47] Add \e escape sequence support for PHP/MySQL style strings Handle the \e escape sequence (escape character, ASCII 27) in string literals for MySQL/PHP compatibility. Fixes test: 01284_escape_sequences_php_mysql_style --- lexer/lexer.go | 2 ++ .../01284_escape_sequences_php_mysql_style/metadata.json | 7 +------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index d0cb866ce9..845b1a7d48 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -500,6 +500,8 @@ func (l *Lexer) readString(quote rune) Item { sb.WriteRune('\f') case 'v': sb.WriteRune('\v') + case 'e': + sb.WriteRune('\x1b') // escape character (ASCII 27) case 'x': // Hex escape: \xNN l.readChar() diff --git a/parser/testdata/01284_escape_sequences_php_mysql_style/metadata.json b/parser/testdata/01284_escape_sequences_php_mysql_style/metadata.json index 04dec16ad7..0967ef424b 100644 --- a/parser/testdata/01284_escape_sequences_php_mysql_style/metadata.json +++ b/parser/testdata/01284_escape_sequences_php_mysql_style/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt1": true, - "stmt5": true - } -} +{} From ebb30724660cce63492469023a45146a69f6a553 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 03:55:32 +0000 Subject: [PATCH 24/47] Fix OFFSET ROW parsing to accept both singular and plural forms The SQL standard OFFSET...FETCH syntax uses singular "ROW" (e.g., "OFFSET 1 ROW") but the parser only checked for "ROWS" (plural). This caused ROW to be incorrectly parsed as a subquery alias. Fixed by checking for both "ROW" and "ROWS" when consuming the optional keyword after the OFFSET expression. 
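Both spellings should now parse; an assumed example (not from the test file):

    SELECT number FROM numbers(10) ORDER BY number OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
    SELECT number FROM numbers(10) ORDER BY number OFFSET 1 ROWS FETCH FIRST 3 ROWS ONLY;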
--- parser/parser.go | 4 ++-- .../01525_select_with_offset_fetch_clause/metadata.json | 7 +------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index ab24736a32..6eea430306 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1261,8 +1261,8 @@ func (p *Parser) parseSelectInternal(preParsedWith []ast.Expression) *ast.Select if p.currentIs(token.OFFSET) { p.nextToken() sel.Offset = p.parseExpression(LOWEST) - // Skip optional ROWS keyword - if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "ROWS" { + // Skip optional ROW/ROWS keyword (SQL standard) + if p.currentIs(token.IDENT) && (strings.ToUpper(p.current.Value) == "ROW" || strings.ToUpper(p.current.Value) == "ROWS") { p.nextToken() } // LIMIT n OFFSET m BY expr syntax - handle BY after OFFSET diff --git a/parser/testdata/01525_select_with_offset_fetch_clause/metadata.json b/parser/testdata/01525_select_with_offset_fetch_clause/metadata.json index 0f293987f1..0967ef424b 100644 --- a/parser/testdata/01525_select_with_offset_fetch_clause/metadata.json +++ b/parser/testdata/01525_select_with_offset_fetch_clause/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt5": true, - "stmt6": true - } -} +{} From a1cb2fd9445ab63555a89a2339774858ed8cb51f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:00:45 +0000 Subject: [PATCH 25/47] Support empty USING () clause in JOINs When parsing USING (), distinguish between "no USING clause" (nil) and "empty USING clause" (empty non-nil slice). This ensures the explain output correctly shows the ExpressionList node even when empty. --- internal/explain/tables.go | 15 ++++++++++----- parser/parser.go | 7 ++++++- .../metadata.json | 7 +------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/internal/explain/tables.go b/internal/explain/tables.go index c4599aa10c..65b38bb2ee 100644 --- a/internal/explain/tables.go +++ b/internal/explain/tables.go @@ -312,7 +312,7 @@ func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, dept if n.On != nil { children++ } - if len(n.Using) > 0 { + if n.Using != nil { children++ } if children > 0 { @@ -323,10 +323,15 @@ func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, dept if n.On != nil { Node(sb, n.On, depth+1) } - if len(n.Using) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) - for _, u := range n.Using { - Node(sb, u, depth+2) + if n.Using != nil { + if len(n.Using) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) + for _, u := range n.Using { + Node(sb, u, depth+2) + } + } else { + // Empty USING () + fmt.Fprintf(sb, "%s ExpressionList\n", indent) } } } diff --git a/parser/parser.go b/parser/parser.go index 6eea430306..3ee091f373 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1658,7 +1658,12 @@ func (p *Parser) parseTableElementWithJoin() *ast.TablesInSelectQueryElement { p.nextToken() if p.currentIs(token.LPAREN) { p.nextToken() - join.Using = p.parseExpressionList() + exprs := p.parseExpressionList() + if exprs == nil { + // Empty USING () - use empty non-nil slice to distinguish from no USING + exprs = []ast.Expression{} + } + join.Using = exprs p.expect(token.RPAREN) } else { join.Using = p.parseExpressionList() diff --git a/parser/testdata/03538_crash_in_parallel_hash_with_empty_using/metadata.json b/parser/testdata/03538_crash_in_parallel_hash_with_empty_using/metadata.json index bc141058a4..0967ef424b 100644 --- 
a/parser/testdata/03538_crash_in_parallel_hash_with_empty_using/metadata.json +++ b/parser/testdata/03538_crash_in_parallel_hash_with_empty_using/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt2": true, - "stmt3": true - } -} +{} From b482d3d4c58203da962bc00a77589f31ccb4c315 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:05:32 +0000 Subject: [PATCH 26/47] Fix SYSTEM command parsing for TTL MERGES table names When parsing SYSTEM STOP/START TTL MERGES commands, the table name was being consumed as part of the command because isSystemCommandKeyword() uses case-insensitive matching. A table named 'ttl' would match 'TTL'. Added check to break command parsing after certain complete command suffixes (MERGES, MOVES, FETCHES, SENDS, MUTATIONS) so the next token is correctly parsed as the table name. --- parser/parser.go | 13 +++++++++++++ .../00976_system_stop_ttl_merges/metadata.json | 7 +------ .../01282_system_parts_ttl_info/metadata.json | 7 +------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 3ee091f373..6ae20a16b2 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6678,6 +6678,19 @@ func (p *Parser) parseSystem() *ast.SystemQuery { if p.currentIs(token.IDENT) && p.peekIs(token.DOT) { break } + // Check if we've completed a command that expects a table name next + // Commands like STOP MERGES, START MERGES, STOP TTL MERGES, START TTL MERGES, etc. + if len(parts) > 0 { + upperCmd := strings.ToUpper(strings.Join(parts, " ")) + if strings.HasSuffix(upperCmd, " MERGES") || + strings.HasSuffix(upperCmd, " MOVES") || + strings.HasSuffix(upperCmd, " FETCHES") || + strings.HasSuffix(upperCmd, " SENDS") || + strings.HasSuffix(upperCmd, " MUTATIONS") { + // Next token should be the table name + break + } + } // Check if this is a plain IDENT (not a command keyword) followed by end-of-statement // This indicates it's likely a table name, not part of the command // Exception: after FAILPOINT, the identifier is the failpoint name (part of command) diff --git a/parser/testdata/00976_system_stop_ttl_merges/metadata.json b/parser/testdata/00976_system_stop_ttl_merges/metadata.json index 60106a3b25..0967ef424b 100644 --- a/parser/testdata/00976_system_stop_ttl_merges/metadata.json +++ b/parser/testdata/00976_system_stop_ttl_merges/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt9": true - } -} +{} diff --git a/parser/testdata/01282_system_parts_ttl_info/metadata.json b/parser/testdata/01282_system_parts_ttl_info/metadata.json index 22ed9a7783..0967ef424b 100644 --- a/parser/testdata/01282_system_parts_ttl_info/metadata.json +++ b/parser/testdata/01282_system_parts_ttl_info/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt6": true - } -} +{} From 6a789457a423855875e0b82b9dc0d700c3196316 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:10:25 +0000 Subject: [PATCH 27/47] Allow EXISTS keyword as column identifier when not followed by ( In queries like 'WHERE exists' where 'exists' is a column name, the parser was treating EXISTS as the start of an EXISTS(subquery) expression and failing when no ( was found. Now if EXISTS is not followed by (, it's treated as an identifier (column name) instead of the subquery existence operator. 
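A rough sketch of the query shape in question, using a hypothetical table with a column literally named exists (all names here are assumptions, not taken from the test):

    CREATE TABLE t_exists (`exists` UInt8) ENGINE = Memory;  -- hypothetical table
    SELECT count() FROM t_exists WHERE exists;               -- 'exists' is the column, not EXISTS(subquery)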
--- parser/expression.go | 18 +++++++++++++----- .../metadata.json | 7 +------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/parser/expression.go b/parser/expression.go index 4ec412c559..0580a6ce62 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -1669,15 +1669,23 @@ func (p *Parser) parseInterval() ast.Expression { } func (p *Parser) parseExists() ast.Expression { - expr := &ast.ExistsExpr{ - Position: p.current.Pos, - } + pos := p.current.Pos p.nextToken() // skip EXISTS - if !p.expect(token.LPAREN) { - return nil + // If not followed by (, treat EXISTS as an identifier (column name) + if !p.currentIs(token.LPAREN) { + return &ast.Identifier{ + Position: pos, + Parts: []string{"exists"}, + } } + expr := &ast.ExistsExpr{ + Position: pos, + } + + p.nextToken() // skip ( + expr.Query = p.parseSelectWithUnion() p.expect(token.RPAREN) diff --git a/parser/testdata/03100_lwu_45_query_condition_cache/metadata.json b/parser/testdata/03100_lwu_45_query_condition_cache/metadata.json index afaaa4b0a6..0967ef424b 100644 --- a/parser/testdata/03100_lwu_45_query_condition_cache/metadata.json +++ b/parser/testdata/03100_lwu_45_query_condition_cache/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt7": true, - "stmt9": true - } -} +{} From 0c3ede973a3eb08a6bb65b0f369ec6d8773e7c6f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:19:10 +0000 Subject: [PATCH 28/47] Fix table alias parsing order - alias before FINAL In ClickHouse syntax, table aliases come BEFORE the FINAL keyword: FROM table_name t FINAL WHERE ... The parser was checking for FINAL before alias, which meant FINAL wasn't being consumed when an alias was present. This caused the subsequent UNION or WHERE clause to be missed. Reordered to parse alias first, then FINAL, then SAMPLE. 
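An assumed example of the ordering described above (alias first, then FINAL, then the remaining clauses); the table is hypothetical:

    CREATE TABLE t_final (k UInt64, v UInt64) ENGINE = ReplacingMergeTree ORDER BY k;
    SELECT v FROM t_final tf FINAL WHERE k = 1;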
--- parser/parser.go | 38 ++++++++++--------- .../02380_analyzer_join_sample/metadata.json | 6 +-- .../02381_analyzer_join_final/metadata.json | 6 +-- .../metadata.json | 6 +-- .../testdata/03037_union_view/metadata.json | 7 +--- .../metadata.json | 6 +-- .../03400_distributed_final/metadata.json | 6 +-- 7 files changed, 26 insertions(+), 49 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 6ae20a16b2..cd82d43f46 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1725,7 +1725,26 @@ func (p *Parser) parseTableExpression() *ast.TableExpression { } } - // Handle FINAL + // Handle alias (keywords like LEFT, RIGHT, FIRST can be used as aliases after AS, + // or without AS if they're not clause keywords) + // In ClickHouse, alias comes BEFORE FINAL: FROM table t FINAL + if p.currentIs(token.AS) { + p.nextToken() + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + expr.Alias = p.current.Value + p.nextToken() + } + } else if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && !p.isKeywordForClause() && !p.currentIs(token.FINAL) && !p.currentIs(token.SAMPLE) { + // Don't consume PARALLEL as alias if followed by WITH (parallel query syntax) + if p.currentIs(token.PARALLEL) && p.peekIs(token.WITH) { + return expr + } + // Don't consume FINAL or SAMPLE as alias + expr.Alias = p.current.Value + p.nextToken() + } + + // Handle FINAL (after alias) if p.currentIs(token.FINAL) { expr.Final = true p.nextToken() @@ -1744,23 +1763,6 @@ func (p *Parser) parseTableExpression() *ast.TableExpression { } } - // Handle alias (keywords like LEFT, RIGHT, FIRST can be used as aliases after AS, - // or without AS if they're not clause keywords) - if p.currentIs(token.AS) { - p.nextToken() - if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { - expr.Alias = p.current.Value - p.nextToken() - } - } else if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && !p.isKeywordForClause() { - // Don't consume PARALLEL as alias if followed by WITH (parallel query syntax) - if p.currentIs(token.PARALLEL) && p.peekIs(token.WITH) { - return expr - } - expr.Alias = p.current.Value - p.nextToken() - } - return expr } diff --git a/parser/testdata/02380_analyzer_join_sample/metadata.json b/parser/testdata/02380_analyzer_join_sample/metadata.json index 342b3ff5b4..0967ef424b 100644 --- a/parser/testdata/02380_analyzer_join_sample/metadata.json +++ b/parser/testdata/02380_analyzer_join_sample/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt8": true - } -} +{} diff --git a/parser/testdata/02381_analyzer_join_final/metadata.json b/parser/testdata/02381_analyzer_join_final/metadata.json index c45b7602ba..0967ef424b 100644 --- a/parser/testdata/02381_analyzer_join_final/metadata.json +++ b/parser/testdata/02381_analyzer_join_final/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt12": true - } -} +{} diff --git a/parser/testdata/02790_optimize_skip_unused_shards_join/metadata.json b/parser/testdata/02790_optimize_skip_unused_shards_join/metadata.json index 7ad5569408..0967ef424b 100644 --- a/parser/testdata/02790_optimize_skip_unused_shards_join/metadata.json +++ b/parser/testdata/02790_optimize_skip_unused_shards_join/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt9": true - } -} +{} diff --git a/parser/testdata/03037_union_view/metadata.json b/parser/testdata/03037_union_view/metadata.json index 0f293987f1..0967ef424b 100644 --- a/parser/testdata/03037_union_view/metadata.json +++ b/parser/testdata/03037_union_view/metadata.json @@ -1,6 +1 @@ -{ - 
"explain_todo": { - "stmt5": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json b/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json index 7b4455cd5f..0967ef424b 100644 --- a/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json +++ b/parser/testdata/03254_last_2_samples_aggregate_function/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt16": true - } -} +{} diff --git a/parser/testdata/03400_distributed_final/metadata.json b/parser/testdata/03400_distributed_final/metadata.json index 7ad5569408..0967ef424b 100644 --- a/parser/testdata/03400_distributed_final/metadata.json +++ b/parser/testdata/03400_distributed_final/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt9": true - } -} +{} From 705a905e107c65ea0c8bb7da41d2cbb876f5eaeb Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:25:21 +0000 Subject: [PATCH 29/47] Fix ADD CONSTRAINT explain output to show expression For ALTER TABLE ADD CONSTRAINT, the explain output was showing just the constraint name as an Identifier. Changed to show the constraint's expression properly: Constraint (children 1) Subquery/Function/etc (the expression) This fixes subquery constraints like CHECK (SELECT 1). --- internal/explain/statements.go | 7 ++++++- .../metadata.json | 7 +------ .../03594_constraint_subqery_logical_error/metadata.json | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 008aded2e5..7930038cd3 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -1723,7 +1723,12 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri } case ast.AlterAddConstraint: if cmd.Constraint != nil { - fmt.Fprintf(sb, "%s Identifier %s\n", indent, cmd.Constraint.Name) + if cmd.Constraint.Expression != nil { + fmt.Fprintf(sb, "%s Constraint (children 1)\n", indent) + Node(sb, cmd.Constraint.Expression, depth+2) + } else { + fmt.Fprintf(sb, "%s Constraint\n", indent) + } } case ast.AlterDropConstraint: if cmd.ConstraintName != "" { diff --git a/parser/testdata/00988_constraints_replication_zookeeper_long/metadata.json b/parser/testdata/00988_constraints_replication_zookeeper_long/metadata.json index e80c16c5c9..0967ef424b 100644 --- a/parser/testdata/00988_constraints_replication_zookeeper_long/metadata.json +++ b/parser/testdata/00988_constraints_replication_zookeeper_long/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt13": true, - "stmt14": true - } -} +{} diff --git a/parser/testdata/03594_constraint_subqery_logical_error/metadata.json b/parser/testdata/03594_constraint_subqery_logical_error/metadata.json index c84e30800d..b563327205 100644 --- a/parser/testdata/03594_constraint_subqery_logical_error/metadata.json +++ b/parser/testdata/03594_constraint_subqery_logical_error/metadata.json @@ -1,6 +1,5 @@ { "explain_todo": { - "stmt3": true, "stmt7": true } } From f1f302bdcf23c15b5b76fd14c916380820344ae7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:44:14 +0000 Subject: [PATCH 30/47] Fix tuple literal expansion in IN expressions and explain output - In IN expressions, only expand tuple literals when all elements are parenthesized primitives (e.g., `1 IN (((1), (2)))` expands to Function tuple with 2 elements) - Tuples with non-parenthesized elements or nested tuples stay as Literal Tuple_ (e.g., `(1, '') IN ((1, ''))` renders as Literal Tuple_(...)) - Update 
explainLiteral to check for parenthesized elements when deciding between Function tuple and Literal Tuple_ format This fixes test 02370_analyzer_in_function and also enables stmt8 in 03552_inconsistent_formatting_operator_as_table_function. --- internal/explain/expressions.go | 24 ++++++------ internal/explain/functions.go | 39 +++++++++++++++++-- .../02370_analyzer_in_function/metadata.json | 7 +--- .../metadata.json | 3 +- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 490f15ba53..a4b62096d3 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -56,19 +56,17 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in fmt.Fprintf(sb, "%s ExpressionList\n", indent) return } - // Single-element tuples (from trailing comma syntax like (1,)) always render as Function tuple - if len(exprs) == 1 { - fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) - for _, e := range exprs { - Node(sb, e, depth+2) - } - return - } + // Check if any element is parenthesized (e.g., ((1), (2)) vs (1, 2)) + // Parenthesized elements mean the tuple should render as Function tuple + hasParenthesizedElement := false hasComplexExpr := false for _, e := range exprs { - // Simple literals (numbers, strings, etc.) are OK + // Check for parenthesized literals if lit, isLit := e.(*ast.Literal); isLit { + if lit.Parenthesized { + hasParenthesizedElement = true + break + } // Nested tuples that contain only primitive literals are OK if lit.Type == ast.LiteralTuple { if !containsOnlyPrimitiveLiteralsWithUnary(lit) { @@ -82,7 +80,6 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in hasComplexExpr = true break } - // Other literals are simple continue } // Unary negation of numeric literals is also simple @@ -97,8 +94,9 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in hasComplexExpr = true break } - if hasComplexExpr { - // Render as Function tuple instead of Literal + // Single-element tuples (from trailing comma syntax like (1,)) always render as Function tuple + // Tuples with complex expressions or parenthesized elements also render as Function tuple + if len(exprs) == 1 || hasComplexExpr || hasParenthesizedElement { fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) for _, e := range exprs { diff --git a/internal/explain/functions.go b/internal/explain/functions.go index c8d51603ec..0cc7b5b726 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -1107,9 +1107,42 @@ func explainInExpr(sb *strings.Builder, n *ast.InExpr, indent string, depth int) // Otherwise, output the element directly if lit, ok := n.List[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple { // Wrap tuple literal in Function tuple - fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - Node(sb, n.List[0], depth+4) + // Check if all elements are parenthesized primitives - if so, expand them + // Otherwise, keep the tuple as a Literal + elems, ok := lit.Value.([]ast.Expression) + if !ok { + // Fallback if Value isn't []ast.Expression + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + 
Node(sb, n.List[0], depth+4) + } else { + // Check if all elements are parenthesized primitives + allParenthesizedPrimitives := true + for _, elem := range elems { + if primLit, isPrim := elem.(*ast.Literal); isPrim { + if !primLit.Parenthesized || primLit.Type == ast.LiteralTuple || primLit.Type == ast.LiteralArray { + allParenthesizedPrimitives = false + break + } + } else { + allParenthesizedPrimitives = false + break + } + } + + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + if allParenthesizedPrimitives { + // Expand the elements + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(elems)) + for _, elem := range elems { + Node(sb, elem, depth+4) + } + } else { + // Keep as a single Literal Tuple + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.List[0], depth+4) + } + } } else { // Single non-tuple element - output directly Node(sb, n.List[0], depth+2) diff --git a/parser/testdata/02370_analyzer_in_function/metadata.json b/parser/testdata/02370_analyzer_in_function/metadata.json index 8162ad6436..0967ef424b 100644 --- a/parser/testdata/02370_analyzer_in_function/metadata.json +++ b/parser/testdata/02370_analyzer_in_function/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt21": true, - "stmt9": true - } -} +{} diff --git a/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json b/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json index d02612666a..3a06a4a1ac 100644 --- a/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json +++ b/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json @@ -1,6 +1,5 @@ { "explain_todo": { - "stmt5": true, - "stmt8": true + "stmt5": true } } From e61ed878002917ef8cc2ce22c6825445b18ad7e7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 04:49:48 +0000 Subject: [PATCH 31/47] Propagate WITH clause to subsequent SELECTs in UNION queries In ClickHouse's EXPLAIN AST output, the WITH clause from the first SELECT in a UNION ALL/UNION query is propagated to subsequent SELECTs. The inherited WITH clause is output at the END of children for those subsequent SELECT queries. This fix applies the same WITH clause propagation logic that was already implemented for INTERSECT/EXCEPT queries to plain UNION queries. 
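A minimal illustrative query (assumed, not taken from the listed tests) where the second SELECT inherits the WITH of the first:

    EXPLAIN AST WITH 1 AS x SELECT x UNION ALL SELECT x;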
Fixes tests: - 01515_with_global_and_with_propagation (stmt5, stmt11) - 03671_pk_in_subquery_context_expired (stmt7) - 03611_uniqExact_bug (stmt2) - 03033_analyzer_resolve_from_parent_scope (stmt4) - 01236_graphite_mt (stmt4) --- internal/explain/select.go | 16 ++++++++++++++-- parser/testdata/01236_graphite_mt/metadata.json | 6 +----- .../metadata.json | 7 +------ .../metadata.json | 6 +----- .../testdata/03611_uniqExact_bug/metadata.json | 6 +----- .../metadata.json | 6 +----- 6 files changed, 19 insertions(+), 28 deletions(-) diff --git a/internal/explain/select.go b/internal/explain/select.go index 9b5febb49f..4adc1023a4 100644 --- a/internal/explain/select.go +++ b/internal/explain/select.go @@ -300,8 +300,20 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer selects := simplifyUnionSelects(n.Selects) // Wrap selects in ExpressionList fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(selects)) - for _, sel := range selects { - Node(sb, sel, depth+2) + + // Check if first operand has a WITH clause to be inherited by subsequent operands + var inheritedWith []ast.Expression + if len(selects) > 0 { + inheritedWith = extractWithClause(selects[0]) + } + + for i, sel := range selects { + if i > 0 && len(inheritedWith) > 0 { + // Subsequent operands inherit the WITH clause from the first operand + explainSelectQueryWithInheritedWith(sb, sel, inheritedWith, depth+2) + } else { + Node(sb, sel, depth+2) + } } // INTO OUTFILE clause - check if any SelectQuery has IntoOutfile set for _, sel := range n.Selects { diff --git a/parser/testdata/01236_graphite_mt/metadata.json b/parser/testdata/01236_graphite_mt/metadata.json index b65b07d7a6..0967ef424b 100644 --- a/parser/testdata/01236_graphite_mt/metadata.json +++ b/parser/testdata/01236_graphite_mt/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt4": true - } -} +{} diff --git a/parser/testdata/01515_with_global_and_with_propagation/metadata.json b/parser/testdata/01515_with_global_and_with_propagation/metadata.json index 15d79beb93..0967ef424b 100644 --- a/parser/testdata/01515_with_global_and_with_propagation/metadata.json +++ b/parser/testdata/01515_with_global_and_with_propagation/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt11": true, - "stmt5": true - } -} +{} diff --git a/parser/testdata/03033_analyzer_resolve_from_parent_scope/metadata.json b/parser/testdata/03033_analyzer_resolve_from_parent_scope/metadata.json index b65b07d7a6..0967ef424b 100644 --- a/parser/testdata/03033_analyzer_resolve_from_parent_scope/metadata.json +++ b/parser/testdata/03033_analyzer_resolve_from_parent_scope/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt4": true - } -} +{} diff --git a/parser/testdata/03611_uniqExact_bug/metadata.json b/parser/testdata/03611_uniqExact_bug/metadata.json index ef58f80315..0967ef424b 100644 --- a/parser/testdata/03611_uniqExact_bug/metadata.json +++ b/parser/testdata/03611_uniqExact_bug/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt2": true - } -} +{} diff --git a/parser/testdata/03671_pk_in_subquery_context_expired/metadata.json b/parser/testdata/03671_pk_in_subquery_context_expired/metadata.json index b563327205..0967ef424b 100644 --- a/parser/testdata/03671_pk_in_subquery_context_expired/metadata.json +++ b/parser/testdata/03671_pk_in_subquery_context_expired/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt7": true - } -} +{} From f0aac10b74de6df9a5b5ede38b3b3c7fb06c7157 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 
2026 04:57:19 +0000 Subject: [PATCH 32/47] Fix database-qualified dictionary names in DETACH/ATTACH statements The parser was not handling database-qualified dictionary names like `db.dict` in DETACH DICTIONARY and ATTACH DICTIONARY statements. Parser changes: - parseDetach: Allow qualified names for DICTIONARY (database.dict) - parseAttach: Allow qualified names for DICTIONARY (database.dict) Explain changes: - explainDetachQuery: Handle Database + Dictionary case - explainAttachQuery: Handle Database + Dictionary case Fixes tests: - 01110_dictionary_layout_without_arguments (stmt7, stmt8) - 01575_disable_detach_table_of_dictionary (stmt7, stmt9) - 01018_ddl_dictionaries_create (stmt17, stmt22) --- internal/explain/statements.go | 20 ++++++++++++++++--- parser/parser.go | 20 +++++++++++++------ .../metadata.json | 7 +------ .../metadata.json | 7 +------ .../metadata.json | 7 +------ 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 7930038cd3..5900f900f0 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -1255,8 +1255,16 @@ func explainDetachQuery(sb *strings.Builder, n *ast.DetachQuery, indent string) fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) return } + // Check for database-qualified dictionary name + if n.Database != "" && n.Dictionary != "" { + // Database-qualified: DetachQuery db dict (children 2) + fmt.Fprintf(sb, "%sDetachQuery %s %s (children 2)\n", indent, n.Database, n.Dictionary) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Database) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Dictionary) + return + } // DETACH DATABASE db: Database set, Table empty -> "DetachQuery db (children 1)" - if n.Database != "" && n.Table == "" { + if n.Database != "" && n.Table == "" && n.Dictionary == "" { fmt.Fprintf(sb, "%sDetachQuery %s (children 1)\n", indent, n.Database) fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Database) return @@ -1280,7 +1288,7 @@ func explainDetachQuery(sb *strings.Builder, n *ast.DetachQuery, indent string) func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, depth int) { // Count children: identifier + columns definition (if any) + select query (if any) + storage/view targets (if any) children := 1 // table/database identifier - if n.Database != "" && n.Table != "" { + if n.Database != "" && (n.Table != "" || n.Dictionary != "") { children++ // extra identifier for database } hasColumns := len(n.Columns) > 0 || len(n.ColumnsPrimaryKey) > 0 || len(n.Indexes) > 0 @@ -1301,7 +1309,13 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string, fmt.Fprintf(sb, "%sAttachQuery %s %s (children %d)\n", indent, n.Database, n.Table, children) fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Database) fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) - } else if n.Database != "" && n.Table == "" { + } else if n.Database != "" && n.Dictionary != "" { + // Database-qualified dictionary: ATTACH DICTIONARY db.dict + fmt.Fprintf(sb, "%sAttachQuery %s %s (children %d)\n", indent, n.Database, n.Dictionary, children) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Database) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Dictionary) + return // Dictionary doesn't have columns or storage + } else if n.Database != "" && n.Table == "" && n.Dictionary == "" { fmt.Fprintf(sb, "%sAttachQuery %s (children %d)\n", indent, n.Database, children) fmt.Fprintf(sb, "%s Identifier %s\n", indent, 
n.Database) } else if n.Table != "" { diff --git a/parser/parser.go b/parser/parser.go index cd82d43f46..d025e9128a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6990,12 +6990,16 @@ func (p *Parser) parseDetach() *ast.DetachQuery { p.nextToken() } - // Parse name (can be qualified: database.table for TABLE, not for DATABASE/DICTIONARY) + // Parse name (can be qualified: database.table for TABLE, database.dict for DICTIONARY) name := p.parseIdentifierName() - if p.currentIs(token.DOT) && !isDatabase && !isDictionary { + if p.currentIs(token.DOT) && !isDatabase { p.nextToken() detach.Database = name - detach.Table = p.parseIdentifierName() + if isDictionary { + detach.Dictionary = p.parseIdentifierName() + } else { + detach.Table = p.parseIdentifierName() + } } else if isDatabase { detach.Database = name } else if isDictionary { @@ -7047,12 +7051,16 @@ func (p *Parser) parseAttach() *ast.AttachQuery { } } - // Parse name (can be qualified: database.table for TABLE, not for DATABASE/DICTIONARY) + // Parse name (can be qualified: database.table for TABLE, database.dict for DICTIONARY) name := p.parseIdentifierName() - if p.currentIs(token.DOT) && !isDatabase && !isDictionary { + if p.currentIs(token.DOT) && !isDatabase { p.nextToken() attach.Database = name - attach.Table = p.parseIdentifierName() + if isDictionary { + attach.Dictionary = p.parseIdentifierName() + } else { + attach.Table = p.parseIdentifierName() + } } else if isDatabase { attach.Database = name } else if isDictionary { diff --git a/parser/testdata/01018_ddl_dictionaries_create/metadata.json b/parser/testdata/01018_ddl_dictionaries_create/metadata.json index e9cc89b339..0967ef424b 100644 --- a/parser/testdata/01018_ddl_dictionaries_create/metadata.json +++ b/parser/testdata/01018_ddl_dictionaries_create/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt17": true, - "stmt22": true - } -} +{} diff --git a/parser/testdata/01110_dictionary_layout_without_arguments/metadata.json b/parser/testdata/01110_dictionary_layout_without_arguments/metadata.json index f6d9f2395b..0967ef424b 100644 --- a/parser/testdata/01110_dictionary_layout_without_arguments/metadata.json +++ b/parser/testdata/01110_dictionary_layout_without_arguments/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt7": true, - "stmt8": true - } -} +{} diff --git a/parser/testdata/01575_disable_detach_table_of_dictionary/metadata.json b/parser/testdata/01575_disable_detach_table_of_dictionary/metadata.json index afaaa4b0a6..0967ef424b 100644 --- a/parser/testdata/01575_disable_detach_table_of_dictionary/metadata.json +++ b/parser/testdata/01575_disable_detach_table_of_dictionary/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt7": true, - "stmt9": true - } -} +{} From c104ce8d6c7402fe668447fd6324d79e57f8a471 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 05:09:36 +0000 Subject: [PATCH 33/47] Accept keywords as index type names in ALTER ADD INDEX The parser was only accepting identifiers (token.IDENT) for index type names like "set" in "ADD INDEX idx c TYPE set(0)". However, "set" is tokenized as a keyword (token.SET). This fix allows keywords to be used as index type names and AFTER index names, matching ClickHouse behavior. 
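For illustration, statements of the kind this change accepts (table, column, and index names here are hypothetical):

    ALTER TABLE t ADD INDEX idx c TYPE set(0);              -- hypothetical table t, column c
    ALTER TABLE t ADD INDEX idx2 d TYPE minmax AFTER idx;   -- AFTER followed by an index name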
Fixes tests: - 01932_alter_index_with_order (stmt5, stmt6) - 03629_storage_s3_disallow_index_alter (stmt3) - 02131_skip_index_not_materialized (stmt4) --- parser/parser.go | 6 ++++-- parser/testdata/01932_alter_index_with_order/metadata.json | 7 +------ .../02131_skip_index_not_materialized/metadata.json | 6 +----- .../03629_storage_s3_disallow_index_alter/metadata.json | 6 +----- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index d025e9128a..be8d0b5aac 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5305,9 +5305,10 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" { p.nextToken() // Type is a function call like bloom_filter(0.025) or vector_similarity('hnsw', 'L2Distance', 1) + // Note: Index types can be keywords (e.g., SET) so we accept both IDENT and keywords pos := p.current.Pos typeName := "" - if p.currentIs(token.IDENT) { + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { typeName = p.current.Value cmd.IndexType = typeName p.nextToken() @@ -5339,7 +5340,8 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { // Parse AFTER if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "AFTER" { p.nextToken() - if p.currentIs(token.IDENT) { + // Index name can be an identifier or keyword + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { cmd.AfterIndex = p.current.Value p.nextToken() } diff --git a/parser/testdata/01932_alter_index_with_order/metadata.json b/parser/testdata/01932_alter_index_with_order/metadata.json index 0f293987f1..0967ef424b 100644 --- a/parser/testdata/01932_alter_index_with_order/metadata.json +++ b/parser/testdata/01932_alter_index_with_order/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt5": true, - "stmt6": true - } -} +{} diff --git a/parser/testdata/02131_skip_index_not_materialized/metadata.json b/parser/testdata/02131_skip_index_not_materialized/metadata.json index b65b07d7a6..0967ef424b 100644 --- a/parser/testdata/02131_skip_index_not_materialized/metadata.json +++ b/parser/testdata/02131_skip_index_not_materialized/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt4": true - } -} +{} diff --git a/parser/testdata/03629_storage_s3_disallow_index_alter/metadata.json b/parser/testdata/03629_storage_s3_disallow_index_alter/metadata.json index 1295a45747..0967ef424b 100644 --- a/parser/testdata/03629_storage_s3_disallow_index_alter/metadata.json +++ b/parser/testdata/03629_storage_s3_disallow_index_alter/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt3": true - } -} +{} From c7b4c6e844c30e5d2941df6162392de9f80995e5 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:27:36 +0000 Subject: [PATCH 34/47] Render arrays with parenthesized elements as Function array Arrays containing parenthesized elements like [('a')] should be rendered as Function array with children, not as Literal Array_[...]. This matches ClickHouse's EXPLAIN AST behavior where parenthesized elements inside arrays require the expanded function format. 
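An illustrative query that now takes the Function array path (literal values are arbitrary):

    SELECT [('a')];          -- single parenthesized element
    SELECT [('a'), ('b')];   -- multiple parenthesized elements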
Fixes tests: - 02354_tuple_element_with_default (stmt5, stmt14) - 03552_inconsistent_formatting_operator_as_table_function (stmt5) --- internal/explain/expressions.go | 4 ++++ .../02354_tuple_element_with_default/metadata.json | 7 +------ .../metadata.json | 6 +----- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index a4b62096d3..5f4e0ac446 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -132,6 +132,10 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in for _, e := range exprs { if lit, ok := e.(*ast.Literal); ok { + // Parenthesized elements require Function array format + if lit.Parenthesized { + shouldUseFunctionArray = true + } if lit.Type == ast.LiteralArray { hasNestedArrays = true // Check if inner array needs Function array format: diff --git a/parser/testdata/02354_tuple_element_with_default/metadata.json b/parser/testdata/02354_tuple_element_with_default/metadata.json index 9e5708f210..0967ef424b 100644 --- a/parser/testdata/02354_tuple_element_with_default/metadata.json +++ b/parser/testdata/02354_tuple_element_with_default/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt14": true, - "stmt5": true - } -} +{} diff --git a/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json b/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json index 3a06a4a1ac..0967ef424b 100644 --- a/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json +++ b/parser/testdata/03552_inconsistent_formatting_operator_as_table_function/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt5": true - } -} +{} From 1204a30e5d2fc605a2f9ee4723608c9ebfe669aa Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:32:53 +0000 Subject: [PATCH 35/47] Handle LIMIT offset, count syntax after LIMIT BY clause When parsing LIMIT after LIMIT BY (e.g., LIMIT 1 BY x LIMIT 5, 5), the parser was only capturing the first value. This fix handles the comma syntax to correctly parse both offset and count values. 
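A sketch of the affected syntax, using a hypothetical table t:

    SELECT x, y FROM t LIMIT 1 BY x LIMIT 5, 5;   -- first 5 is the offset, second 5 is the count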
Fixes test: - 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET (stmt2, stmt3) --- parser/parser.go | 6 ++++++ .../metadata.json | 7 +------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index be8d0b5aac..6a8fe0b709 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1247,6 +1247,12 @@ func (p *Parser) parseSelectInternal(preParsedWith []ast.Expression) *ast.Select p.nextToken() sel.Limit = p.parseExpression(LOWEST) sel.LimitByHasLimit = true + // Handle LIMIT offset, count syntax + if p.currentIs(token.COMMA) { + p.nextToken() + sel.Offset = sel.Limit + sel.Limit = p.parseExpression(LOWEST) + } } } diff --git a/parser/testdata/02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET/metadata.json b/parser/testdata/02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET/metadata.json index bc141058a4..0967ef424b 100644 --- a/parser/testdata/02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET/metadata.json +++ b/parser/testdata/02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt2": true, - "stmt3": true - } -} +{} From dc5e91bc6c7b631eaa939b201556ff9ebbe755f1 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:37:39 +0000 Subject: [PATCH 36/47] Support IN PARTITION clause in DELETE statements Added Partition field to DeleteQuery AST and parsing for the IN PARTITION clause in lightweight DELETE statements. The syntax is: DELETE FROM table IN PARTITION partition_expr WHERE condition Fixes test: - 02352_lightweight_delete_in_partition (stmt11, stmt12) --- ast/ast.go | 11 ++++++----- internal/explain/statements.go | 10 +++++++++- parser/parser.go | 9 +++++++++ .../metadata.json | 7 +------ 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index d9b99a9724..bb7c90244d 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -707,11 +707,12 @@ func (t *TruncateQuery) statementNode() {} // DeleteQuery represents a lightweight DELETE statement. 
type DeleteQuery struct { - Position token.Position `json:"-"` - Database string `json:"database,omitempty"` - Table string `json:"table"` - Where Expression `json:"where,omitempty"` - Settings []*SettingExpr `json:"settings,omitempty"` + Position token.Position `json:"-"` + Database string `json:"database,omitempty"` + Table string `json:"table"` + Partition Expression `json:"partition,omitempty"` // IN PARTITION clause + Where Expression `json:"where,omitempty"` + Settings []*SettingExpr `json:"settings,omitempty"` } func (d *DeleteQuery) Pos() token.Position { return d.Position } diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 5900f900f0..f397608af8 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -2198,8 +2198,11 @@ func explainDeleteQuery(sb *strings.Builder, n *ast.DeleteQuery, indent string, return } - // Count children: Where expression + table identifier + settings + // Count children: Partition + Where expression + table identifier + settings children := 1 // table identifier + if n.Partition != nil { + children++ + } if n.Where != nil { children++ } @@ -2208,6 +2211,11 @@ func explainDeleteQuery(sb *strings.Builder, n *ast.DeleteQuery, indent string, } fmt.Fprintf(sb, "%sDeleteQuery %s (children %d)\n", indent, n.Table, children) + // Output order: Partition, Where, Table identifier, Settings + if n.Partition != nil { + fmt.Fprintf(sb, "%s Partition (children 1)\n", indent) + Node(sb, n.Partition, depth+2) + } if n.Where != nil { Node(sb, n.Where, depth+1) } diff --git a/parser/parser.go b/parser/parser.go index 6a8fe0b709..951e539e63 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6168,6 +6168,15 @@ func (p *Parser) parseDelete() *ast.DeleteQuery { } } + // Parse IN PARTITION clause + if p.currentIs(token.IN) { + p.nextToken() // skip IN + if p.currentIs(token.PARTITION) { + p.nextToken() // skip PARTITION + del.Partition = p.parseExpression(LOWEST) + } + } + // Parse WHERE clause if p.currentIs(token.WHERE) { p.nextToken() // skip WHERE diff --git a/parser/testdata/02352_lightweight_delete_in_partition/metadata.json b/parser/testdata/02352_lightweight_delete_in_partition/metadata.json index ec09c7e10e..0967ef424b 100644 --- a/parser/testdata/02352_lightweight_delete_in_partition/metadata.json +++ b/parser/testdata/02352_lightweight_delete_in_partition/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt11": true, - "stmt12": true - } -} +{} From 76f3e3b18de040dc1b030d8b5041bc318c5e1a4f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:42:57 +0000 Subject: [PATCH 37/47] Support qualified identifiers starting with keywords When a keyword like SYSTEM is used as the start of a qualified name (e.g., system.one.*), parseKeywordAsIdentifier was returning just the keyword as a single-part identifier. Now it continues to parse DOT sequences to build qualified identifiers and handle qualified asterisks. 
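Illustrative queries of the form this enables (system.one is a built-in ClickHouse table):

    SELECT system.one.* FROM system.one;       -- qualified asterisk starting with the SYSTEM keyword
    SELECT system.one.dummy FROM system.one;   -- qualified column reference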
Fixes tests: - 00467_qualified_names (stmt19, stmt21) - 00502_custom_partitioning_local (stmt17) --- parser/expression.go | 21 ++++++++++++++++++- .../00467_qualified_names/metadata.json | 7 +------ .../metadata.json | 6 +----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/parser/expression.go b/parser/expression.go index 0580a6ce62..2f9fc8907b 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -2749,9 +2749,28 @@ func (p *Parser) parseKeywordAsIdentifier() ast.Expression { name := p.current.Value p.nextToken() + // Check for qualified identifier (system.one.* or system.one.col) + parts := []string{name} + for p.currentIs(token.DOT) { + p.nextToken() + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + parts = append(parts, p.current.Value) + p.nextToken() + } else if p.currentIs(token.ASTERISK) { + // table.* + p.nextToken() + return &ast.Asterisk{ + Position: pos, + Table: strings.Join(parts, "."), + } + } else { + break + } + } + return &ast.Identifier{ Position: pos, - Parts: []string{name}, + Parts: parts, } } diff --git a/parser/testdata/00467_qualified_names/metadata.json b/parser/testdata/00467_qualified_names/metadata.json index 05aa116239..0967ef424b 100644 --- a/parser/testdata/00467_qualified_names/metadata.json +++ b/parser/testdata/00467_qualified_names/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt19": true, - "stmt21": true - } -} +{} diff --git a/parser/testdata/00502_custom_partitioning_local/metadata.json b/parser/testdata/00502_custom_partitioning_local/metadata.json index ca584b3e28..0967ef424b 100644 --- a/parser/testdata/00502_custom_partitioning_local/metadata.json +++ b/parser/testdata/00502_custom_partitioning_local/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt17": true - } -} +{} From d50de222e2b4ec0b9c013e7f6f164a586255f960 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:51:23 +0000 Subject: [PATCH 38/47] Support TTL elements with WHERE conditions Added TTLElement struct to store both the TTL expression and the optional WHERE condition. Updated parser to correctly parse multiple TTL elements separated by commas, including proper handling of SET clause comma separation (SET assignments vs new TTL elements). Fixes tests: - 01622_multiple_ttls (stmt3, stmt11) - 03236_create_query_ttl_where (stmt2) - 03636_empty_projection_block (stmt1) - 03622_ttl_infos_where (stmt3) - 02932_set_ttl_where (stmt2) --- ast/ast.go | 11 ++ internal/explain/statements.go | 30 +++- parser/parser.go | 168 +++++++++++++----- .../01622_multiple_ttls/metadata.json | 3 +- .../02932_set_ttl_where/metadata.json | 6 +- .../metadata.json | 6 +- .../03622_ttl_infos_where/metadata.json | 6 +- .../metadata.json | 6 +- 8 files changed, 165 insertions(+), 71 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index bb7c90244d..8c766895e2 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -496,11 +496,22 @@ type TTLClause struct { Position token.Position `json:"-"` Expression Expression `json:"expression"` Expressions []Expression `json:"expressions,omitempty"` // Additional TTL expressions (for multiple TTL elements) + Elements []*TTLElement `json:"elements,omitempty"` // TTL elements with WHERE conditions } func (t *TTLClause) Pos() token.Position { return t.Position } func (t *TTLClause) End() token.Position { return t.Position } +// TTLElement represents a single TTL element with optional WHERE condition. 
+type TTLElement struct { + Position token.Position `json:"-"` + Expr Expression `json:"expr"` + Where Expression `json:"where,omitempty"` // WHERE condition for DELETE +} + +func (t *TTLElement) Pos() token.Position { return t.Position } +func (t *TTLElement) End() token.Position { return t.Position } + // DropQuery represents a DROP statement. type DropQuery struct { Position token.Position `json:"-"` diff --git a/internal/explain/statements.go b/internal/explain/statements.go index f397608af8..ae46502a0a 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -478,14 +478,30 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, Node(sb, n.SampleBy, storageChildDepth) } if n.TTL != nil { - // Count total TTL elements (1 for Expression + len(Expressions)) - ttlCount := 1 + len(n.TTL.Expressions) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", storageIndent, ttlCount) - fmt.Fprintf(sb, "%s TTLElement (children 1)\n", storageIndent) - Node(sb, n.TTL.Expression, storageChildDepth+2) - for _, expr := range n.TTL.Expressions { + // Use Elements if available (has WHERE conditions), otherwise use legacy Expression/Expressions + if len(n.TTL.Elements) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", storageIndent, len(n.TTL.Elements)) + for _, elem := range n.TTL.Elements { + children := 1 + if elem.Where != nil { + children = 2 + } + fmt.Fprintf(sb, "%s TTLElement (children %d)\n", storageIndent, children) + Node(sb, elem.Expr, storageChildDepth+2) + if elem.Where != nil { + Node(sb, elem.Where, storageChildDepth+2) + } + } + } else { + // Legacy: use Expression/Expressions + ttlCount := 1 + len(n.TTL.Expressions) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", storageIndent, ttlCount) fmt.Fprintf(sb, "%s TTLElement (children 1)\n", storageIndent) - Node(sb, expr, storageChildDepth+2) + Node(sb, n.TTL.Expression, storageChildDepth+2) + for _, expr := range n.TTL.Expressions { + fmt.Fprintf(sb, "%s TTLElement (children 1)\n", storageIndent) + Node(sb, expr, storageChildDepth+2) + } } } if len(n.Settings) > 0 { diff --git a/parser/parser.go b/parser/parser.go index 951e539e63..c31a623830 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -2656,54 +2656,25 @@ func (p *Parser) parseTableOptions(create *ast.CreateQuery) { case p.currentIs(token.TTL): p.nextToken() create.TTL = &ast.TTLClause{ - Position: p.current.Pos, - Expression: p.parseExpression(ALIAS_PREC), // Use ALIAS_PREC for AS SELECT - } - // Skip RECOMPRESS CODEC(...) if present - p.skipTTLModifiers() - // Parse additional TTL elements (comma-separated) - for p.currentIs(token.COMMA) { - p.nextToken() // skip comma - expr := p.parseExpression(ALIAS_PREC) - create.TTL.Expressions = append(create.TTL.Expressions, expr) - // Skip RECOMPRESS CODEC(...) 
if present - p.skipTTLModifiers() + Position: p.current.Pos, } - // Handle TTL GROUP BY x SET y = max(y) syntax - if p.currentIs(token.GROUP) { - p.nextToken() - if p.currentIs(token.BY) { + // Parse TTL elements (comma-separated) + for { + elem := p.parseTTLElement() + create.TTL.Elements = append(create.TTL.Elements, elem) + if p.currentIs(token.COMMA) { p.nextToken() - // Parse GROUP BY expressions (can have multiple, comma separated) - for { - p.parseExpression(ALIAS_PREC) - if p.currentIs(token.COMMA) { - p.nextToken() - } else { - break - } - } + } else { + break } } - // Handle SET clause in TTL (aggregation expressions for TTL GROUP BY) - if p.currentIs(token.SET) { - p.nextToken() - // Parse SET expressions until we hit a keyword or end - for !p.currentIs(token.SETTINGS) && !p.currentIs(token.AS) && !p.currentIs(token.WHERE) && !p.currentIs(token.SEMICOLON) && !p.currentIs(token.EOF) { - p.parseExpression(ALIAS_PREC) - if p.currentIs(token.COMMA) { - p.nextToken() - } else { - break - } + // Keep backward compatibility with Expression/Expressions fields + if len(create.TTL.Elements) > 0 { + create.TTL.Expression = create.TTL.Elements[0].Expr + for i := 1; i < len(create.TTL.Elements); i++ { + create.TTL.Expressions = append(create.TTL.Expressions, create.TTL.Elements[i].Expr) } } - // Handle WHERE clause in TTL (conditional deletion) - if p.currentIs(token.WHERE) { - p.nextToken() - // Parse WHERE condition - p.parseExpression(ALIAS_PREC) - } case p.currentIs(token.SETTINGS): p.nextToken() create.Settings = p.parseSettingsList() @@ -8068,6 +8039,119 @@ func (p *Parser) parseTransactionControl() *ast.TransactionControlQuery { return query } +// parseTTLElement parses a single TTL element: expression [DELETE] [WHERE condition] [GROUP BY ...] [SET ...] +func (p *Parser) parseTTLElement() *ast.TTLElement { + elem := &ast.TTLElement{ + Position: p.current.Pos, + Expr: p.parseExpression(ALIAS_PREC), + } + // Skip RECOMPRESS CODEC(...), DELETE, TO DISK, TO VOLUME (but not WHERE) + p.skipTTLModifiersExceptWhere() + // Handle WHERE clause for this TTL element (conditional deletion) + if p.currentIs(token.WHERE) { + p.nextToken() + elem.Where = p.parseExpression(ALIAS_PREC) + } + // Handle GROUP BY x SET y = max(y) syntax (skip for now, already parsed in Where or just skip) + if p.currentIs(token.GROUP) { + p.nextToken() + if p.currentIs(token.BY) { + p.nextToken() + for { + p.parseExpression(ALIAS_PREC) + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + } + } + // Handle SET clause - assignments are comma separated (id = expr, id = expr, ...) + // We need to distinguish between: + // - Comma continuing SET: followed by IDENT = pattern + // - Comma starting new TTL: followed by expression (like d + toIntervalYear(...)) + if p.currentIs(token.SET) { + p.nextToken() + for { + // Parse assignment expression: id = expr + p.parseExpression(ALIAS_PREC) + // Check for comma + if p.currentIs(token.COMMA) { + // Look ahead to check pattern. 
We need to see: COMMA IDENT EQ + // Save state to peek ahead + savedCurrent := p.current + savedPeek := p.peek + p.nextToken() // skip comma to see what follows + isSetContinuation := false + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + if p.peekIs(token.EQ) { + // It's another SET assignment (id = expr) + isSetContinuation = true + } + } + if isSetContinuation { + // Continue parsing SET assignments (already consumed comma) + continue + } + // Not a SET assignment - restore state so caller sees the comma + p.current = savedCurrent + p.peek = savedPeek + break + } + // No comma, end of SET clause + break + } + } + return elem +} + +// skipTTLModifiersExceptWhere skips TTL modifiers but stops at WHERE +func (p *Parser) skipTTLModifiersExceptWhere() { + for { + // Skip RECOMPRESS CODEC(...) + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "RECOMPRESS" { + p.nextToken() + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "CODEC" { + p.nextToken() + if p.currentIs(token.LPAREN) { + depth := 1 + p.nextToken() + for depth > 0 && !p.currentIs(token.EOF) { + if p.currentIs(token.LPAREN) { + depth++ + } else if p.currentIs(token.RPAREN) { + depth-- + } + p.nextToken() + } + } + } + continue + } + // Skip DELETE (TTL ... DELETE) + if p.currentIs(token.DELETE) { + p.nextToken() + continue + } + // Skip TO DISK 'name' or TO VOLUME 'name' + if p.currentIs(token.TO) { + p.nextToken() + if p.currentIs(token.IDENT) { + upper := strings.ToUpper(p.current.Value) + if upper == "DISK" || upper == "VOLUME" { + p.nextToken() + if p.currentIs(token.STRING) { + p.nextToken() + } + continue + } + } + } + break + } +} + // skipTTLModifiers skips TTL modifiers like RECOMPRESS CODEC(...), DELETE, TO DISK, TO VOLUME func (p *Parser) skipTTLModifiers() { for { diff --git a/parser/testdata/01622_multiple_ttls/metadata.json b/parser/testdata/01622_multiple_ttls/metadata.json index 638822386f..ab9202e88e 100644 --- a/parser/testdata/01622_multiple_ttls/metadata.json +++ b/parser/testdata/01622_multiple_ttls/metadata.json @@ -1,6 +1,5 @@ { "explain_todo": { - "stmt11": true, - "stmt3": true + "stmt11": true } } diff --git a/parser/testdata/02932_set_ttl_where/metadata.json b/parser/testdata/02932_set_ttl_where/metadata.json index ef58f80315..0967ef424b 100644 --- a/parser/testdata/02932_set_ttl_where/metadata.json +++ b/parser/testdata/02932_set_ttl_where/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt2": true - } -} +{} diff --git a/parser/testdata/03236_create_query_ttl_where/metadata.json b/parser/testdata/03236_create_query_ttl_where/metadata.json index ef58f80315..0967ef424b 100644 --- a/parser/testdata/03236_create_query_ttl_where/metadata.json +++ b/parser/testdata/03236_create_query_ttl_where/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt2": true - } -} +{} diff --git a/parser/testdata/03622_ttl_infos_where/metadata.json b/parser/testdata/03622_ttl_infos_where/metadata.json index 1295a45747..0967ef424b 100644 --- a/parser/testdata/03622_ttl_infos_where/metadata.json +++ b/parser/testdata/03622_ttl_infos_where/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt3": true - } -} +{} diff --git a/parser/testdata/03636_empty_projection_block/metadata.json b/parser/testdata/03636_empty_projection_block/metadata.json index e9d6e46171..0967ef424b 100644 --- a/parser/testdata/03636_empty_projection_block/metadata.json +++ b/parser/testdata/03636_empty_projection_block/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt1": true - } -} 
+{} From e059896d966153d6278d0e07da5cc6b6a31f132a Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 18:59:13 +0000 Subject: [PATCH 39/47] Support PARTITION ID syntax in OPTIMIZE TABLE statements Add PartitionByID field to OptimizeQuery AST to distinguish PARTITION ID 'value' from PARTITION expr. The parser now detects the ID keyword after PARTITION and sets this flag. The explain output renders Partition_ID with the inline literal format matching ClickHouse's EXPLAIN AST output. --- ast/ast.go | 19 ++++++++++--------- internal/explain/statements.go | 9 +++++++++ parser/parser.go | 5 +++++ .../testdata/03100_lwu_03_join/metadata.json | 6 +----- .../03100_lwu_06_apply_patches/metadata.json | 6 +----- .../03100_lwu_07_merge_patches/metadata.json | 6 +----- .../metadata.json | 7 +------ .../03100_lwu_18_sequence/metadata.json | 7 +------ .../03100_lwu_30_join_cache/metadata.json | 6 +----- .../metadata.json | 6 +----- .../03100_lwu_deletes_3/metadata.json | 7 +------ 11 files changed, 32 insertions(+), 52 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 8c766895e2..04a63cbf0b 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -909,15 +909,16 @@ func (s *SetQuery) statementNode() {} // OptimizeQuery represents an OPTIMIZE statement. type OptimizeQuery struct { - Position token.Position `json:"-"` - Database string `json:"database,omitempty"` - Table string `json:"table"` - Partition Expression `json:"partition,omitempty"` - Final bool `json:"final,omitempty"` - Cleanup bool `json:"cleanup,omitempty"` - Dedupe bool `json:"dedupe,omitempty"` - OnCluster string `json:"on_cluster,omitempty"` - Settings []*SettingExpr `json:"settings,omitempty"` + Position token.Position `json:"-"` + Database string `json:"database,omitempty"` + Table string `json:"table"` + Partition Expression `json:"partition,omitempty"` + PartitionByID bool `json:"partition_by_id,omitempty"` // PARTITION ID vs PARTITION expr + Final bool `json:"final,omitempty"` + Cleanup bool `json:"cleanup,omitempty"` + Dedupe bool `json:"dedupe,omitempty"` + OnCluster string `json:"on_cluster,omitempty"` + Settings []*SettingExpr `json:"settings,omitempty"` } func (o *OptimizeQuery) Pos() token.Position { return o.Position } diff --git a/internal/explain/statements.go b/internal/explain/statements.go index ae46502a0a..c54817dfbe 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -2163,6 +2163,15 @@ func explainOptimizeQuery(sb *strings.Builder, n *ast.OptimizeQuery, indent stri // PARTITION ALL is shown as Partition_ID (empty) in EXPLAIN AST if ident, ok := n.Partition.(*ast.Identifier); ok && strings.ToUpper(ident.Name()) == "ALL" { fmt.Fprintf(sb, "%s Partition_ID \n", indent) + } else if n.PartitionByID { + // PARTITION ID 'value' is shown as Partition_ID Literal_'value' (children 1) + if lit, ok := n.Partition.(*ast.Literal); ok { + fmt.Fprintf(sb, "%s Partition_ID Literal_\\'%s\\' (children 1)\n", indent, lit.Value) + Node(sb, n.Partition, depth+2) + } else { + fmt.Fprintf(sb, "%s Partition_ID (children 1)\n", indent) + Node(sb, n.Partition, depth+2) + } } else { fmt.Fprintf(sb, "%s Partition (children 1)\n", indent) Node(sb, n.Partition, depth+2) diff --git a/parser/parser.go b/parser/parser.go index c31a623830..8e585484f3 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6620,6 +6620,11 @@ func (p *Parser) parseOptimize() *ast.OptimizeQuery { // Handle PARTITION if p.currentIs(token.PARTITION) { p.nextToken() + // Check for PARTITION ID 'value' syntax + if p.currentIs(token.IDENT) && 
strings.ToUpper(p.current.Value) == "ID" { + opt.PartitionByID = true + p.nextToken() + } opt.Partition = p.parseExpression(LOWEST) } diff --git a/parser/testdata/03100_lwu_03_join/metadata.json b/parser/testdata/03100_lwu_03_join/metadata.json index 62b81668c3..0967ef424b 100644 --- a/parser/testdata/03100_lwu_03_join/metadata.json +++ b/parser/testdata/03100_lwu_03_join/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt13": true - } -} +{} diff --git a/parser/testdata/03100_lwu_06_apply_patches/metadata.json b/parser/testdata/03100_lwu_06_apply_patches/metadata.json index aa28559472..0967ef424b 100644 --- a/parser/testdata/03100_lwu_06_apply_patches/metadata.json +++ b/parser/testdata/03100_lwu_06_apply_patches/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt18": true - } -} +{} diff --git a/parser/testdata/03100_lwu_07_merge_patches/metadata.json b/parser/testdata/03100_lwu_07_merge_patches/metadata.json index c45b7602ba..0967ef424b 100644 --- a/parser/testdata/03100_lwu_07_merge_patches/metadata.json +++ b/parser/testdata/03100_lwu_07_merge_patches/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt12": true - } -} +{} diff --git a/parser/testdata/03100_lwu_09_different_structure/metadata.json b/parser/testdata/03100_lwu_09_different_structure/metadata.json index d4c086941a..0967ef424b 100644 --- a/parser/testdata/03100_lwu_09_different_structure/metadata.json +++ b/parser/testdata/03100_lwu_09_different_structure/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt10": true, - "stmt11": true - } -} +{} diff --git a/parser/testdata/03100_lwu_18_sequence/metadata.json b/parser/testdata/03100_lwu_18_sequence/metadata.json index f650e24ee6..0967ef424b 100644 --- a/parser/testdata/03100_lwu_18_sequence/metadata.json +++ b/parser/testdata/03100_lwu_18_sequence/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt12": true, - "stmt16": true - } -} +{} diff --git a/parser/testdata/03100_lwu_30_join_cache/metadata.json b/parser/testdata/03100_lwu_30_join_cache/metadata.json index dbdbb76d4f..0967ef424b 100644 --- a/parser/testdata/03100_lwu_30_join_cache/metadata.json +++ b/parser/testdata/03100_lwu_30_join_cache/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt6": true - } -} +{} diff --git a/parser/testdata/03100_lwu_31_merge_memory_usage/metadata.json b/parser/testdata/03100_lwu_31_merge_memory_usage/metadata.json index b563327205..0967ef424b 100644 --- a/parser/testdata/03100_lwu_31_merge_memory_usage/metadata.json +++ b/parser/testdata/03100_lwu_31_merge_memory_usage/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt7": true - } -} +{} diff --git a/parser/testdata/03100_lwu_deletes_3/metadata.json b/parser/testdata/03100_lwu_deletes_3/metadata.json index 94bd5427f9..0967ef424b 100644 --- a/parser/testdata/03100_lwu_deletes_3/metadata.json +++ b/parser/testdata/03100_lwu_deletes_3/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt23": true, - "stmt27": true - } -} +{} From 8470f416afffd8792ba932d89ab6c1d21207cbfe Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:09:50 +0000 Subject: [PATCH 40/47] Fix ALTER ADD INDEX tuple expression parsing Simplified the index expression parsing in ALTER ADD INDEX to let parseExpression handle parentheses naturally. This allows tuple expressions like (a, b, c) to be parsed correctly, matching how CREATE TABLE INDEX parsing works. 
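For example, a hypothetical statement with a tuple index expression:

    ALTER TABLE t ADD INDEX idx_multi (a, b, c) TYPE minmax;   -- hypothetical table and columns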
--- parser/parser.go | 11 +++-------- .../metadata.json | 6 +----- .../02710_allow_suspicious_indices/metadata.json | 7 +------ .../metadata.json | 6 +----- 4 files changed, 6 insertions(+), 24 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 8e585484f3..63f0319e66 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5267,16 +5267,11 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { Position: p.current.Pos, Name: idxName, } - // Parse expression - can be in parentheses or bare expression until TYPE keyword - if p.currentIs(token.LPAREN) { - p.nextToken() + // Parse expression - let parseExpression handle parentheses naturally + // This allows (a, b, c) to be parsed as a tuple + if !p.currentIs(token.IDENT) || strings.ToUpper(p.current.Value) != "TYPE" { idx.Expression = p.parseExpression(LOWEST) cmd.IndexExpr = idx.Expression - p.expect(token.RPAREN) - } else if !p.currentIs(token.IDENT) || strings.ToUpper(p.current.Value) != "TYPE" { - // Parse bare expression (not in parentheses) - ends at TYPE keyword - idx.Expression = p.parseExpression(ALIAS_PREC) - cmd.IndexExpr = idx.Expression } // Parse TYPE if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" { diff --git a/parser/testdata/01114_materialize_clear_index_compact_parts/metadata.json b/parser/testdata/01114_materialize_clear_index_compact_parts/metadata.json index 342b3ff5b4..0967ef424b 100644 --- a/parser/testdata/01114_materialize_clear_index_compact_parts/metadata.json +++ b/parser/testdata/01114_materialize_clear_index_compact_parts/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt8": true - } -} +{} diff --git a/parser/testdata/02710_allow_suspicious_indices/metadata.json b/parser/testdata/02710_allow_suspicious_indices/metadata.json index 222a5fbfa0..0967ef424b 100644 --- a/parser/testdata/02710_allow_suspicious_indices/metadata.json +++ b/parser/testdata/02710_allow_suspicious_indices/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt14": true, - "stmt15": true - } -} +{} diff --git a/parser/testdata/02763_mutate_compact_part_with_skip_indices_and_projections/metadata.json b/parser/testdata/02763_mutate_compact_part_with_skip_indices_and_projections/metadata.json index 3a06a4a1ac..0967ef424b 100644 --- a/parser/testdata/02763_mutate_compact_part_with_skip_indices_and_projections/metadata.json +++ b/parser/testdata/02763_mutate_compact_part_with_skip_indices_and_projections/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt5": true - } -} +{} From a59b23fa5d802670606b67bbba47b02b3923df97 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:17:10 +0000 Subject: [PATCH 41/47] Add support for KILL QUERY/MUTATION statements Adds KillQuery AST type and parser for KILL QUERY/MUTATION statements. The explain output matches ClickHouse format with the WHERE expression operator in the header (e.g., Function_and) and SYNC/ASYNC mode. 
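Illustrative statements covered by the new parser (the WHERE predicates are arbitrary examples):

    KILL QUERY WHERE query_id = 'abc' AND user = 'default' SYNC;
    KILL MUTATION WHERE database = 'db' AND table = 't' ASYNC FORMAT Null;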
--- ast/ast.go | 14 +++++ internal/explain/explain.go | 2 + internal/explain/statements.go | 56 +++++++++++++++++ parser/parser.go | 60 +++++++++++++++++++ .../metadata.json | 7 +-- .../metadata.json | 7 +-- 6 files changed, 134 insertions(+), 12 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 04a63cbf0b..959117bb54 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1045,6 +1045,20 @@ func (s *ShowGrantsQuery) Pos() token.Position { return s.Position } func (s *ShowGrantsQuery) End() token.Position { return s.Position } func (s *ShowGrantsQuery) statementNode() {} +// KillQuery represents a KILL QUERY/MUTATION statement. +type KillQuery struct { + Position token.Position `json:"-"` + Type string `json:"type"` // "QUERY" or "MUTATION" + Where Expression `json:"where,omitempty"` // WHERE condition + Sync bool `json:"sync,omitempty"` // SYNC mode (default false = ASYNC) + Test bool `json:"test,omitempty"` // TEST mode + Format string `json:"format,omitempty"` // FORMAT clause +} + +func (k *KillQuery) Pos() token.Position { return k.Position } +func (k *KillQuery) End() token.Position { return k.Position } +func (k *KillQuery) statementNode() {} + // ShowPrivilegesQuery represents a SHOW PRIVILEGES statement. type ShowPrivilegesQuery struct { Position token.Position `json:"-"` diff --git a/internal/explain/explain.go b/internal/explain/explain.go index 9afcbf264d..598ad3b08d 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -258,6 +258,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) { explainUpdateQuery(sb, n, indent, depth) case *ast.ParallelWithQuery: explainParallelWithQuery(sb, n, indent, depth) + case *ast.KillQuery: + explainKillQuery(sb, n, indent, depth) // Types case *ast.DataType: diff --git a/internal/explain/statements.go b/internal/explain/statements.go index c54817dfbe..aa1781dc46 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -2250,6 +2250,62 @@ func explainDeleteQuery(sb *strings.Builder, n *ast.DeleteQuery, indent string, } } +func explainKillQuery(sb *strings.Builder, n *ast.KillQuery, indent string, depth int) { + if n == nil { + fmt.Fprintf(sb, "%s*ast.KillQuery\n", indent) + return + } + + // Build the header in ClickHouse format: + // KillQueryQuery Function_and ASYNC (children 2) + // The function name uses underscore instead of space + funcName := "" + if n.Where != nil { + switch expr := n.Where.(type) { + case *ast.BinaryExpr: + funcName = "Function_" + strings.ToLower(expr.Op) + case *ast.FunctionCall: + funcName = "Function_" + expr.Name + default: + funcName = "Function" + } + } + + mode := "ASYNC" + if n.Sync { + mode = "SYNC" + } + if n.Test { + mode = "TEST" + } + + // Count children: WHERE expression + FORMAT identifier + children := 0 + if n.Where != nil { + children++ + } + if n.Format != "" { + children++ + } + + // Header: KillQueryQuery Function_xxx MODE (children N) + if funcName != "" { + fmt.Fprintf(sb, "%sKillQueryQuery %s %s (children %d)\n", indent, funcName, mode, children) + } else { + fmt.Fprintf(sb, "%sKillQueryQuery %s (children %d)\n", indent, mode, children) + } + + // Output WHERE expression + if n.Where != nil { + Node(sb, n.Where, depth+1) + } + + // Output FORMAT as Identifier + if n.Format != "" { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Format) + } +} + func explainCheckQuery(sb *strings.Builder, n *ast.CheckQuery, indent string) { if n == nil { fmt.Fprintf(sb, "%s*ast.CheckQuery\n", indent) diff --git a/parser/parser.go b/parser/parser.go index 
63f0319e66..97f713b63d 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -336,6 +336,8 @@ func (p *Parser) parseStatement() ast.Statement { return p.parseBackup() case token.RESTORE: return p.parseRestore() + case token.KILL: + return p.parseKill() default: p.errors = append(p.errors, fmt.Errorf("unexpected token %s at line %d, column %d", p.current.Token, p.current.Pos.Line, p.current.Pos.Column)) @@ -8039,6 +8041,64 @@ func (p *Parser) parseTransactionControl() *ast.TransactionControlQuery { return query } +// parseKill handles KILL QUERY/MUTATION statements +func (p *Parser) parseKill() *ast.KillQuery { + query := &ast.KillQuery{ + Position: p.current.Pos, + } + + p.nextToken() // skip KILL + + // Parse QUERY or MUTATION + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "QUERY" { + query.Type = "QUERY" + p.nextToken() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "MUTATION" { + query.Type = "MUTATION" + p.nextToken() + } + + // Parse WHERE clause + if p.currentIs(token.WHERE) { + p.nextToken() // skip WHERE + query.Where = p.parseExpression(LOWEST) + } + + // Parse SYNC/ASYNC/TEST + for p.currentIs(token.IDENT) { + upper := strings.ToUpper(p.current.Value) + switch upper { + case "SYNC": + query.Sync = true + p.nextToken() + case "ASYNC": + query.Sync = false + p.nextToken() + case "TEST": + query.Test = true + p.nextToken() + default: + // Exit loop for unknown keywords + goto endModifiers + } + } +endModifiers: + + // Parse FORMAT clause + if p.currentIs(token.FORMAT) { + p.nextToken() + if p.currentIs(token.NULL) { + query.Format = "Null" + p.nextToken() + } else if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + query.Format = p.current.Value + p.nextToken() + } + } + + return query +} + // parseTTLElement parses a single TTL element: expression [DELETE] [WHERE condition] [GROUP BY ...] [SET ...] func (p *Parser) parseTTLElement() *ast.TTLElement { elem := &ast.TTLElement{ diff --git a/parser/testdata/01788_update_nested_type_subcolumn_check/metadata.json b/parser/testdata/01788_update_nested_type_subcolumn_check/metadata.json index 2db1f8c824..0967ef424b 100644 --- a/parser/testdata/01788_update_nested_type_subcolumn_check/metadata.json +++ b/parser/testdata/01788_update_nested_type_subcolumn_check/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt16": true, - "stmt26": true - } -} +{} diff --git a/parser/testdata/03047_on_fly_mutations_non_deterministic/metadata.json b/parser/testdata/03047_on_fly_mutations_non_deterministic/metadata.json index 8888e2e3ae..0967ef424b 100644 --- a/parser/testdata/03047_on_fly_mutations_non_deterministic/metadata.json +++ b/parser/testdata/03047_on_fly_mutations_non_deterministic/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt10": true, - "stmt16": true - } -} +{} From 3783bb2779aec52786a853b03395c9433eddbd15 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:21:11 +0000 Subject: [PATCH 42/47] Enable duplicate output for RELOAD DICTIONARY in SYSTEM queries Add RELOAD DICTIONARY to the list of SYSTEM commands that output database/table identifiers twice in EXPLAIN AST format. 
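For illustration (the dictionary name is hypothetical):

    SYSTEM RELOAD DICTIONARY db.dict;   -- db-qualified name triggers the duplicate identifier output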
--- parser/parser.go | 3 ++- .../01527_dist_sharding_key_dictGet_reload/metadata.json | 7 +------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 97f713b63d..786f86e3b1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6777,7 +6777,8 @@ func (p *Parser) parseSystem() *ast.SystemQuery { upperCmd := strings.ToUpper(sys.Command) if strings.Contains(upperCmd, "STOP DISTRIBUTED SENDS") || strings.Contains(upperCmd, "START DISTRIBUTED SENDS") || - strings.Contains(upperCmd, "FLUSH DISTRIBUTED") { + strings.Contains(upperCmd, "FLUSH DISTRIBUTED") || + strings.Contains(upperCmd, "RELOAD DICTIONARY") { // Only set duplicate if database and table are different (qualified name) if sys.Database != sys.Table { sys.DuplicateTableOutput = true diff --git a/parser/testdata/01527_dist_sharding_key_dictGet_reload/metadata.json b/parser/testdata/01527_dist_sharding_key_dictGet_reload/metadata.json index 09a03eef39..0967ef424b 100644 --- a/parser/testdata/01527_dist_sharding_key_dictGet_reload/metadata.json +++ b/parser/testdata/01527_dist_sharding_key_dictGet_reload/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt13": true, - "stmt18": true - } -} +{} From 20a75935cc5dc7413a7f86f18ba2ced6411876ca Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:39:54 +0000 Subject: [PATCH 43/47] Handle large number overflow and preserve original source text - For hex numbers that overflow uint64 (like 0x123456789ABCDEF01), convert to Float64 - For decimal numbers that overflow, try float64 parsing - Preserve original source text in Literal.Source for formatting in CAST expressions - Update explain for negated uint64 values that overflow int64 to output Float64 - Use Source field in formatElementAsString to preserve exact text in array/tuple casts --- internal/explain/expressions.go | 20 ++++++++- internal/explain/format.go | 4 ++ parser/expression.go | 44 +++++++++++++++++-- .../00031_parser_number/metadata.json | 7 +-- .../metadata.json | 6 +-- .../01065_if_not_finite/metadata.json | 2 +- .../metadata.json | 6 +-- .../01748_dictionary_table_dot/metadata.json | 6 +-- .../testdata/02708_dotProduct/metadata.json | 7 +-- .../metadata.json | 6 +-- .../metadata.json | 6 +-- 11 files changed, 70 insertions(+), 44 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 5f4e0ac446..0fd1181bb5 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -435,8 +435,15 @@ func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, dept // ClickHouse normalizes -0 to UInt64_0 if val == 0 { fmt.Fprintf(sb, "%sLiteral UInt64_0\n", indent) - } else { + } else if val <= 9223372036854775808 { + // Value fits in int64 when negated + // Note: -9223372036854775808 is int64 min, so 9223372036854775808 is included fmt.Fprintf(sb, "%sLiteral Int64_-%d\n", indent, val) + } else { + // Value too large for int64 - output as Float64 + f := -float64(val) + s := FormatFloat(f) + fmt.Fprintf(sb, "%sLiteral Float64_%s\n", indent, s) } return } @@ -657,7 +664,16 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { fmt.Fprintf(sb, "%sLiteral Int64_%d (alias %s)\n", indent, -val, escapeAlias(n.Alias)) return case uint64: - fmt.Fprintf(sb, "%sLiteral Int64_-%d (alias %s)\n", indent, val, escapeAlias(n.Alias)) + if val <= 9223372036854775808 { + // Value fits in int64 when negated + // Note: -9223372036854775808 is int64 min, so 9223372036854775808 is included + 
fmt.Fprintf(sb, "%sLiteral Int64_-%d (alias %s)\n", indent, val, escapeAlias(n.Alias)) + } else { + // Value too large for int64 - output as Float64 + f := -float64(val) + s := FormatFloat(f) + fmt.Fprintf(sb, "%sLiteral Float64_%s (alias %s)\n", indent, s, escapeAlias(n.Alias)) + } return } case ast.LiteralFloat: diff --git a/internal/explain/format.go b/internal/explain/format.go index 1e0aac7062..e9e5711611 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -608,6 +608,10 @@ func formatElementAsString(expr ast.Expression) string { case ast.LiteralInteger: return fmt.Sprintf("%d", e.Value) case ast.LiteralFloat: + // Use Source if available (preserves original text for large numbers) + if e.Source != "" { + return e.Source + } return fmt.Sprintf("%v", e.Value) case ast.LiteralString: s := e.Value.(string) diff --git a/parser/expression.go b/parser/expression.go index 2f9fc8907b..10eca8a3f6 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -2,6 +2,7 @@ package parser import ( "math" + "math/big" "strconv" "strings" @@ -9,6 +10,23 @@ import ( "github.com/sqlc-dev/doubleclick/token" ) +// parseHexToFloat converts a hex string (with 0x prefix) to float64 +// Used for hex numbers that overflow uint64 +func parseHexToFloat(s string) (float64, bool) { + if !strings.HasPrefix(strings.ToLower(s), "0x") { + return 0, false + } + hexPart := s[2:] + bi := new(big.Int) + _, ok := bi.SetString(hexPart, 16) + if !ok { + return 0, false + } + f := new(big.Float).SetInt(bi) + result, _ := f.Float64() + return result, true +} + // Operator precedence levels const ( LOWEST = iota @@ -984,10 +1002,28 @@ func (p *Parser) parseNumber() ast.Expression { // Try unsigned uint64 for large positive numbers u, uerr := strconv.ParseUint(value, base, 64) if uerr != nil { - // Too large for int64/uint64, store as string with IsBigInt flag - lit.Type = ast.LiteralString - lit.Value = value - lit.IsBigInt = true + // Too large for int64/uint64, try as float64 + var f float64 + var ok bool + if isHex { + // For hex numbers, use parseHexToFloat since strconv.ParseFloat + // doesn't handle hex integers without 'p' exponent + f, ok = parseHexToFloat(value) + } else { + var ferr error + f, ferr = strconv.ParseFloat(value, 64) + ok = ferr == nil + } + if !ok { + // Still can't parse, store as string with IsBigInt flag + lit.Type = ast.LiteralString + lit.Value = value + lit.IsBigInt = true + } else { + lit.Type = ast.LiteralFloat + lit.Value = f + lit.Source = value // Preserve original source text + } } else { lit.Type = ast.LiteralInteger lit.Value = u // Store as uint64 diff --git a/parser/testdata/00031_parser_number/metadata.json b/parser/testdata/00031_parser_number/metadata.json index 682bda1cbc..0967ef424b 100644 --- a/parser/testdata/00031_parser_number/metadata.json +++ b/parser/testdata/00031_parser_number/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt1": true, - "stmt2": true - } -} +{} diff --git a/parser/testdata/01036_no_superfluous_dict_reload_on_create_database_2/metadata.json b/parser/testdata/01036_no_superfluous_dict_reload_on_create_database_2/metadata.json index 7ad5569408..0967ef424b 100644 --- a/parser/testdata/01036_no_superfluous_dict_reload_on_create_database_2/metadata.json +++ b/parser/testdata/01036_no_superfluous_dict_reload_on_create_database_2/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt9": true - } -} +{} diff --git a/parser/testdata/01065_if_not_finite/metadata.json b/parser/testdata/01065_if_not_finite/metadata.json index 
a133290734..0967ef424b 100644 --- a/parser/testdata/01065_if_not_finite/metadata.json +++ b/parser/testdata/01065_if_not_finite/metadata.json @@ -1 +1 @@ -{"explain_todo":{"stmt8":true}} +{} diff --git a/parser/testdata/01257_dictionary_mismatch_types/metadata.json b/parser/testdata/01257_dictionary_mismatch_types/metadata.json index c45b7602ba..0967ef424b 100644 --- a/parser/testdata/01257_dictionary_mismatch_types/metadata.json +++ b/parser/testdata/01257_dictionary_mismatch_types/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt12": true - } -} +{} diff --git a/parser/testdata/01748_dictionary_table_dot/metadata.json b/parser/testdata/01748_dictionary_table_dot/metadata.json index 7ad5569408..0967ef424b 100644 --- a/parser/testdata/01748_dictionary_table_dot/metadata.json +++ b/parser/testdata/01748_dictionary_table_dot/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt9": true - } -} +{} diff --git a/parser/testdata/02708_dotProduct/metadata.json b/parser/testdata/02708_dotProduct/metadata.json index 5c57aa20f2..0967ef424b 100644 --- a/parser/testdata/02708_dotProduct/metadata.json +++ b/parser/testdata/02708_dotProduct/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt54": true, - "stmt55": true - } -} +{} diff --git a/parser/testdata/03071_fix_short_circuit_logic/metadata.json b/parser/testdata/03071_fix_short_circuit_logic/metadata.json index 3a06a4a1ac..0967ef424b 100644 --- a/parser/testdata/03071_fix_short_circuit_logic/metadata.json +++ b/parser/testdata/03071_fix_short_circuit_logic/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt5": true - } -} +{} diff --git a/parser/testdata/03667_accurate_cast_datetime_overflow/metadata.json b/parser/testdata/03667_accurate_cast_datetime_overflow/metadata.json index ef58f80315..0967ef424b 100644 --- a/parser/testdata/03667_accurate_cast_datetime_overflow/metadata.json +++ b/parser/testdata/03667_accurate_cast_datetime_overflow/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt2": true - } -} +{} From 0ccea2122bdbedf1c6b5a04c1220abd7859f5155 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:49:50 +0000 Subject: [PATCH 44/47] Add alias support for ArrayAccess and BetweenExpr in WITH clauses - Add explainBetweenExprWithAlias to support aliases on BETWEEN expressions - Add ArrayAccess and BetweenExpr cases to explainWithElement - Enables WITH expr AS name syntax for array subscripts and BETWEEN clauses --- internal/explain/expressions.go | 4 ++ internal/explain/functions.go | 42 +++++++++++++++++++ .../metadata.json | 6 +-- .../metadata.json | 6 +-- .../metadata.json | 7 +--- 5 files changed, 49 insertions(+), 16 deletions(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 0fd1181bb5..43a87b1765 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -1060,6 +1060,10 @@ func explainWithElement(sb *strings.Builder, n *ast.WithElement, indent string, } case *ast.CastExpr: explainCastExprWithAlias(sb, e, n.Name, indent, depth) + case *ast.ArrayAccess: + explainArrayAccessWithAlias(sb, e, n.Name, indent, depth) + case *ast.BetweenExpr: + explainBetweenExprWithAlias(sb, e, n.Name, indent, depth) default: // For other types, just output the expression (alias may be lost) Node(sb, n.Query, depth) diff --git a/internal/explain/functions.go b/internal/explain/functions.go index 0cc7b5b726..53870ed619 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -1442,6 +1442,48 @@ func explainBetweenExpr(sb 
*strings.Builder, n *ast.BetweenExpr, indent string, } } +func explainBetweenExprWithAlias(sb *strings.Builder, n *ast.BetweenExpr, alias string, indent string, depth int) { + if n.Not { + // NOT BETWEEN is transformed to: expr < low OR expr > high + // Represented as: Function or with two comparisons: less and greater + if alias != "" { + fmt.Fprintf(sb, "%sFunction or (alias %s) (children %d)\n", indent, alias, 1) + } else { + fmt.Fprintf(sb, "%sFunction or (children %d)\n", indent, 1) + } + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + // less(expr, low) + fmt.Fprintf(sb, "%s Function less (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+4) + Node(sb, n.Low, depth+4) + // greater(expr, high) + fmt.Fprintf(sb, "%s Function greater (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+4) + Node(sb, n.High, depth+4) + } else { + // BETWEEN is represented as Function and with two comparisons + // expr >= low AND expr <= high + if alias != "" { + fmt.Fprintf(sb, "%sFunction and (alias %s) (children %d)\n", indent, alias, 1) + } else { + fmt.Fprintf(sb, "%sFunction and (children %d)\n", indent, 1) + } + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + // greaterOrEquals(expr, low) + fmt.Fprintf(sb, "%s Function greaterOrEquals (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+4) + Node(sb, n.Low, depth+4) + // lessOrEquals(expr, high) + fmt.Fprintf(sb, "%s Function lessOrEquals (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+4) + Node(sb, n.High, depth+4) + } +} + func explainIsNullExpr(sb *strings.Builder, n *ast.IsNullExpr, indent string, depth int) { // IS NULL is represented as Function isNull fnName := "isNull" diff --git a/parser/testdata/03166_skip_indexes_vertical_merge_1/metadata.json b/parser/testdata/03166_skip_indexes_vertical_merge_1/metadata.json index 62b81668c3..0967ef424b 100644 --- a/parser/testdata/03166_skip_indexes_vertical_merge_1/metadata.json +++ b/parser/testdata/03166_skip_indexes_vertical_merge_1/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt13": true - } -} +{} diff --git a/parser/testdata/03166_skip_indexes_vertical_merge_2/metadata.json b/parser/testdata/03166_skip_indexes_vertical_merge_2/metadata.json index 342b3ff5b4..0967ef424b 100644 --- a/parser/testdata/03166_skip_indexes_vertical_merge_2/metadata.json +++ b/parser/testdata/03166_skip_indexes_vertical_merge_2/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt8": true - } -} +{} diff --git a/parser/testdata/03201_analyzer_resolve_in_parent_scope/metadata.json b/parser/testdata/03201_analyzer_resolve_in_parent_scope/metadata.json index 60106a3b25..0967ef424b 100644 --- a/parser/testdata/03201_analyzer_resolve_in_parent_scope/metadata.json +++ b/parser/testdata/03201_analyzer_resolve_in_parent_scope/metadata.json @@ -1,6 +1 @@ -{ - "explain_todo": { - "stmt3": true, - "stmt9": true - } -} +{} From 700f2d9139a8be0c8d77f19bf6bd6387511ddeae Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 3 Jan 2026 19:55:55 +0000 Subject: [PATCH 45/47] Handle empty PRIMARY KEY () in CREATE TABLE explain output - Add HasEmptyColumnsPrimaryKey flag to CreateTableQuery and AttachQuery - Set flag in parser when PRIMARY KEY () has empty parentheses - Update explain output to show Function tuple with 
---
 ast/ast.go | 6 ++++--
 internal/explain/statements.go | 20 +++++++++++++------
 parser/parser.go | 4 ++++
 .../03156_nullable_number_tips/metadata.json | 7 +------
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/ast/ast.go b/ast/ast.go
index 959117bb54..c724583cf4 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -282,7 +282,8 @@ type CreateQuery struct {
 	Indexes []*IndexDefinition `json:"indexes,omitempty"`
 	Projections []*Projection `json:"projections,omitempty"`
 	Constraints []*Constraint `json:"constraints,omitempty"`
-	ColumnsPrimaryKey []Expression `json:"columns_primary_key,omitempty"` // PRIMARY KEY in column list
+	ColumnsPrimaryKey         []Expression `json:"columns_primary_key,omitempty"`           // PRIMARY KEY in column list
+	HasEmptyColumnsPrimaryKey bool         `json:"has_empty_columns_primary_key,omitempty"` // TRUE if PRIMARY KEY () was seen with empty parens
 	Engine *EngineClause `json:"engine,omitempty"`
 	OrderBy []Expression `json:"order_by,omitempty"`
 	OrderByHasModifiers bool `json:"order_by_has_modifiers,omitempty"` // True if ORDER BY has ASC/DESC modifiers
@@ -760,7 +761,8 @@ type AttachQuery struct {
 	Table string `json:"table,omitempty"`
 	Dictionary string `json:"dictionary,omitempty"`
 	Columns []*ColumnDeclaration `json:"columns,omitempty"`
-	ColumnsPrimaryKey []Expression `json:"columns_primary_key,omitempty"` // PRIMARY KEY in column list
+	ColumnsPrimaryKey         []Expression `json:"columns_primary_key,omitempty"`           // PRIMARY KEY in column list
+	HasEmptyColumnsPrimaryKey bool         `json:"has_empty_columns_primary_key,omitempty"` // TRUE if PRIMARY KEY () was seen with empty parens
 	Indexes []*IndexDefinition `json:"indexes,omitempty"` // INDEX definitions in column list
 	Engine *EngineClause `json:"engine,omitempty"`
 	OrderBy []Expression `json:"order_by,omitempty"`
diff --git a/internal/explain/statements.go b/internal/explain/statements.go
index aa1781dc46..b1e1c379b2 100644
--- a/internal/explain/statements.go
+++ b/internal/explain/statements.go
@@ -276,7 +276,7 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
 		childrenCount++ // Add for Function tuple containing PRIMARY KEY columns
 	}
 	// Check for inline PRIMARY KEY (from column list, e.g., "n int, primary key n")
-	if len(n.ColumnsPrimaryKey) > 0 {
+	if len(n.ColumnsPrimaryKey) > 0 || n.HasEmptyColumnsPrimaryKey {
 		childrenCount++ // Add for the primary key identifier(s)
 	}
 	fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, childrenCount)
@@ -315,8 +315,12 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
 		}
 	}
 	// Output inline PRIMARY KEY (from column list)
-	if len(n.ColumnsPrimaryKey) > 0 {
-		if len(n.ColumnsPrimaryKey) > 1 {
+	if len(n.ColumnsPrimaryKey) > 0 || n.HasEmptyColumnsPrimaryKey {
+		if n.HasEmptyColumnsPrimaryKey {
+			// Empty PRIMARY KEY ()
+			fmt.Fprintf(sb, "%s Function tuple (children 1)\n", indent)
+			fmt.Fprintf(sb, "%s ExpressionList\n", indent)
+		} else if len(n.ColumnsPrimaryKey) > 1 {
 			// Multiple columns: wrap in Function tuple
 			fmt.Fprintf(sb, "%s Function tuple (children 1)\n", indent)
 			fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.ColumnsPrimaryKey))
@@ -1355,7 +1359,7 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string,
 	if len(n.Indexes) > 0 {
 		columnsChildren++
 	}
-	if len(n.ColumnsPrimaryKey) > 0 {
+	if len(n.ColumnsPrimaryKey) > 0 || n.HasEmptyColumnsPrimaryKey {
 		columnsChildren++
 	}
 	fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, columnsChildren)
@@ -1373,8 +1377,12 @@ func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string,
 		}
 	}
 	// Output inline PRIMARY KEY (from column list)
-	if len(n.ColumnsPrimaryKey) > 0 {
-		if len(n.ColumnsPrimaryKey) > 1 {
+	if len(n.ColumnsPrimaryKey) > 0 || n.HasEmptyColumnsPrimaryKey {
+		if n.HasEmptyColumnsPrimaryKey {
+			// Empty PRIMARY KEY ()
+			fmt.Fprintf(sb, "%s Function tuple (children 1)\n", indent)
+			fmt.Fprintf(sb, "%s ExpressionList\n", indent)
+		} else if len(n.ColumnsPrimaryKey) > 1 {
 			// Multiple columns: wrap in Function tuple
 			fmt.Fprintf(sb, "%s Function tuple (children 1)\n", indent)
 			fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.ColumnsPrimaryKey))
diff --git a/parser/parser.go b/parser/parser.go
index 786f86e3b1..15cbfea8f8 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -2513,6 +2513,10 @@ func (p *Parser) parseCreateTable(create *ast.CreateQuery) {
 			}
 		}
 		p.expect(token.RPAREN)
+		// If no columns were added, mark that we saw an empty PRIMARY KEY ()
+		if len(create.ColumnsPrimaryKey) == 0 {
+			create.HasEmptyColumnsPrimaryKey = true
+		}
 	} else {
 		// Single column: PRIMARY KEY col
 		expr := p.parseExpression(LOWEST)
diff --git a/parser/testdata/03156_nullable_number_tips/metadata.json b/parser/testdata/03156_nullable_number_tips/metadata.json
index b0fb544e9a..0967ef424b 100644
--- a/parser/testdata/03156_nullable_number_tips/metadata.json
+++ b/parser/testdata/03156_nullable_number_tips/metadata.json
@@ -1,6 +1 @@
-{
-  "explain_todo": {
-    "stmt14": true,
-    "stmt7": true
-  }
-}
+{}

From 8bb47716aab40988412bd0df29ef55ae3c9b4aae Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 3 Jan 2026 20:02:42 +0000
Subject: [PATCH 46/47] Support TTL DELETE WHERE clause in ALTER TABLE MODIFY TTL

- Update ALTER MODIFY TTL parsing to use parseTTLElement
- Capture WHERE condition in TTLElement for conditional deletion
- Update explain code to output TTLElement with WHERE as child
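
An illustrative ALTER of the shape this now handles (table and column names
are invented for this sketch):

    ALTER TABLE t MODIFY TTL d + INTERVAL 1 MONTH DELETE WHERE id = 0, d + INTERVAL 1 YEAR;

Each comma-separated TTL element becomes one TTLElement node; when a DELETE
WHERE condition is present it is emitted as a second child of that element.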
---
 internal/explain/statements.go | 18 ++++++++++--
 parser/parser.go | 28 +++++++++++--------
 .../02129_add_column_add_ttl/metadata.json | 7 +----
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/internal/explain/statements.go b/internal/explain/statements.go
index b1e1c379b2..93c582e590 100644
--- a/internal/explain/statements.go
+++ b/internal/explain/statements.go
@@ -1773,9 +1773,23 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri
 			fmt.Fprintf(sb, "%s Identifier %s\n", indent, cmd.ConstraintName)
 		}
 	case ast.AlterModifyTTL:
-		if cmd.TTL != nil && cmd.TTL.Expression != nil {
+		if cmd.TTL != nil && len(cmd.TTL.Elements) > 0 {
 			// TTL is wrapped in ExpressionList and TTLElement
-			// Count total TTL elements (1 for Expression + len(Expressions))
+			fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(cmd.TTL.Elements))
+			for _, elem := range cmd.TTL.Elements {
+				// Count children: 1 for Expr, +1 for Where if present
+				ttlChildren := 1
+				if elem.Where != nil {
+					ttlChildren++
+				}
+				fmt.Fprintf(sb, "%s TTLElement (children %d)\n", indent, ttlChildren)
+				Node(sb, elem.Expr, depth+3)
+				if elem.Where != nil {
+					Node(sb, elem.Where, depth+3)
+				}
+			}
+		} else if cmd.TTL != nil && cmd.TTL.Expression != nil {
+			// Fallback for backward compatibility (Expression/Expressions fields)
 			ttlCount := 1 + len(cmd.TTL.Expressions)
 			fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, ttlCount)
 			fmt.Fprintf(sb, "%s TTLElement (children 1)\n", indent)
diff --git a/parser/parser.go b/parser/parser.go
index 15cbfea8f8..a4affacc8e 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -5682,18 +5682,24 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
 		cmd.Type = ast.AlterModifyTTL
 		p.nextToken()
 		cmd.TTL = &ast.TTLClause{
-			Position:   p.current.Pos,
-			Expression: p.parseExpression(LOWEST),
+			Position: p.current.Pos,
 		}
-		// Skip RECOMPRESS CODEC(...) and other TTL modifiers
-		p.skipTTLModifiers()
-		// Parse additional TTL elements (comma-separated)
-		for p.currentIs(token.COMMA) {
-			p.nextToken() // skip comma
-			expr := p.parseExpression(LOWEST)
-			cmd.TTL.Expressions = append(cmd.TTL.Expressions, expr)
-			// Skip RECOMPRESS CODEC(...) if present
-			p.skipTTLModifiers()
+		// Parse TTL elements using parseTTLElement (captures WHERE clause)
+		for {
+			elem := p.parseTTLElement()
+			cmd.TTL.Elements = append(cmd.TTL.Elements, elem)
+			if p.currentIs(token.COMMA) {
+				p.nextToken()
+			} else {
+				break
+			}
+		}
+		// Keep backward compatibility with Expression/Expressions fields
+		if len(cmd.TTL.Elements) > 0 {
+			cmd.TTL.Expression = cmd.TTL.Elements[0].Expr
+			for i := 1; i < len(cmd.TTL.Elements); i++ {
+				cmd.TTL.Expressions = append(cmd.TTL.Expressions, cmd.TTL.Elements[i].Expr)
+			}
 		}
 	} else if p.currentIs(token.SETTINGS) || (p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "SETTING") {
 		// Both SETTINGS and SETTING (singular) are accepted
diff --git a/parser/testdata/02129_add_column_add_ttl/metadata.json b/parser/testdata/02129_add_column_add_ttl/metadata.json
index 6dc0aa1ce2..0967ef424b 100644
--- a/parser/testdata/02129_add_column_add_ttl/metadata.json
+++ b/parser/testdata/02129_add_column_add_ttl/metadata.json
@@ -1,6 +1 @@
-{
-  "explain_todo": {
-    "stmt17": true,
-    "stmt7": true
-  }
-}
+{}

From 8c9aa1739a5c99f5492af8fda8b9ccd074a541d3 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 3 Jan 2026 20:08:33 +0000
Subject: [PATCH 47/47] Trim whitespace in query parameter name and type parsing

Parameters like {a1: Int32} with spaces after the colon now correctly
parse as name=a1 type=Int32 without leading/trailing spaces.
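
For example, a parameter written with a space after the colon (the query text
around it is invented for this sketch):

    SELECT {a1: Int32};

now parses with Name "a1" and Type "Int32", the same as {a1:Int32}.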
---
 parser/expression.go | 4 ++--
 .../02921_parameterized_view_except_queries/metadata.json | 7 +------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/parser/expression.go b/parser/expression.go
index 10eca8a3f6..db39f77024 100644
--- a/parser/expression.go
+++ b/parser/expression.go
@@ -1739,9 +1739,9 @@ func (p *Parser) parseParameter() ast.Expression {
 
 	// Parse {name:Type} format
 	parts := strings.SplitN(value, ":", 2)
-	param.Name = parts[0]
+	param.Name = strings.TrimSpace(parts[0])
 	if len(parts) > 1 {
-		param.Type = &ast.DataType{Name: parts[1]}
+		param.Type = &ast.DataType{Name: strings.TrimSpace(parts[1])}
 	}
 
 	return param
diff --git a/parser/testdata/02921_parameterized_view_except_queries/metadata.json b/parser/testdata/02921_parameterized_view_except_queries/metadata.json
index 1cffc7878a..0967ef424b 100644
--- a/parser/testdata/02921_parameterized_view_except_queries/metadata.json
+++ b/parser/testdata/02921_parameterized_view_except_queries/metadata.json
@@ -1,6 +1 @@
-{
-  "explain_todo": {
-    "stmt16": true,
-    "stmt21": true
-  }
-}
+{}