Skip to content

Commit f8d2efd

Browse files
authored
Simplify UNION ALL with identical expressions but different aliases (#69)
1 parent 3adedd2 commit f8d2efd

File tree

3 files changed

+103
-9
lines changed

3 files changed

+103
-9
lines changed

CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ This finds tests with `explain_todo` entries in their metadata.
1212

1313
## Running Tests
1414

15-
Always run parser tests with a 5 second timeout:
15+
Always run parser tests with a 10 second timeout:
1616

1717
```bash
18-
go test ./parser/... -timeout 5s
18+
go test ./parser/... -timeout 10s
1919
```
2020

2121
The tests are very fast. If a test is timing out, it indicates a bug (likely an infinite loop in the parser).

internal/explain/select.go

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer
2020
}
2121
children := countSelectUnionChildren(n)
2222
fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children)
23+
// ClickHouse optimizes UNION ALL when selects have identical expressions but different aliases.
24+
// In that case, only the first SELECT is shown since column names come from the first SELECT anyway.
25+
selects := simplifyUnionSelects(n.Selects)
2326
// Wrap selects in ExpressionList
24-
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects))
25-
for _, sel := range n.Selects {
27+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(selects))
28+
for _, sel := range selects {
2629
Node(sb, sel, depth+2)
2730
}
2831
// INTO OUTFILE clause - check if any SelectQuery has IntoOutfile set
@@ -252,6 +255,101 @@ func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int {
252255
return count
253256
}
254257

258+
// simplifyUnionSelects implements ClickHouse's UNION ALL optimization:
259+
// When all SELECT queries in a UNION have identical expressions (ignoring aliases)
260+
// but different aliases, only the first SELECT is returned.
261+
// This only applies when ALL columns in ALL SELECTs have explicit aliases.
262+
// If aliases are the same across all SELECTs, or if any column lacks an alias, all are kept.
263+
func simplifyUnionSelects(selects []ast.Statement) []ast.Statement {
264+
if len(selects) <= 1 {
265+
return selects
266+
}
267+
268+
// Check if all are simple SelectQuery with only literal columns
269+
var queries []*ast.SelectQuery
270+
for _, sel := range selects {
271+
sq, ok := sel.(*ast.SelectQuery)
272+
if !ok {
273+
// Not a simple SelectQuery, can't simplify
274+
return selects
275+
}
276+
// Only handle simple SELECT with just columns, no FROM/WHERE/etc.
277+
if sq.From != nil || sq.Where != nil || sq.GroupBy != nil ||
278+
sq.Having != nil || sq.OrderBy != nil || len(sq.With) > 0 {
279+
return selects
280+
}
281+
queries = append(queries, sq)
282+
}
283+
284+
// Check if all have the same number of columns
285+
numCols := len(queries[0].Columns)
286+
for _, q := range queries[1:] {
287+
if len(q.Columns) != numCols {
288+
return selects
289+
}
290+
}
291+
292+
// Check if columns are all literals with aliases
293+
// and compare expressions (without aliases) and aliases separately
294+
allSameAliases := true
295+
allSameExprs := true
296+
allHaveAliases := true
297+
298+
for colIdx := 0; colIdx < numCols; colIdx++ {
299+
firstAlias := ""
300+
firstExpr := ""
301+
302+
for i, q := range queries {
303+
col := q.Columns[colIdx]
304+
alias := ""
305+
exprStr := ""
306+
hasAlias := false
307+
308+
switch c := col.(type) {
309+
case *ast.AliasedExpr:
310+
alias = c.Alias
311+
hasAlias = c.Alias != ""
312+
// Get string representation of the expression
313+
if lit, ok := c.Expr.(*ast.Literal); ok {
314+
exprStr = fmt.Sprintf("%v", lit.Value)
315+
} else {
316+
// Non-literal expression, can't simplify
317+
return selects
318+
}
319+
case *ast.Literal:
320+
exprStr = fmt.Sprintf("%v", c.Value)
321+
hasAlias = false
322+
default:
323+
// Not a simple literal or aliased literal
324+
return selects
325+
}
326+
327+
if !hasAlias {
328+
allHaveAliases = false
329+
}
330+
331+
if i == 0 {
332+
firstAlias = alias
333+
firstExpr = exprStr
334+
} else {
335+
if alias != firstAlias {
336+
allSameAliases = false
337+
}
338+
if exprStr != firstExpr {
339+
allSameExprs = false
340+
}
341+
}
342+
}
343+
}
344+
345+
// If expressions are the same, all have aliases, but aliases differ, return only first SELECT
346+
if allSameExprs && allHaveAliases && !allSameAliases {
347+
return selects[:1]
348+
}
349+
350+
return selects
351+
}
352+
255353
func countSelectQueryChildren(n *ast.SelectQuery) int {
256354
count := 1 // columns ExpressionList
257355
// WITH clause
Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1 @@
1-
{
2-
"explain_todo": {
3-
"stmt1": true
4-
}
5-
}
1+
{}

0 commit comments

Comments
 (0)