@@ -20,9 +20,12 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer
2020 }
2121 children := countSelectUnionChildren (n )
2222 fmt .Fprintf (sb , "%sSelectWithUnionQuery (children %d)\n " , indent , children )
23+ // ClickHouse optimizes UNION ALL when selects have identical expressions but different aliases.
24+ // In that case, only the first SELECT is shown since column names come from the first SELECT anyway.
25+ selects := simplifyUnionSelects (n .Selects )
2326 // Wrap selects in ExpressionList
24- fmt .Fprintf (sb , "%s ExpressionList (children %d)\n " , indent , len (n . Selects ))
25- for _ , sel := range n . Selects {
27+ fmt .Fprintf (sb , "%s ExpressionList (children %d)\n " , indent , len (selects ))
28+ for _ , sel := range selects {
2629 Node (sb , sel , depth + 2 )
2730 }
2831 // INTO OUTFILE clause - check if any SelectQuery has IntoOutfile set
@@ -252,6 +255,101 @@ func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int {
252255 return count
253256}
254257
258+ // simplifyUnionSelects implements ClickHouse's UNION ALL optimization:
259+ // When all SELECT queries in a UNION have identical expressions (ignoring aliases)
260+ // but different aliases, only the first SELECT is returned.
261+ // This only applies when ALL columns in ALL SELECTs have explicit aliases.
262+ // If aliases are the same across all SELECTs, or if any column lacks an alias, all are kept.
263+ func simplifyUnionSelects (selects []ast.Statement ) []ast.Statement {
264+ if len (selects ) <= 1 {
265+ return selects
266+ }
267+
268+ // Check if all are simple SelectQuery with only literal columns
269+ var queries []* ast.SelectQuery
270+ for _ , sel := range selects {
271+ sq , ok := sel .(* ast.SelectQuery )
272+ if ! ok {
273+ // Not a simple SelectQuery, can't simplify
274+ return selects
275+ }
276+ // Only handle simple SELECT with just columns, no FROM/WHERE/etc.
277+ if sq .From != nil || sq .Where != nil || sq .GroupBy != nil ||
278+ sq .Having != nil || sq .OrderBy != nil || len (sq .With ) > 0 {
279+ return selects
280+ }
281+ queries = append (queries , sq )
282+ }
283+
284+ // Check if all have the same number of columns
285+ numCols := len (queries [0 ].Columns )
286+ for _ , q := range queries [1 :] {
287+ if len (q .Columns ) != numCols {
288+ return selects
289+ }
290+ }
291+
292+ // Check if columns are all literals with aliases
293+ // and compare expressions (without aliases) and aliases separately
294+ allSameAliases := true
295+ allSameExprs := true
296+ allHaveAliases := true
297+
298+ for colIdx := 0 ; colIdx < numCols ; colIdx ++ {
299+ firstAlias := ""
300+ firstExpr := ""
301+
302+ for i , q := range queries {
303+ col := q .Columns [colIdx ]
304+ alias := ""
305+ exprStr := ""
306+ hasAlias := false
307+
308+ switch c := col .(type ) {
309+ case * ast.AliasedExpr :
310+ alias = c .Alias
311+ hasAlias = c .Alias != ""
312+ // Get string representation of the expression
313+ if lit , ok := c .Expr .(* ast.Literal ); ok {
314+ exprStr = fmt .Sprintf ("%v" , lit .Value )
315+ } else {
316+ // Non-literal expression, can't simplify
317+ return selects
318+ }
319+ case * ast.Literal :
320+ exprStr = fmt .Sprintf ("%v" , c .Value )
321+ hasAlias = false
322+ default :
323+ // Not a simple literal or aliased literal
324+ return selects
325+ }
326+
327+ if ! hasAlias {
328+ allHaveAliases = false
329+ }
330+
331+ if i == 0 {
332+ firstAlias = alias
333+ firstExpr = exprStr
334+ } else {
335+ if alias != firstAlias {
336+ allSameAliases = false
337+ }
338+ if exprStr != firstExpr {
339+ allSameExprs = false
340+ }
341+ }
342+ }
343+ }
344+
345+ // If expressions are the same, all have aliases, but aliases differ, return only first SELECT
346+ if allSameExprs && allHaveAliases && ! allSameAliases {
347+ return selects [:1 ]
348+ }
349+
350+ return selects
351+ }
352+
255353func countSelectQueryChildren (n * ast.SelectQuery ) int {
256354 count := 1 // columns ExpressionList
257355 // WITH clause
0 commit comments