diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java index 74de4c58660c..19198a52424b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java @@ -39,8 +39,10 @@ import org.apache.calcite.rex.RexUnknownAs; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.SearchTransformer; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; @@ -189,10 +191,14 @@ public RexNode visitCall(RexCall call) { newOperands.add(operand); } } - if (newOperands.size() == 1) { - return rexBuilder.makeLiteral(false); + switch (newOperands.size()) { + case 1: + return rexBuilder.makeLiteral(false); + case 2: + return rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, newOperands); + default: + return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); } - return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); } } else if (call.getOperands().get(0).getKind() == SqlKind.ROW) { // Struct @@ -269,6 +275,14 @@ public RexNode visitCall(RexCall call) { } } } + } else if (call.getKind() == SqlKind.SEARCH) { + // TODO process SEARCH as is? 
+ RexNode expanded = new SearchTransformer<> (rexBuilder, call, RexUnknownAs.UNKNOWN).transform(); + RexNode processed = expanded.accept(this); + if (expanded != processed) { + return processed; + } + return call; } // If we did not reduce, check the children nodes diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index ab9eb330b3a5..f315cb62aed2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -54,7 +54,10 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; @@ -117,6 +120,16 @@ public class RexNodeConverter { private final RexBuilder rexBuilder; private final RelDataTypeFactory typeFactory; + private static final int MAX_NODES_FOR_IN_TO_OR_TRANSFORMATION; + + static { + try { + MAX_NODES_FOR_IN_TO_OR_TRANSFORMATION = HiveConf.getIntVar( + Hive.get().getConf(), HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES); + } catch (HiveException e) { + throw new IllegalStateException(e); + } + } /** * Constructor used by HiveRexExecutorImpl. 
@@ -259,19 +272,15 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { // If it is a floor operator, we need to rewrite it childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder); } else if (HiveIn.INSTANCE.equals(calciteOp) && isAllPrimitive) { - if (childRexNodeLst.size() == 2) { - // if it is a single item in an IN clause, transform A IN (B) to A = B - // from IN [A,B] => EQUALS [A,B] - // except complex types - calciteOp = SqlStdOperatorTable.EQUALS; - } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)){ - // if it is more than an single item in an IN clause, - // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] - // except complex types - // Rewrite to OR is done only if number of operands are less than - // the threshold configured - childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder); - calciteOp = SqlStdOperatorTable.OR; + if (childRexNodeLst.size() == 2 || RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) { + RexNode rewritten = rewriteInClause(childRexNodeLst, rexBuilder); + // rewriteInClause returns null when no rewrite applies; keep the IN call untouched in that case + if (rewritten != null) { + assert rewritten instanceof RexCall; + RexCall call = (RexCall) rewritten; + calciteOp = call.op; + childRexNodeLst = call.operands; + } } } else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) { @@ -577,17 +586,43 @@ public static List transformInToOrOperands(List operands, RexB return disjuncts; } - public static List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst, - RexBuilder rexBuilder) throws SemanticException { - assert op == HiveIn.INSTANCE; - RexNode firstPred = childRexNodeLst.get(0); - List newChildRexNodeLst = new ArrayList(); - for (int i = 1; i < childRexNodeLst.size(); i++) { - newChildRexNodeLst.add( - rexBuilder.makeCall( - SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); + /** + * This method tries to rewrite IN expression arguments into an equivalent call. 
+ * If there are only two elements, generates an EQUALS: + * IN [A,B] => EQUALS [A,B] + * Otherwise, tries to generate a SEARCH: + * IN [A,B,C] => SEARCH(A, SARG([B..B], [C..C])) + * If this is not possible (e.g., argument types not sufficiently compatible to generate a Calcite SEARCH expression), + * tries to generate an OR expression: + * IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] + * If this is not possible (e.g., non-deterministic calls are found in the expressions), returns null. + */ + public static RexNode rewriteInClause(List childRexNodeLst, RexBuilder rexBuilder) { + if (childRexNodeLst.size() == 2) { + return rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, childRexNodeLst); } - return newChildRexNodeLst; + + RexNode arg = childRexNodeLst.get(0); + List ranges = childRexNodeLst.subList(1, childRexNodeLst.size()); + // Avoid SEARCH on rows for the moment (it can lead to issues in Calcite), and check all types are SEARCH-compatible + if (!arg.getType().isStruct() && ranges.stream().allMatch(range -> range.getKind() == SqlKind.LITERAL + && !RexLiteral.isNullLiteral(range) + && SqlTypeUtil.inSameFamily(arg.getType(), range.getType()))) { + RexNode search = rexBuilder.makeIn(arg, ranges); + if (search.getKind() != SqlKind.SEARCH) { + throw new AssertionError("SEARCH creation failed: " + search); + } + return search; + } + + // Calcite SEARCH conversion was not possible: generate our own OR expression + if (MAX_NODES_FOR_IN_TO_OR_TRANSFORMATION == 0 || childRexNodeLst.size() <= MAX_NODES_FOR_IN_TO_OR_TRANSFORMATION) { + List newInputs = RexNodeConverter.transformInToOrOperands(childRexNodeLst, rexBuilder); + if (newInputs != null) { + return newInputs.size() == 1 ? 
newInputs.get(0) : rexBuilder.makeCall(SqlStdOperatorTable.OR, newInputs); + } + } + return null; } public static List rewriteCoalesceChildren( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java index 556388c71914..affc07032afb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java @@ -38,12 +38,9 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.Util; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.HiveFunctionInfo; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; @@ -102,16 +99,9 @@ public class HiveFunctionHelper implements FunctionHelper { private static final Logger LOG = LoggerFactory.getLogger(HiveFunctionHelper.class); private final RexBuilder rexBuilder; - private final int maxNodesForInToOrTransformation; public HiveFunctionHelper(RexBuilder rexBuilder) { this.rexBuilder = rexBuilder; - try { - this.maxNodesForInToOrTransformation = HiveConf.getIntVar( - Hive.get().getConf(), HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES); - } catch (HiveException e) { - throw new IllegalStateException(e); - } } /** @@ -267,28 +257,12 @@ public RexNode getExpression(String functionText, FunctionInfo fi, // If it is a floor operator, we need to rewrite it inputs = RexNodeConverter.rewriteFloorDateChildren(calciteOp, inputs, rexBuilder); } else if 
(HiveIn.INSTANCE.equals(calciteOp)) { - // if it is a single item in an IN clause, transform A IN (B) to A = B - // from IN [A,B] => EQUALS [A,B] - // if it is more than an single item in an IN clause, - // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] - // Rewrite to OR is done only if number of operands are less than - // the threshold configured - boolean rewriteToOr = true; - if(maxNodesForInToOrTransformation != 0) { - if(inputs.size() > maxNodesForInToOrTransformation) { - rewriteToOr = false; - } - } - if(rewriteToOr) { - // If there are non-deterministic functions, we cannot perform this rewriting - List newInputs = RexNodeConverter.transformInToOrOperands(inputs, rexBuilder); - if (newInputs != null) { - inputs = newInputs; - if (inputs.size() == 1) { - inputs.add(rexBuilder.makeLiteral(false)); - } - calciteOp = SqlStdOperatorTable.OR; - } + RexNode rewritten = RexNodeConverter.rewriteInClause(inputs, rexBuilder); + if (rewritten != null) { + assert rewritten instanceof RexCall; + RexCall call = (RexCall) rewritten; + calciteOp = call.op; + inputs = call.operands; } } else if (calciteOp.getKind() == SqlKind.COALESCE && inputs.size() > 1) {