diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9fd7dcab4c..69408f6de9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2270,6 +2270,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "Whether to transform OR clauses in Filter operators into IN clauses"),
     HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2,
         "Minimum number of OR clauses needed to transform into IN clauses"),
+    HIVEOPT_TRANSFORM_IN_MAXNODES("hive.optimize.transform.in.maxnodes", 16,
+        "Maximum number of IN expressions beyond which IN will not be transformed into OR clause"),
     HIVECOUNTDISTINCTOPTIMIZER("hive.optimize.countdistinct", true,
         "Whether to transform count distinct into two stages"),
     HIVEPARTITIONCOLUMNSEPARATOR("hive.optimize.partition.columns.separate", true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 100ee0b2d2..f97a39991f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -59,6 +59,7 @@
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.common.type.TimestampTZ;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
@@ -142,6 +143,7 @@ private InputCtx(RelDataType calciteInpDataType, ImmutableMap h
   private final RowResolver outerRR;
   private final ImmutableMap outerNameToPosMap;
   private int correlatedId;
+  private final HiveConf conf;
 
   //Constructor used by HiveRexExecutorImpl
   public RexNodeConverter(RelOptCluster cluster) {
@@ -151,13 +153,15 @@ public RexNodeConverter(RelOptCluster cluster) {
 
   //subqueries will need outer query's row resolver
   public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, ImmutableMap outerNameToPosMap,
-      ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) {
+      ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR,
+      HiveConf conf, int offset, boolean flattenExpr, int correlatedId) {
     this.cluster = cluster;
     this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR, offset));
     this.flattenExpr = flattenExpr;
     this.outerRR = outerRR;
     this.outerNameToPosMap = outerNameToPosMap;
     this.correlatedId = correlatedId;
+    this.conf = conf;
   }
 
   public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
@@ -167,6 +171,7 @@ public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
     this.flattenExpr = flattenExpr;
     this.outerRR = null;
     this.outerNameToPosMap = null;
+    this.conf = null;
   }
 
   public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) {
@@ -175,6 +180,7 @@ public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean
     this.flattenExpr = flattenExpr;
     this.outerRR = null;
     this.outerNameToPosMap = null;
+    this.conf = null;
   }
 
   public RexNode convert(ExprNodeDesc expr) throws SemanticException {
@@ -423,12 +429,20 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
         // from IN [A,B] => EQUALS [A,B]
         // except complex types
         calciteOp = SqlStdOperatorTable.EQUALS;
       } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) {
         // if it is more than an single item in an IN clause,
         // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]]
         // except complex types
-        childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst);
-        calciteOp = SqlStdOperatorTable.OR;
+        // Rewrite to OR only while the operand count (including the leading
+        // reference) stays within the configured threshold; when no HiveConf
+        // is available the rewrite is always applied.
+        final boolean rewriteToOr = conf == null
+            || childRexNodeLst.size() <= HiveConf.getIntVar(conf,
+                HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES);
+        if (rewriteToOr) {
+          childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst);
+          calciteOp = SqlStdOperatorTable.OR;
+        }
       }
     } else if (calciteOp.getKind() == SqlKind.COALESCE &&
         childRexNodeLst.size() > 1) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 212d27a3bc..0e5d48e82b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3195,7 +3195,7 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel,
           .get(srcRel);
       RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
           outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR,
-          0, true, subqueryId).convert(filterCondn);
+          conf, 0, true, subqueryId).convert(filterCondn);
       RexNode factoredFilterExpr = RexUtil
           .pullFactors(cluster.getRexBuilder(), convertedFilterExpr);
       RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
@@ -3429,7 +3429,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
             .get(srcRel);
         RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(),
             outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel),
-            outerRR, 0, true, subqueryId).convert(subQueryExpr);
+            outerRR, conf, 0, true, subqueryId).convert(subQueryExpr);
 
         RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
             srcRel, convertedFilterLHS);
@@ -4696,7 +4696,7 @@ private void setQueryHints(QB qb) throws SemanticException {
 
       RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(),
           outerNameToPosMap, buildHiveColNameToInputPosMap(col_list, inputRR), relToHiveRR.get(srcRel),
-          outerRR, 0, false, subqueryId);
+          outerRR, conf, 0, false, subqueryId);
       for (ExprNodeDesc colExpr : col_list) {
         calciteColLst.add(rexNodeConv.convert(colExpr));
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 0c81986c84..a4c1b9ab38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -1220,16 +1220,27 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
             }
             outputOpList.add(nullConst);
           }
           if (!ctx.isCBOExecuted()) {
-            ArrayList orOperands = TypeCheckProcFactoryUtils.rewriteInToOR(children);
-            if (orOperands != null) {
-              if (orOperands.size() == 1) {
-                orOperands.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false));
+            HiveConf conf;
+            try {
+              conf = Hive.get().getConf();
+            } catch (HiveException e) {
+              throw new SemanticException(e);
+            }
+            // Rewrite IN to OR only while the operand count stays within the configured limit.
+            final int maxNodes =
+                HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES);
+            if (children.size() <= maxNodes) {
+              ArrayList orOperands = TypeCheckProcFactoryUtils.rewriteInToOR(children);
+              if (orOperands != null) {
+                if (orOperands.size() == 1) {
+                  orOperands.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false));
+                }
+                funcText = "or";
+                genericUDF = new GenericUDFOPOr();
+                children.clear();
+                children.addAll(orOperands);
               }
-              funcText = "or";
-              genericUDF = new GenericUDFOPOr();
-              children.clear();
-              children.addAll(orOperands);
             }
           }
         }