diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 7c2a7e5..e3b4fbd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; +import java.util.Collection; import java.util.EnumSet; import java.util.HashSet; import java.util.List; @@ -119,11 +120,16 @@ public void onMatch(RelOptRuleCall call) { ImmutableList operands = RexUtil.flattenAnd(((RexCall) topFilterCondition) .getOperands()); Set operandsToPushDownDigest = new HashSet(); - List extractedCommonOperands = null; for (RexNode operand : operands) { if (operand.getKind() == SqlKind.OR) { - extractedCommonOperands = extractCommonOperands(rexBuilder, operand, maxCNFNodeCount); + Multimap commonOperands = extractCommonOperands( + rexBuilder, operand, maxCNFNodeCount); + List extractedCommonOperands = new ArrayList<>(); + for (Collection ops : commonOperands.asMap().values()) { + extractedCommonOperands.add( + RexUtil.composeDisjunction(rexBuilder, ops, false)); + } for (RexNode extractedExpr : extractedCommonOperands) { if (operandsToPushDownDigest.add(extractedExpr.toString())) { operandsToPushDown.add(extractedExpr); @@ -131,8 +137,6 @@ public void onMatch(RelOptRuleCall call) { } } - // TODO: Make expr traversal recursive. Extend to traverse inside - // elements of DNF/CNF & extract more deterministic pieces out. if (HiveCalciteUtil.isDeterministic(operand)) { deterministicExprs.add(operand); } else { @@ -158,7 +162,12 @@ public void onMatch(RelOptRuleCall call) { break; case OR: - operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount); + Multimap commonOperands = extractCommonOperands( + rexBuilder, topFilterCondition, maxCNFNodeCount); + for (Collection ops : commonOperands.asMap().values()) { + operandsToPushDown.add( + RexUtil.composeDisjunction(rexBuilder, ops, false)); + } break; default: return; @@ -194,7 +203,7 @@ public void onMatch(RelOptRuleCall call) { } - private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition, + private static Multimap extractCommonOperands(RexBuilder rexBuilder, RexNode condition, int maxCNFNodeCount) { assert condition.getKind() == SqlKind.OR; Multimap reductionCondition = LinkedHashMultimap.create(); @@ -216,11 +225,18 @@ public void onMatch(RelOptRuleCall call) { for (RexNode conjunction : conjunctions) { // We do not know what it is, we bail out for safety if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) { - return new ArrayList<>(); + return LinkedHashMultimap.create(); } RexCall conjCall = (RexCall) conjunction; RexNode ref = null; - if (COMPARISON.contains(conjCall.getOperator().getKind())) { + if (conjCall.getOperator().getKind().equals(SqlKind.OR)) { + Multimap common = extractCommonOperands(rexBuilder, conjCall, maxCNFNodeCount); + reductionCondition.putAll(common); + for (String stringRef : common.keySet()) { + refsInCurrentOperand.add(stringRef); + } + continue; + } else if (COMPARISON.contains(conjCall.getOperator().getKind())) { if (conjCall.operands.get(0) instanceof RexInputRef && conjCall.operands.get(1) instanceof RexLiteral) { ref = conjCall.operands.get(0); @@ -228,16 +244,16 @@ public void onMatch(RelOptRuleCall call) { && conjCall.operands.get(0) instanceof RexLiteral) { ref = conjCall.operands.get(1); } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); + // Not useful, we continue + continue; } } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { ref = conjCall.operands.get(0); } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { ref = conjCall.operands.get(1); } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); + // Not useful, we continue + continue; } String stringRef = ref.toString(); @@ -254,15 +270,14 @@ public void onMatch(RelOptRuleCall call) { // If we did not add any factor or there are no common factors, we can // bail out if (refsInAllOperands.isEmpty()) { - return new ArrayList<>(); + return LinkedHashMultimap.create(); } } // 2. We gather the common factors and return them - List commonOperands = new ArrayList<>(); + Multimap commonOperands = LinkedHashMultimap.create(); for (String ref : refsInAllOperands) { - commonOperands - .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); + commonOperands.putAll(ref, reductionCondition.get(ref)); } return commonOperands; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7c50155..d5be6a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1144,20 +1144,15 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); } - // 2. Try factoring out common filter elements & separating deterministic + // 3. Run exhaustive PPD, add not null filters, transitive inference, + // constant propagation, constant folding + List rules = Lists.newArrayList(); + // Try factoring out common filter elements & separating deterministic // vs non-deterministic UDF. This needs to run before PPD so that PPD can // add on-clauses for old style Join Syntax // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY, - new HivePreFilteringRule(maxCNFNodeCount)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); - - // 3. Run exhaustive PPD, add not null filters, transitive inference, - // constant propagation, constant folding - List rules = Lists.newArrayList(); + rules.add(new HivePreFilteringRule(maxCNFNodeCount)); if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) { rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING); } else {