diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 7c2a7e5..4517845 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; -import java.util.EnumSet; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -54,12 +54,6 @@ .getLogger(HivePreFilteringRule.class .getName()); - private static final Set COMPARISON = EnumSet.of(SqlKind.EQUALS, - SqlKind.GREATER_THAN_OR_EQUAL, - SqlKind.LESS_THAN_OR_EQUAL, - SqlKind.GREATER_THAN, SqlKind.LESS_THAN, - SqlKind.NOT_EQUALS); - private final FilterFactory filterFactory; // Max number of nodes when converting to CNF @@ -119,11 +113,16 @@ public void onMatch(RelOptRuleCall call) { ImmutableList operands = RexUtil.flattenAnd(((RexCall) topFilterCondition) .getOperands()); Set operandsToPushDownDigest = new HashSet(); - List extractedCommonOperands = null; for (RexNode operand : operands) { if (operand.getKind() == SqlKind.OR) { - extractedCommonOperands = extractCommonOperands(rexBuilder, operand, maxCNFNodeCount); + Multimap commonOperands = extractCommonOperands( + rexBuilder, operand, maxCNFNodeCount); + List extractedCommonOperands = new ArrayList<>(); + for (Collection ops : commonOperands.asMap().values()) { + extractedCommonOperands.add( + RexUtil.composeDisjunction(rexBuilder, ops, false)); + } for (RexNode extractedExpr : extractedCommonOperands) { if (operandsToPushDownDigest.add(extractedExpr.toString())) { operandsToPushDown.add(extractedExpr); @@ -131,8 +130,6 @@ public void onMatch(RelOptRuleCall call) { } } - // TODO: Make expr traversal recursive. 
Extend to traverse inside - // elements of DNF/CNF & extract more deterministic pieces out. if (HiveCalciteUtil.isDeterministic(operand)) { deterministicExprs.add(operand); } else { @@ -158,7 +155,12 @@ public void onMatch(RelOptRuleCall call) { break; case OR: - operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount); + Multimap commonOperands = extractCommonOperands( + rexBuilder, topFilterCondition, maxCNFNodeCount); + for (Collection ops : commonOperands.asMap().values()) { + operandsToPushDown.add( + RexUtil.composeDisjunction(rexBuilder, ops, false)); + } break; default: return; @@ -194,7 +196,7 @@ public void onMatch(RelOptRuleCall call) { } - private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition, + private static Multimap extractCommonOperands(RexBuilder rexBuilder, RexNode condition, int maxCNFNodeCount) { assert condition.getKind() == SqlKind.OR; Multimap reductionCondition = LinkedHashMultimap.create(); @@ -216,28 +218,89 @@ public void onMatch(RelOptRuleCall call) { for (RexNode conjunction : conjunctions) { // We do not know what it is, we bail out for safety if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) { - return new ArrayList<>(); + return LinkedHashMultimap.create(); } RexCall conjCall = (RexCall) conjunction; RexNode ref = null; - if (COMPARISON.contains(conjCall.getOperator().getKind())) { - if (conjCall.operands.get(0) instanceof RexInputRef - && conjCall.operands.get(1) instanceof RexLiteral) { - ref = conjCall.operands.get(0); - } else if (conjCall.operands.get(1) instanceof RexInputRef - && conjCall.operands.get(0) instanceof RexLiteral) { - ref = conjCall.operands.get(1); - } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); - } - } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { - ref = conjCall.operands.get(0); - } else if 
(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { - ref = conjCall.operands.get(1); - } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); + switch(conjCall.getKind()) { + case OR: + Multimap common = extractCommonOperands(rexBuilder, conjCall, maxCNFNodeCount); + reductionCondition.putAll(common); + for (String stringRef : common.keySet()) { + refsInCurrentOperand.add(stringRef); + } + continue; + case EQUALS: + case GREATER_THAN_OR_EQUAL: + case LESS_THAN_OR_EQUAL: + case GREATER_THAN: + case LESS_THAN: + case NOT_EQUALS: + if (conjCall.operands.get(0) instanceof RexInputRef + && conjCall.operands.get(1) instanceof RexLiteral) { + ref = conjCall.operands.get(0); + } else if (conjCall.operands.get(0).getKind() == SqlKind.CAST + && conjCall.operands.get(1) instanceof RexLiteral) { + RexCall left = (RexCall) conjCall.operands.get(0); + if (left.operands.get(0) instanceof RexInputRef) { + ref = left.operands.get(0); + } else { + // Not useful, we continue + continue; + } + } else if (conjCall.operands.get(1) instanceof RexInputRef + && conjCall.operands.get(0) instanceof RexLiteral) { + ref = conjCall.operands.get(1); + } else if (conjCall.operands.get(1).getKind() == SqlKind.CAST + && conjCall.operands.get(0) instanceof RexLiteral) { + RexCall right = (RexCall) conjCall.operands.get(1); + if (right.operands.get(0) instanceof RexInputRef) { + ref = right.operands.get(0); + } else { + // Not useful, we continue + continue; + } + } else { + // Not useful, we continue + continue; + } + break; + case IN: + case IS_NULL: + if (conjCall.operands.get(0) instanceof RexInputRef) { + ref = conjCall.operands.get(0); + } else if (conjCall.operands.get(0).getKind() == SqlKind.CAST) { + RexCall input = (RexCall) conjCall.operands.get(0); + if (input.operands.get(0) instanceof RexInputRef) { + ref = input.operands.get(0); + } else { + // Not useful, we continue + continue; + } + } else { + // Not useful, we continue + continue; + } + break; + case 
BETWEEN: + if (conjCall.operands.get(1) instanceof RexInputRef) { + ref = conjCall.operands.get(1); + } else if (conjCall.operands.get(1).getKind() == SqlKind.CAST) { + RexCall input = (RexCall) conjCall.operands.get(1); + if (input.operands.get(0) instanceof RexInputRef) { + ref = input.operands.get(0); + } else { + // Not useful, we continue + continue; + } + } else { + // Not useful, we continue + continue; + } + break; + default: + // Not useful, we continue + continue; } String stringRef = ref.toString(); @@ -254,15 +317,14 @@ public void onMatch(RelOptRuleCall call) { // If we did not add any factor or there are no common factors, we can // bail out if (refsInAllOperands.isEmpty()) { - return new ArrayList<>(); + return LinkedHashMultimap.create(); } } // 2. We gather the common factors and return them - List commonOperands = new ArrayList<>(); + Multimap commonOperands = LinkedHashMultimap.create(); for (String ref : refsInAllOperands) { - commonOperands - .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); + commonOperands.putAll(ref, reductionCondition.get(ref)); } return commonOperands; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7c50155..d5be6a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1144,20 +1144,15 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); } - // 2. Try factoring out common filter elements & separating deterministic + // 3. Run exhaustive PPD, add not null filters, transitive inference, + // constant propagation, constant folding + List rules = Lists.newArrayList(); + // Try factoring out common filter elements & separating deterministic // vs non-deterministic UDF. 
This needs to run before PPD so that PPD can // add on-clauses for old style Join Syntax // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY, - new HivePreFilteringRule(maxCNFNodeCount)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); - - // 3. Run exhaustive PPD, add not null filters, transitive inference, - // constant propagation, constant folding - List rules = Lists.newArrayList(); + rules.add(new HivePreFilteringRule(maxCNFNodeCount)); if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) { rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING); } else {