diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index f6b2d84..670ed6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -17,16 +17,25 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.util.List; + +import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.hep.HepRelVertex; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.rules.JoinProjectTransposeRule; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule { + private boolean includeOuter; public static final HiveJoinProjectTransposeRule BOTH_PROJECT = new HiveJoinProjectTransposeRule( operand(HiveJoin.class, @@ -80,6 +89,72 @@ private HiveJoinProjectTransposeRule( RelOptRuleOperand operand, String description, boolean includeOuter, RelBuilderFactory relBuilderFactory) { super(operand, description, includeOuter, relBuilderFactory); + this.includeOuter = includeOuter; + } + + private boolean hasAggregateChildWithIndicator(RelNode source) { + if (source == null) { + return false; + } + if (source instanceof HepRelVertex && + ((HepRelVertex)(source)).getCurrentRel() instanceof HiveAggregate) { + return ((HiveAggregate)(((HepRelVertex)(source)).getCurrentRel())).indicator; + } + List inputs = source instanceof HepRelVertex ? + ((HepRelVertex)source).getCurrentRel().getInputs() : + source.getInputs(); + if (inputs == null) { + return false; + } + for (RelNode cr : inputs) { + if (hasAggregateChildWithIndicator(cr)) { + return true; + } + } + return false; + } + + @Override + public void onMatch(RelOptRuleCall call) { + Join joinRel = call.rel(0); + JoinRelType joinType = joinRel.getJoinType(); + + Project leftProj; + Project rightProj; + RelNode leftJoinChild; + RelNode rightJoinChild; + + // If 1) the rule works on outer joins, or + // 2) input's projection doesn't generate nulls + if (hasLeftChild(call) && (includeOuter || !joinType.generatesNullsOnLeft())) { + leftProj = call.rel(1); + leftJoinChild = getProjectChild(call, leftProj, true); + } else { + leftProj = null; + leftJoinChild = call.rel(1); + } + if (hasRightChild(call) && (includeOuter || !joinType.generatesNullsOnRight())) { + rightProj = getRightChild(call); + rightJoinChild = getProjectChild(call, rightProj, false); + } else { + rightProj = null; + rightJoinChild = joinRel.getRight(); + } + if ((leftProj == null) && (rightProj == null)) { + return; + } + + // If we have an aggregate below the project with an indicator field, + // we cannot do this transformation safely in hive because of the additional indicator + // fields in calcite. This will cause inconsistency in Hive's Join Operator's Rowschema. + // The RelFieldTrimmer does not remove the indicator fields. + // TODO: We can safely remove the below restriction and this overriden function once + // CALCITE-1069 is fixed. + if (hasAggregateChildWithIndicator(leftJoinChild) || + hasAggregateChildWithIndicator(rightJoinChild)) { + return; + } + super.onMatch(call); } }