diff --git a/pom.xml b/pom.xml index e061f64fc1..3358f01f83 100644 --- a/pom.xml +++ b/pom.xml @@ -127,7 +127,7 @@ 1.12.0 1.8.2 0.8.0.RELEASE - 1.19.0 + 1.21.0 4.2.4 4.1.17 4.1.19 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index d96b1dc022..879e77cd69 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -36,7 +36,6 @@ import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.core.RelFactories.JoinFactory; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; -import org.apache.calcite.rel.core.RelFactories.SemiJoinFactory; import org.apache.calcite.rel.core.RelFactories.SetOpFactory; import org.apache.calcite.rel.core.RelFactories.SortFactory; import org.apache.calcite.rel.type.RelDataType; @@ -72,9 +71,6 @@ public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); - public static final SemiJoinFactory HIVE_SEMI_JOIN_FACTORY = - new HiveSemiJoinFactoryImpl(); - public static final SortFactory HIVE_SORT_FACTORY = new HiveSortFactoryImpl(); @@ -89,7 +85,6 @@ Contexts.of(HIVE_PROJECT_FACTORY, HIVE_FILTER_FACTORY, HIVE_JOIN_FACTORY, - HIVE_SEMI_JOIN_FACTORY, HIVE_SORT_FACTORY, HIVE_AGGREGATE_FACTORY, HIVE_SET_OP_FACTORY)); @@ -123,8 +118,9 @@ public RelNode createProject(RelNode child, * . */ private static class HiveFilterFactoryImpl implements FilterFactory { + // XXX: SJC variablesSet is ignored? 
@Override - public RelNode createFilter(RelNode child, RexNode condition) { + public RelNode createFilter(RelNode child, RexNode condition, Set variablesSet) { RelOptCluster cluster = child.getCluster(); HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); return filter; @@ -161,26 +157,15 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, Set variablesSet, JoinRelType joinType, boolean semiJoinDone) { // According to calcite, it is going to be removed before Calcite-2.0 // TODO: to handle CorrelationId + if (joinType == JoinRelType.SEMI) { + final JoinInfo joinInfo = JoinInfo.of(left, right, condition); + final RelOptCluster cluster = left.getCluster(); + return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition); + } return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType); } } - /** - * Implementation of {@link SemiJoinFactory} that returns - * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin} - * . 
- */ - private static class HiveSemiJoinFactoryImpl implements SemiJoinFactory { - @Override - public RelNode createSemiJoin(RelNode left, RelNode right, - RexNode condition) { - final JoinInfo joinInfo = JoinInfo.of(left, right, condition); - final RelOptCluster cluster = left.getCluster(); - return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition, - joinInfo.leftKeys, joinInfo.rightKeys); - } - } - private static class HiveSortFactoryImpl implements SortFactory { @Override public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation, @@ -197,15 +182,11 @@ public RelNode createSort(RelNode input, RelCollation collation, RexNode offset, private static class HiveAggregateFactoryImpl implements AggregateFactory { @Override - public RelNode createAggregate(RelNode child, boolean indicator, + public RelNode createAggregate(RelNode child, ImmutableBitSet groupSet, ImmutableList groupSets, List aggCalls) { - if (indicator) { - throw new IllegalStateException("Hive does not support indicator columns but Calcite " - + "created an Aggregate operator containing them"); - } - return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, - groupSet, groupSets, aggCalls); + return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, + groupSet, groupSets, aggCalls); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java index 05d1dc6cf2..d72e8ff365 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java @@ -50,7 +50,6 @@ import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.server.CalciteServerStatement; -import org.apache.calcite.sql.SemiJoinType; import 
org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -133,7 +132,6 @@ public String apply(RexNode input) { private final RelFactories.SortFactory sortFactory; private final RelFactories.SetOpFactory setOpFactory; private final RelFactories.JoinFactory joinFactory; - private final RelFactories.SemiJoinFactory semiJoinFactory; private final RelFactories.CorrelateFactory correlateFactory; private final RelFactories.ValuesFactory valuesFactory; private final RelFactories.TableScanFactory scanFactory; @@ -164,9 +162,6 @@ public HiveSubQRemoveRelBuilder(Context context, RelOptCluster cluster, this.joinFactory = Util.first(context.unwrap(RelFactories.JoinFactory.class), HiveRelFactories.HIVE_JOIN_FACTORY); - this.semiJoinFactory = - Util.first(context.unwrap(RelFactories.SemiJoinFactory.class), - HiveRelFactories.HIVE_SEMI_JOIN_FACTORY); this.correlateFactory = Util.first(context.unwrap(RelFactories.CorrelateFactory.class), RelFactories.DEFAULT_CORRELATE_FACTORY); @@ -1141,11 +1136,10 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, } if(createSemiJoin) { join = correlateFactory.createCorrelate(left.rel, right.rel, id, - requiredColumns, SemiJoinType.SEMI); + requiredColumns, JoinRelType.SEMI); } else { join = correlateFactory.createCorrelate(left.rel, right.rel, id, - requiredColumns, SemiJoinType.of(joinType)); - + requiredColumns, joinType); } } else { join = joinFactory.createJoin(left.rel, right.rel, condition, @@ -1186,21 +1180,6 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, String... fieldNames) return join(joinType, conditions); } - /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. 
*/ - public HiveSubQRemoveRelBuilder semiJoin(Iterable conditions) { - final Frame right = stack.pop(); - final Frame left = stack.pop(); - final RelNode semiJoin = - semiJoinFactory.createSemiJoin(left.rel, right.rel, and(conditions)); - stack.push(new Frame(semiJoin, left.right)); - return this; - } - - /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */ - public HiveSubQRemoveRelBuilder semiJoin(RexNode... conditions) { - return semiJoin(ImmutableList.copyOf(conditions)); - } - /** Assigns a table alias to the top entry on the stack. */ public HiveSubQRemoveRelBuilder as(String alias) { final Frame pair = stack.pop(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java index 50466e001b..6b841a5a29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java @@ -53,14 +53,9 @@ public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, @Override public Aggregate copy(RelTraitSet traitSet, RelNode input, - boolean indicator, ImmutableBitSet groupSet, + ImmutableBitSet groupSet, List groupSets, List aggCalls) { - if (indicator) { - throw new IllegalStateException("Hive does not support indicator columns but tried " - + "to create an Aggregate operator containing them"); - } - return new HiveAggregate(getCluster(), traitSet, input, - groupSet, groupSets, aggCalls); + return new HiveAggregate(getCluster(), traitSet, input, groupSet, groupSets, aggCalls); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java index d70ead428d..23ff5e9c0f 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java @@ -24,9 +24,9 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableIntList; @@ -35,8 +35,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; -public class HiveSemiJoin extends SemiJoin implements HiveRelNode { +public class HiveSemiJoin extends Join implements HiveRelNode { private final RexNode joinFilter; @@ -46,12 +47,9 @@ public static HiveSemiJoin getSemiJoin( RelTraitSet traitSet, RelNode left, RelNode right, - RexNode condition, - ImmutableIntList leftKeys, - ImmutableIntList rightKeys) { + RexNode condition) { try { - HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, - condition, leftKeys, rightKeys); + HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, condition); return semiJoin; } catch (InvalidRelException | CalciteSemanticException e) { throw new RuntimeException(e); @@ -62,10 +60,9 @@ protected HiveSemiJoin(RelOptCluster cluster, RelTraitSet traitSet, RelNode left, RelNode right, - RexNode condition, - ImmutableIntList leftKeys, - ImmutableIntList rightKeys) throws InvalidRelException, CalciteSemanticException { - super(cluster, traitSet, left, right, condition, leftKeys, rightKeys); + RexNode condition) throws InvalidRelException, CalciteSemanticException { +// XXX: SJC: make sure last param of variablesSet is ok + 
super(cluster, traitSet, left, right, condition, JoinRelType.SEMI, Sets.newHashSet()); final List systemFieldList = ImmutableList.of(); List> joinKeyExprs = new ArrayList>(); List filterNulls = new ArrayList(); @@ -81,12 +78,11 @@ public RexNode getJoinFilter() { } @Override - public SemiJoin copy(RelTraitSet traitSet, RexNode condition, + public HiveSemiJoin copy(RelTraitSet traitSet, RexNode condition, RelNode left, RelNode right, JoinRelType joinType, boolean semiJoinDone) { try { final JoinInfo joinInfo = JoinInfo.of(left, right, condition); - HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition, - joinInfo.leftKeys, joinInfo.rightKeys); + HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition); // If available, copy state to registry for optimization rules HiveRulesRegistry registry = semijoin.getCluster().getPlanner().getContext().unwrap(HiveRulesRegistry.class); if (registry != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index ed6659c6cc..b7f4ae8504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -100,7 +100,7 @@ public void onMatch(RelOptRuleCall call) { // If it is not an inner join, we do not push the // aggregate operator - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index 4e66de3812..b2ff255d7c 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -91,8 +91,8 @@ public void onMatch(RelOptRuleCall call) { Set leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0)); Set rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1)); - boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER; - boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER; + boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin(); + boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin(); RexNode newLeftPredicate = getNewPredicate(join, registry, joinPredInfo, leftPushedPredicates, genPredOnLeft, 0); RexNode newRightPredicate = getNewPredicate(join, registry, joinPredInfo, rightPushedPredicates, genPredOnRight, 1); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index c735df81eb..a657d1313f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -117,7 +117,7 @@ public void onMatch(RelOptRuleCall call) { // These boolean values represent corresponding left, right input which is potential FK boolean leftInputPotentialFK = topRefs.intersects(leftBits); boolean rightInputPotentialFK = topRefs.intersects(rightBits); - if (leftInputPotentialFK && rightInputPotentialFK && joinType == JoinRelType.INNER) { + if (leftInputPotentialFK && rightInputPotentialFK && (joinType == 
JoinRelType.INNER || joinType == JoinRelType.SEMI)) { // Both inputs are referenced. Before making a decision, try to swap // references in join condition if it is an inner join, i.e. if a join // condition column is referenced above the join, then we can just @@ -198,6 +198,7 @@ public void onMatch(RelOptRuleCall call) { final Mode mode; switch (joinType) { + case SEMI: case INNER: if (leftInputPotentialFK && rightInputPotentialFK) { // Bails out as it references columns from both sides (or no columns) @@ -283,4 +284,4 @@ public void onMatch(RelOptRuleCall call) { // Transforms LEFT/RIGHT outer join into INNER join TRANSFORM } -} \ No newline at end of file +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java index ea5b06c0d4..19f8f78175 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java @@ -215,12 +215,12 @@ private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) { keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs)); ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size())); - if (join.getJoinType() != JoinRelType.INNER && + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin() && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) { return null; } // Otherwise, we add to the join specs - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) { int leftInput = keysInInputs.nextSetBit(0); int rightInput = keysInInputs.nextSetBit(numberLeftInputs); joinInputs.add(Pair.of(leftInput, rightInput)); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java index 38759c0525..545255cf7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java @@ -24,7 +24,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.rules.PushProjector; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; @@ -77,7 +76,7 @@ public void onMatch(RelOptRuleCall call) { Project origProj = call.rel(0); final Join join = call.rel(1); - if (join instanceof SemiJoin) { + if (join.isSemiJoin()) { return; // TODO: support SemiJoin } // locate all fields referenced in the projection and join condition; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 86b79140c6..a89c05b203 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -76,7 +76,6 @@ import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; -import org.apache.calcite.sql.SemiJoinType; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -503,9 +502,12 @@ public Frame decorrelateRel(Values rel) { * @param rel Aggregate to rewrite */ public Frame decorrelateRel(Aggregate rel) throws SemanticException{ + //XXX: SJC FIX THIS +/* if 
(rel.getGroupType() != Aggregate.Group.SIMPLE) { throw new AssertionError(Bug.CALCITE_461_FIXED); } +*/ // // Rewrite logic: // @@ -688,9 +690,12 @@ private static RexLiteral projectedLiteral(RelNode rel, int i) { } public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{ +//XXX: SJC FIX THIS +/* if (rel.getGroupType() != Aggregate.Group.SIMPLE) { throw new AssertionError(Bug.CALCITE_461_FIXED); } +*/ // // Rewrite logic: // @@ -1248,7 +1253,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { } if(oldInput instanceof LogicalCorrelate - && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI && !cm.mapRefRelToCorRef.containsKey(rel)) { // this conditions need to be pushed into semi-join since this condition // corresponds to IN @@ -1261,7 +1266,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { RexUtil.composeConjunction(rexBuilder, conditions, false); RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), - join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys()); + join.getLeft(), join.getRight(), condition); return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); } @@ -1320,7 +1325,7 @@ public Frame decorrelateRel(Filter rel) { } if(oldInput instanceof LogicalCorrelate - && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI && !cm.mapRefRelToCorRef.containsKey(rel)) { // this conditions need to be pushed into semi-join since this condition // corresponds to IN @@ -1332,7 +1337,7 @@ public Frame decorrelateRel(Filter rel) { final RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions, false); RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), - join.getLeft(), join.getRight(), condition, join.getLeftKeys(), 
join.getRightKeys()); + join.getLeft(), join.getRight(), condition); return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); } @@ -1455,14 +1460,13 @@ public Frame decorrelateRel(LogicalCorrelate rel) { RelNode newJoin = null; // this indicates original query was either correlated EXISTS or IN - if(rel.getJoinType() == SemiJoinType.SEMI) { + if(rel.getJoinType() == JoinRelType.SEMI) { final List leftKeys = new ArrayList(); final List rightKeys = new ArrayList(); RelNode[] inputRels = new RelNode[] {leftFrame.r, rightFrame.r}; newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), - rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, - condition, ImmutableIntList.copyOf(leftKeys), ImmutableIntList.copyOf(rightKeys)); + rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition); } else { // Right input positions are shifted by newLeftFieldCount. @@ -1473,7 +1477,7 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } newJoin = relBuilder.push(leftFrame.r).push(rightFrame.r) - .join(rel.getJoinType().toJoinType(), condition).build(); + .join(rel.getJoinType(), condition).build(); } valueGen.pop(); @@ -1720,7 +1724,7 @@ private RelNode aggregateCorrelatorOutput( Project project, Set isCount) { final RelNode left = correlate.getLeft(); - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // now create the new project final List> newProjects = Lists.newArrayList(); @@ -2258,10 +2262,10 @@ public void onMatch(RelOptRuleCall call) { // Aggregate (groupby (0) single_value()) // Project-A (may reference coVar) // RightInputRel - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so 
it // never includes a join condition. The code was not modified for brevity. @@ -2470,11 +2474,11 @@ public void onMatch(RelOptRuleCall call) { return; } - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so it // never includes a join condition. The code was not modified for brevity. RexNode joinCond = rexBuilder.makeLiteral(true); @@ -2877,11 +2881,11 @@ private void onMatch2( return; } - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - JoinRelType joinType = correlate.getJoinType().toJoinType(); + JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so it // never includes a join condition. The code was not modified for brevity. 
RexNode joinCond = rexBuilder.makeLiteral(true); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java index 4992e702f9..82704a2ebd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java @@ -51,9 +51,8 @@ public HiveRemoveGBYSemiJoinRule() { @Override public void onMatch(RelOptRuleCall call) { final HiveSemiJoin semijoin= call.rel(0); - if(semijoin.getJoinType() != JoinRelType.INNER) { - return; - } + assert semijoin.getJoinType() == JoinRelType.SEMI; + final RelNode left = call.rel(1); final Aggregate rightAggregate= call.rel(2); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java index e63f163b24..7842b12b52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java @@ -118,7 +118,7 @@ protected void perform(final RelOptRuleCall call, final ImmutableBitSet topRefs, call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left))); return; } - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java index b2b2f3c2d8..ab7f3b086c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ 
-25,7 +25,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; @@ -88,7 +87,7 @@ public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet if (rel instanceof HiveJoin) { HiveJoin hjRel = (HiveJoin) rel; //TODO: Improve this - if (rel instanceof SemiJoin) { + if (((HiveJoin)rel).isSemiJoin()) { return mq.getDistinctRowCount(hjRel.getLeft(), groupKey, rel.getCluster().getRexBuilder().makeLiteral(true)); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index a137bdf94f..c7fb338f9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -39,7 +39,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; @@ -316,7 +315,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) { public JoinConditionBasedPredicateInference(Join joinRel, RexNode lPreds, RexNode rPreds) { - this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds); + this(joinRel, (joinRel instanceof Join) && ((Join) joinRel).isSemiJoin(), lPreds, rPreds); } private JoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index d881fbd952..63530d3538 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -30,7 +30,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; @@ -75,6 +74,9 @@ protected HiveRelMdRowCount() { @Override public Double getRowCount(Join join, RelMetadataQuery mq) { + if (join.isSemiJoin()) { + return getRowCountSemi(join, mq); + } // Try to infer from constraints first final Pair constraintBasedResult = constraintsBasedAnalyzeJoinForPKFK(join, mq); @@ -117,8 +119,7 @@ public Double getRowCount(Join join, RelMetadataQuery mq) { return rowCount; } - @Override - public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) { + public Double getRowCountSemi(Join rel, RelMetadataQuery mq) { PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); if (pkfk != null) { double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; @@ -249,7 +250,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery // @todo: remove this. 8/28/14 hb // for now adding because RelOptUtil.classifyFilters has an assertion about // column counts that is not true for semiJoins. 
- if (joinRel instanceof SemiJoin) { + if (joinRel.isSemiJoin()) { return null; } @@ -356,7 +357,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery */ public static Pair constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) { - if (join instanceof SemiJoin) { + if (join.isSemiJoin()) { // TODO: Support semijoin return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index f6a6cf4f03..83f89b8a60 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -25,7 +25,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdSelectivity; import org.apache.calcite.rel.metadata.RelMdUtil; @@ -143,7 +142,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode ndvEstimate = exponentialBackoff(peLst, colStatMap); } - if (j instanceof SemiJoin) { + if (j.isSemiJoin()) { ndvEstimate = Math.min(mq.getRowCount(j.getLeft()), ndvEstimate); } else if (j instanceof HiveJoin) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java index 893cb9975c..fd3553d5f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java @@ -20,7 +20,6 @@ import java.util.List; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.SemiJoin; import 
org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdSize; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -31,6 +30,7 @@ import org.apache.calcite.util.ImmutableNullableList; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; @@ -78,8 +78,7 @@ private HiveRelMdSize() {} return list.build(); } - @Override - public List averageColumnSizes(SemiJoin rel, RelMetadataQuery mq) { + public List averageColumnSizes(HiveSemiJoin rel, RelMetadataQuery mq) { final RelNode left = rel.getLeft(); final List lefts = mq.getAverageColumnSizes(left); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index c4c771e490..adcab91504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -155,11 +155,13 @@ public static ASTNode table(final RelNode scan) { return b.node(); } + //XXX: SJC is semiJoin easy to factor out? 
public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond, boolean semiJoin) { ASTBuilder b = null; switch (joinType) { + case SEMI: case INNER: if (semiJoin) { b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 8382f25548..5460a55441 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -36,7 +36,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableFunctionScan; import org.apache.calcite.rel.core.TableScan; @@ -367,7 +366,7 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException QueryBlockInfo right = convertSource(join.getRight()); s = new Schema(left.schema, right.schema); ASTNode cond = join.getCondition().accept(new RexVisitor(s, false, r.getCluster().getRexBuilder())); - boolean semiJoin = join instanceof SemiJoin; + boolean semiJoin = join.isSemiJoin(); if (join.getRight() instanceof Join && !semiJoin) { // should not be done for semijoin since it will change the semantics // Invert join inputs; this is done because otherwise the SemanticAnalyzer diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index d1585c2901..daaa5cbb28 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -37,7 +37,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -175,8 +174,6 @@ OpAttr dispatch(RelNode rn) throws SemanticException { return visit((HiveMultiJoin) rn); } else if (rn instanceof HiveJoin) { return visit((HiveJoin) rn); - } else if (rn instanceof SemiJoin) { - return visit((SemiJoin)rn); } else if (rn instanceof HiveFilter) { return visit((HiveFilter) rn); } else if (rn instanceof HiveSortLimit) { @@ -333,10 +330,6 @@ OpAttr visit(HiveJoin joinRel) throws SemanticException { } - OpAttr visit(SemiJoin joinRel) throws SemanticException { - return translateJoin(joinRel); - } - private String getHiveDerivedTableAlias() { return "$hdt$_" + (this.uniqueCounter++); } @@ -365,8 +358,8 @@ private OpAttr translateJoin(RelNode joinRel) throws SemanticException { // 3. 
Virtual columns Set newVcolsInCalcite = new HashSet(); newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite); - if (joinRel instanceof HiveMultiJoin || - !(joinRel instanceof SemiJoin)) { + if (joinRel instanceof HiveMultiJoin || + !((joinRel instanceof Join) && ((Join) joinRel).isSemiJoin())) { int shift = inputs[0].inputs.get(0).getSchema().getSignature().size(); for (int i = 1; i < inputs.length; i++) { newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift)); @@ -904,7 +897,7 @@ private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressio noOuterJoin = !hmj.isOuterJoin(); } else { joinCondns = new JoinCondDesc[1]; - semiJoin = join instanceof SemiJoin; + semiJoin = (join instanceof Join) && ((Join) join).isSemiJoin(); JoinType joinType; if (semiJoin) { joinType = JoinType.LEFTSEMI; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 9dd6954f35..58e161f013 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2726,8 +2726,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r ImmutableList.of(remainingEquiCond, nonEquiConds), false) : nonEquiConds; topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), - ImmutableIntList.copyOf(rightKeys)); + inputRels[0], inputRels[1], calciteJoinCond); // Create join RR: we need to check whether we need to update left RR in case // previous call to projectNonColumnEquiConditions updated it