diff --git a/pom.xml b/pom.xml index 351df449d6..480369b06a 100644 --- a/pom.xml +++ b/pom.xml @@ -127,7 +127,7 @@ 1.12.0 1.8.2 0.8.0.RELEASE - 1.19.0 + 1.21.0 4.2.4 4.1.17 4.1.19 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 653a3c1170..c1ab64c90f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMaxRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; @@ -83,6 +84,7 @@ new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRuntimeRowCount.SOURCE, + HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, @@ -154,6 +156,7 @@ private RelMetadataProvider init(HiveConf hiveConf) { new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, + HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java index f50779d8ef..c50cef69e0 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java @@ -92,6 +92,7 @@ public static RelBuilderFactory proto(Object... factories) { @Override public RelBuilder filter(Iterable predicates) { + System.out.println("SJC: IN RELBUILDER FILTER"); final RexNode x = RexUtil.composeConjunction( cluster.getRexBuilder(), predicates, false); if (!x.isAlwaysTrue()) { @@ -112,10 +113,26 @@ public RelBuilder filter(Iterable predicates) { */ @Override public RelBuilder empty() { + System.out.println("SJC: IN EMPTY"); final RelNode input = build(); - final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort( - input, RelCollations.of(), null, literal(0)); - return this.push(sort); + final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, literal(false)); + return this.push(filter); + } + + @Override + public RelBuilder sort(int... fields) { + System.out.println("SJC: IN SORT(1)"); + return super.sort(fields); + } + @Override + public RelBuilder sort(RexNode... 
nodes) { + System.out.println("SJC: IN SORT(2)"); + return super.sort(nodes); + } + @Override + public RelBuilder sort(Iterable nodes) { + System.out.println("SJC: IN SORT(3)"); + return super.sort(nodes); } public static SqlFunction getFloorSqlFunction(TimeUnitRange flag) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index d96b1dc022..879e77cd69 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -36,7 +36,6 @@ import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.core.RelFactories.JoinFactory; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; -import org.apache.calcite.rel.core.RelFactories.SemiJoinFactory; import org.apache.calcite.rel.core.RelFactories.SetOpFactory; import org.apache.calcite.rel.core.RelFactories.SortFactory; import org.apache.calcite.rel.type.RelDataType; @@ -72,9 +71,6 @@ public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); - public static final SemiJoinFactory HIVE_SEMI_JOIN_FACTORY = - new HiveSemiJoinFactoryImpl(); - public static final SortFactory HIVE_SORT_FACTORY = new HiveSortFactoryImpl(); @@ -89,7 +85,6 @@ Contexts.of(HIVE_PROJECT_FACTORY, HIVE_FILTER_FACTORY, HIVE_JOIN_FACTORY, - HIVE_SEMI_JOIN_FACTORY, HIVE_SORT_FACTORY, HIVE_AGGREGATE_FACTORY, HIVE_SET_OP_FACTORY)); @@ -123,8 +118,9 @@ public RelNode createProject(RelNode child, * . */ private static class HiveFilterFactoryImpl implements FilterFactory { + // XXX: SJC variablesSet is ignored? 
@Override - public RelNode createFilter(RelNode child, RexNode condition) { + public RelNode createFilter(RelNode child, RexNode condition, Set variablesSet) { RelOptCluster cluster = child.getCluster(); HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); return filter; @@ -161,26 +157,15 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, Set variablesSet, JoinRelType joinType, boolean semiJoinDone) { // According to calcite, it is going to be removed before Calcite-2.0 // TODO: to handle CorrelationId + if (joinType == JoinRelType.SEMI) { + final JoinInfo joinInfo = JoinInfo.of(left, right, condition); + final RelOptCluster cluster = left.getCluster(); + return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition); + } return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType); } } - /** - * Implementation of {@link SemiJoinFactory} that returns - * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin} - * . 
- */ - private static class HiveSemiJoinFactoryImpl implements SemiJoinFactory { - @Override - public RelNode createSemiJoin(RelNode left, RelNode right, - RexNode condition) { - final JoinInfo joinInfo = JoinInfo.of(left, right, condition); - final RelOptCluster cluster = left.getCluster(); - return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition, - joinInfo.leftKeys, joinInfo.rightKeys); - } - } - private static class HiveSortFactoryImpl implements SortFactory { @Override public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation, @@ -197,15 +182,11 @@ public RelNode createSort(RelNode input, RelCollation collation, RexNode offset, private static class HiveAggregateFactoryImpl implements AggregateFactory { @Override - public RelNode createAggregate(RelNode child, boolean indicator, + public RelNode createAggregate(RelNode child, ImmutableBitSet groupSet, ImmutableList groupSets, List aggCalls) { - if (indicator) { - throw new IllegalStateException("Hive does not support indicator columns but Calcite " - + "created an Aggregate operator containing them"); - } - return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, - groupSet, groupSets, aggCalls); + return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, + groupSet, groupSets, aggCalls); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java index 110136ddcd..25ee38fbd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java @@ -108,7 +108,7 @@ public RelNode visit(HiveFilter filter) { @Override public RelNode visit(HiveJoin join) { - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != 
JoinRelType.INNER && !join.isSemiJoin()) { setAutomaticRewritingInvalidReason(join.getJoinType() + " join type is not supported by rewriting algorithm."); } checkExpr(join.getCondition()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index b8380d63cd..5c21a37328 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -547,7 +547,7 @@ public static PKFKJoinInfo extractPKFKJoin( final PKFKJoinInfo cannotExtract = PKFKJoinInfo.of(false, null, null); - if (joinType != JoinRelType.INNER) { + if (joinType != JoinRelType.INNER && !join.isSemiJoin()) { // If it is not an inner, we transform it as the metadata // providers for expressions do not pull information through // outer join (as it would not be correct) @@ -741,7 +741,7 @@ public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join, final RelNode nonFkInput = leftInputPotentialFK ? 
join.getRight() : join.getLeft(); final RewritablePKFKJoinInfo nonRewritable = RewritablePKFKJoinInfo.of(false, null); - if (joinType != JoinRelType.INNER) { + if (joinType != JoinRelType.INNER && !join.isSemiJoin()) { // If it is not an inner, we transform it as the metadata // providers for expressions do not pull information through // outer join (as it would not be correct) @@ -848,7 +848,7 @@ public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join, if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { if (foreignKeyColumnType.isNullable()) { - if (joinType == JoinRelType.INNER) { + if (joinType == JoinRelType.INNER || join.isSemiJoin()) { // If it is nullable and it is an INNER, we just need a IS NOT NULL filter RexNode originalCondOp = refToRex.get(foreignKeyColumnRef); assert originalCondOp != null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java index 05d1dc6cf2..a7f51f499d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java @@ -50,7 +50,6 @@ import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.server.CalciteServerStatement; -import org.apache.calcite.sql.SemiJoinType; import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -133,7 +132,6 @@ public String apply(RexNode input) { private final RelFactories.SortFactory sortFactory; private final RelFactories.SetOpFactory setOpFactory; private final RelFactories.JoinFactory joinFactory; - private final RelFactories.SemiJoinFactory semiJoinFactory; private final RelFactories.CorrelateFactory 
correlateFactory; private final RelFactories.ValuesFactory valuesFactory; private final RelFactories.TableScanFactory scanFactory; @@ -164,9 +162,6 @@ public HiveSubQRemoveRelBuilder(Context context, RelOptCluster cluster, this.joinFactory = Util.first(context.unwrap(RelFactories.JoinFactory.class), HiveRelFactories.HIVE_JOIN_FACTORY); - this.semiJoinFactory = - Util.first(context.unwrap(RelFactories.SemiJoinFactory.class), - HiveRelFactories.HIVE_SEMI_JOIN_FACTORY); this.correlateFactory = Util.first(context.unwrap(RelFactories.CorrelateFactory.class), RelFactories.DEFAULT_CORRELATE_FACTORY); @@ -1141,11 +1136,10 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, } if(createSemiJoin) { join = correlateFactory.createCorrelate(left.rel, right.rel, id, - requiredColumns, SemiJoinType.SEMI); + requiredColumns, JoinRelType.SEMI); } else { join = correlateFactory.createCorrelate(left.rel, right.rel, id, - requiredColumns, SemiJoinType.of(joinType)); - + requiredColumns, joinType); } } else { join = joinFactory.createJoin(left.rel, right.rel, condition, @@ -1186,21 +1180,6 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, String... fieldNames) return join(joinType, conditions); } - /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */ - public HiveSubQRemoveRelBuilder semiJoin(Iterable conditions) { - final Frame right = stack.pop(); - final Frame left = stack.pop(); - final RelNode semiJoin = - semiJoinFactory.createSemiJoin(left.rel, right.rel, and(conditions)); - stack.push(new Frame(semiJoin, left.right)); - return this; - } - - /** Creates a {@link org.apache.calcite.rel.core.SemiJoin}. */ - public HiveSubQRemoveRelBuilder semiJoin(RexNode... conditions) { - return semiJoin(ImmutableList.copyOf(conditions)); - } - /** Assigns a table alias to the top entry on the stack. 
*/ public HiveSubQRemoveRelBuilder as(String alias) { final Frame pair = stack.pop(); @@ -1297,9 +1276,8 @@ private boolean allNull(Object[] values, int column, int columnCount) { */ public HiveSubQRemoveRelBuilder empty() { final RelNode input = build(); - final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort( - input, RelCollations.of(), null, literal(0)); - return this.push(sort); + final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, literal(false)); + return this.push(filter); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java index 50466e001b..6b841a5a29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java @@ -53,14 +53,9 @@ public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, @Override public Aggregate copy(RelTraitSet traitSet, RelNode input, - boolean indicator, ImmutableBitSet groupSet, + ImmutableBitSet groupSet, List groupSets, List aggCalls) { - if (indicator) { - throw new IllegalStateException("Hive does not support indicator columns but tried " - + "to create an Aggregate operator containing them"); - } - return new HiveAggregate(getCluster(), traitSet, input, - groupSet, groupSets, aggCalls); + return new HiveAggregate(getCluster(), traitSet, input, groupSet, groupSets, aggCalls); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java index 7a8cf0aa32..705644700c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java @@ 
-225,7 +225,7 @@ public JoinPredicateInfo getJoinPredicateInfo() { private boolean containsOuter() { for (JoinRelType joinType : joinTypes) { - if (joinType != JoinRelType.INNER) { + if (joinType != JoinRelType.INNER && joinType != JoinRelType.SEMI) { return true; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java index d70ead428d..23ff5e9c0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java @@ -24,9 +24,9 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableIntList; @@ -35,8 +35,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; -public class HiveSemiJoin extends SemiJoin implements HiveRelNode { +public class HiveSemiJoin extends Join implements HiveRelNode { private final RexNode joinFilter; @@ -46,12 +47,9 @@ public static HiveSemiJoin getSemiJoin( RelTraitSet traitSet, RelNode left, RelNode right, - RexNode condition, - ImmutableIntList leftKeys, - ImmutableIntList rightKeys) { + RexNode condition) { try { - HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, - condition, leftKeys, rightKeys); + HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, condition); return semiJoin; } catch (InvalidRelException | CalciteSemanticException e) { 
throw new RuntimeException(e); @@ -62,10 +60,9 @@ protected HiveSemiJoin(RelOptCluster cluster, RelTraitSet traitSet, RelNode left, RelNode right, - RexNode condition, - ImmutableIntList leftKeys, - ImmutableIntList rightKeys) throws InvalidRelException, CalciteSemanticException { - super(cluster, traitSet, left, right, condition, leftKeys, rightKeys); + RexNode condition) throws InvalidRelException, CalciteSemanticException { +// XXX: SJC: make sure passing an empty variablesSet as the last param is ok + super(cluster, traitSet, left, right, condition, JoinRelType.SEMI, Sets.newHashSet()); final List systemFieldList = ImmutableList.of(); List> joinKeyExprs = new ArrayList>(); List filterNulls = new ArrayList(); @@ -81,12 +78,11 @@ public RexNode getJoinFilter() { } @Override - public SemiJoin copy(RelTraitSet traitSet, RexNode condition, + public HiveSemiJoin copy(RelTraitSet traitSet, RexNode condition, RelNode left, RelNode right, JoinRelType joinType, boolean semiJoinDone) { try { final JoinInfo joinInfo = JoinInfo.of(left, right, condition); - HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition, - joinInfo.leftKeys, joinInfo.rightKeys); + HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition); // If available, copy state to registry for optimization rules HiveRulesRegistry registry = semijoin.getCluster().getPlanner().getContext().unwrap(HiveRulesRegistry.class); if (registry != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index ed6659c6cc..b7f4ae8504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -100,7 +100,7 @@ public void onMatch(RelOptRuleCall call) { // If it is not 
an inner join, we do not push the // aggregate operator - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index 4e66de3812..b2ff255d7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -91,8 +91,8 @@ public void onMatch(RelOptRuleCall call) { Set leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0)); Set rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1)); - boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER; - boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER; + boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin(); + boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin(); RexNode newLeftPredicate = getNewPredicate(join, registry, joinPredInfo, leftPushedPredicates, genPredOnLeft, 0); RexNode newRightPredicate = getNewPredicate(join, registry, joinPredInfo, rightPushedPredicates, genPredOnRight, 1); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index c735df81eb..a657d1313f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -117,7 +117,7 @@ 
public void onMatch(RelOptRuleCall call) { // These boolean values represent corresponding left, right input which is potential FK boolean leftInputPotentialFK = topRefs.intersects(leftBits); boolean rightInputPotentialFK = topRefs.intersects(rightBits); - if (leftInputPotentialFK && rightInputPotentialFK && joinType == JoinRelType.INNER) { + if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) { // Both inputs are referenced. Before making a decision, try to swap // references in join condition if it is an inner join, i.e. if a join // condition column is referenced above the join, then we can just @@ -198,6 +198,7 @@ public void onMatch(RelOptRuleCall call) { final Mode mode; switch (joinType) { + case SEMI: case INNER: if (leftInputPotentialFK && rightInputPotentialFK) { // Bails out as it references columns from both sides (or no columns) @@ -283,4 +284,4 @@ public void onMatch(RelOptRuleCall call) { // Transforms LEFT/RIGHT outer join into INNER join TRANSFORM } -} \ No newline at end of file +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java index ea5b06c0d4..19f8f78175 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java @@ -215,12 +215,12 @@ private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) { keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs)); ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size())); - if (join.getJoinType() != JoinRelType.INNER && + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin() && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 
1)) { return null; } // Otherwise, we add to the join specs - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) { int leftInput = keysInInputs.nextSetBit(0); int rightInput = keysInInputs.nextSetBit(numberLeftInputs); joinInputs.add(Pair.of(leftInput, rightInput)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java index 38759c0525..545255cf7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java @@ -24,7 +24,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.rules.PushProjector; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; @@ -77,7 +76,7 @@ public void onMatch(RelOptRuleCall call) { Project origProj = call.rel(0); final Join join = call.rel(1); - if (join instanceof SemiJoin) { + if (join.isSemiJoin()) { return; // TODO: support SemiJoin } // locate all fields referenced in the projection and join condition; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 86b79140c6..a89c05b203 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -76,7 +76,6 @@ import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; -import 
org.apache.calcite.sql.SemiJoinType; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -503,9 +502,12 @@ public Frame decorrelateRel(Values rel) { * @param rel Aggregate to rewrite */ public Frame decorrelateRel(Aggregate rel) throws SemanticException{ + //XXX: SJC FIX THIS +/* if (rel.getGroupType() != Aggregate.Group.SIMPLE) { throw new AssertionError(Bug.CALCITE_461_FIXED); } +*/ // // Rewrite logic: // @@ -688,9 +690,12 @@ private static RexLiteral projectedLiteral(RelNode rel, int i) { } public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{ +//XXX: SJC FIX THIS +/* if (rel.getGroupType() != Aggregate.Group.SIMPLE) { throw new AssertionError(Bug.CALCITE_461_FIXED); } +*/ // // Rewrite logic: // @@ -1248,7 +1253,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { } if(oldInput instanceof LogicalCorrelate - && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI && !cm.mapRefRelToCorRef.containsKey(rel)) { // this conditions need to be pushed into semi-join since this condition // corresponds to IN @@ -1261,7 +1266,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { RexUtil.composeConjunction(rexBuilder, conditions, false); RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), - join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys()); + join.getLeft(), join.getRight(), condition); return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); } @@ -1320,7 +1325,7 @@ public Frame decorrelateRel(Filter rel) { } if(oldInput instanceof LogicalCorrelate - && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI && !cm.mapRefRelToCorRef.containsKey(rel)) { // this conditions need to be pushed into 
semi-join since this condition // corresponds to IN @@ -1332,7 +1337,7 @@ public Frame decorrelateRel(Filter rel) { final RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions, false); RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), - join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys()); + join.getLeft(), join.getRight(), condition); return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); } @@ -1455,14 +1460,13 @@ public Frame decorrelateRel(LogicalCorrelate rel) { RelNode newJoin = null; // this indicates original query was either correlated EXISTS or IN - if(rel.getJoinType() == SemiJoinType.SEMI) { + if(rel.getJoinType() == JoinRelType.SEMI) { final List leftKeys = new ArrayList(); final List rightKeys = new ArrayList(); RelNode[] inputRels = new RelNode[] {leftFrame.r, rightFrame.r}; newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), - rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, - condition, ImmutableIntList.copyOf(leftKeys), ImmutableIntList.copyOf(rightKeys)); + rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition); } else { // Right input positions are shifted by newLeftFieldCount. 
@@ -1473,7 +1477,7 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } newJoin = relBuilder.push(leftFrame.r).push(rightFrame.r) - .join(rel.getJoinType().toJoinType(), condition).build(); + .join(rel.getJoinType(), condition).build(); } valueGen.pop(); @@ -1720,7 +1724,7 @@ private RelNode aggregateCorrelatorOutput( Project project, Set isCount) { final RelNode left = correlate.getLeft(); - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // now create the new project final List> newProjects = Lists.newArrayList(); @@ -2258,10 +2262,10 @@ public void onMatch(RelOptRuleCall call) { // Aggregate (groupby (0) single_value()) // Project-A (may reference coVar) // RightInputRel - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so it // never includes a join condition. The code was not modified for brevity. @@ -2470,11 +2474,11 @@ public void onMatch(RelOptRuleCall call) { return; } - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - final JoinRelType joinType = correlate.getJoinType().toJoinType(); + final JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so it // never includes a join condition. The code was not modified for brevity. 
RexNode joinCond = rexBuilder.makeLiteral(true); @@ -2877,11 +2881,11 @@ private void onMatch2( return; } - if(correlate.getJoinType() != SemiJoinType.LEFT) { + if(correlate.getJoinType() != JoinRelType.LEFT) { return; } - JoinRelType joinType = correlate.getJoinType().toJoinType(); + JoinRelType joinType = correlate.getJoinType(); // corRel.getCondition was here, however Correlate was updated so it // never includes a join condition. The code was not modified for brevity. RexNode joinCond = rexBuilder.makeLiteral(true); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java index 4992e702f9..82704a2ebd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java @@ -51,9 +51,8 @@ public HiveRemoveGBYSemiJoinRule() { @Override public void onMatch(RelOptRuleCall call) { final HiveSemiJoin semijoin= call.rel(0); - if(semijoin.getJoinType() != JoinRelType.INNER) { - return; - } + assert semijoin.getJoinType() == JoinRelType.SEMI; + final RelNode left = call.rel(1); final Aggregate rightAggregate= call.rel(2); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java index e63f163b24..7842b12b52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java @@ -118,7 +118,7 @@ protected void perform(final RelOptRuleCall call, final ImmutableBitSet topRefs, call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left))); return; } - if (join.getJoinType() != JoinRelType.INNER) { + if (join.getJoinType() != 
JoinRelType.INNER && !join.isSemiJoin()) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java index db955b9a9d..eefd2bd2c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java @@ -109,7 +109,7 @@ public HiveJdbcImplementor(SqlDialect dialect, JavaTypeFactory typeFactory) { SqlNode sqlCondition = null; SqlLiteral condType = JoinConditionType.ON.symbol(POS); JoinType joinType = joinType(e.getJoinType()); - if (e.getJoinType() == JoinRelType.INNER && e.getCondition().isAlwaysTrue()) { + if ((e.getJoinType() == JoinRelType.INNER || e.isSemiJoin()) && e.getCondition().isAlwaysTrue()) { joinType = JoinType.COMMA; condType = JoinConditionType.NONE.symbol(POS); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java index b2b2f3c2d8..18ed1aea3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ -25,7 +25,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; @@ -41,6 +40,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; @@ -85,16 +85,9 @@ public static Double getDistinctRowCount(RelNode r, RelMetadataQuery mq, int ind @Override public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate) { - if (rel instanceof HiveJoin) { - HiveJoin hjRel = (HiveJoin) rel; - //TODO: Improve this - if (rel instanceof SemiJoin) { - return mq.getDistinctRowCount(hjRel.getLeft(), groupKey, - rel.getCluster().getRexBuilder().makeLiteral(true)); - } else { - return getJoinDistinctRowCount(mq, rel, rel.getJoinType(), - groupKey, predicate, true); - } + if (rel instanceof HiveJoin || rel instanceof HiveSemiJoin) { + return getJoinDistinctRowCount(mq, rel, rel.getJoinType(), + groupKey, predicate, true); } return mq.getDistinctRowCount(rel, groupKey, predicate); @@ -144,7 +137,7 @@ public static Double getJoinDistinctRowCount(RelMetadataQuery mq, joinRel, predList, joinType, - joinType == JoinRelType.INNER, + (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI), !joinType.generatesNullsOnLeft(), !joinType.generatesNullsOnRight(), joinFilters, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java new file mode 100644 index 0000000000..a85bb2fe4b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdMaxRowCount; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.NumberUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import 
org.apache.hadoop.hive.ql.plan.ColStatistics; + +import com.google.common.collect.ImmutableList; + +public class HiveRelMdMaxRowCount extends RelMdMaxRowCount { + + private static final HiveRelMdMaxRowCount INSTANCE = + new HiveRelMdMaxRowCount(); + + public static final RelMetadataProvider SOURCE = + ChainedRelMetadataProvider.of( + ImmutableList.of( + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.MAX_ROW_COUNT.method, INSTANCE), + RelMdMaxRowCount.SOURCE)); + + private HiveRelMdMaxRowCount() { + super(); + } + + /** Returns 1 for an aggregate without grouping keys; otherwise the input's max row count times the number of grouping sets, or null when the input's max row count is null. */ + @Override + public Double getMaxRowCount(Aggregate rel, RelMetadataQuery mq) { + if (rel.getGroupSet().isEmpty()) { + // Aggregate with no GROUP BY always returns 1 row (even on empty table). + return 1D; + } + + final Double rowCount = mq.getMaxRowCount(rel.getInput()); + if (rowCount == null) { + return null; + } + return rowCount * rel.getGroupSets().size(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index a137bdf94f..4b01c776e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -39,7 +39,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; @@ -316,7 +315,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) { public JoinConditionBasedPredicateInference(Join joinRel, RexNode lPreds, RexNode rPreds) { - this(joinRel, joinRel instanceof SemiJoin, lPreds,
rPreds); + this(joinRel, (joinRel instanceof Join) && ((Join) joinRel).isSemiJoin(), lPreds, rPreds); } private JoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, @@ -416,6 +415,7 @@ public RelOptPredicateList inferPredicates( switch (joinType) { case INNER: case LEFT: + case SEMI: infer(leftPreds, allExprsDigests, inferredPredicates, nonFieldsPredicates, includeEqualityInference, joinType == JoinRelType.LEFT ? rightFieldsBitSet @@ -425,6 +425,7 @@ public RelOptPredicateList inferPredicates( switch (joinType) { case INNER: case RIGHT: + case SEMI: infer(rightPreds, allExprsDigests, inferredPredicates, nonFieldsPredicates, includeEqualityInference, joinType == JoinRelType.RIGHT ? leftFieldsBitSet @@ -453,7 +454,7 @@ public RelOptPredicateList inferPredicates( } } - if (joinType == JoinRelType.INNER && !nonFieldsPredicates.isEmpty()) { + if ((joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI) && !nonFieldsPredicates.isEmpty()) { // Predicates without field references can be pushed to both inputs final Set leftPredsSet = new HashSet( Lists.transform(leftPreds, HiveCalciteUtil.REX_STR_FN)); @@ -471,15 +472,13 @@ public RelOptPredicateList inferPredicates( switch (joinType) { case INNER: - Iterable pulledUpPredicates; - if (isSemiJoin) { - pulledUpPredicates = Iterables.concat(leftPreds, leftInferredPredicates); - } else { - pulledUpPredicates = Iterables.concat(leftPreds, rightPreds, + Iterable pulledUpPredicates = Iterables.concat(leftPreds, rightPreds, RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates); - } return RelOptPredicateList.of(rexBuilder, pulledUpPredicates, leftInferredPredicates, rightInferredPredicates); + case SEMI: + return RelOptPredicateList.of(rexBuilder, Iterables.concat(leftPreds, leftInferredPredicates), + leftInferredPredicates, rightInferredPredicates); case LEFT: return RelOptPredicateList.of(rexBuilder, leftPreds, EMPTY_LIST, rightInferredPredicates); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index d881fbd952..86d5fb9d8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -30,7 +30,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; @@ -75,6 +74,9 @@ protected HiveRelMdRowCount() { @Override public Double getRowCount(Join join, RelMetadataQuery mq) { + if (join.isSemiJoin()) { + return getRowCountSemi(join, mq); + } // Try to infer from constraints first final Pair constraintBasedResult = constraintsBasedAnalyzeJoinForPKFK(join, mq); @@ -117,8 +119,7 @@ public Double getRowCount(Join join, RelMetadataQuery mq) { return rowCount; } - @Override - public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) { + public Double getRowCountSemi(Join rel, RelMetadataQuery mq) { PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); if (pkfk != null) { double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; @@ -249,7 +250,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery // @todo: remove this. 8/28/14 hb // for now adding because RelOptUtil.classifyFilters has an assertion about // column counts that is not true for semiJoins. - if (joinRel instanceof SemiJoin) { + if (joinRel.isSemiJoin()) { return null; } @@ -278,9 +279,9 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return * null. 
*/ - boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.RIGHT) + boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.isSemiJoin() || joinRel.getJoinType() == JoinRelType.RIGHT) && isKey(lBitSet, left, mq); - boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.LEFT) + boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.isSemiJoin() || joinRel.getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, mq); if (!leftIsKey && !rightIsKey) { @@ -356,7 +357,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery */ public static Pair constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) { - if (join instanceof SemiJoin) { + if (join.isSemiJoin()) { // TODO: Support semijoin return null; } @@ -391,9 +392,9 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery return null; } - boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT) + boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.RIGHT) && leftInputResult.isPkFkJoin; - boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT) + boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.LEFT) && rightInputResult.isPkFkJoin; if (!leftIsKey && !rightIsKey) { // Nothing to do here, bail out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index f6a6cf4f03..1724ab1d21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -25,7 +25,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdSelectivity; import org.apache.calcite.rel.metadata.RelMdUtil; @@ -65,7 +64,7 @@ public Double getSelectivity(HiveTableScan t, RelMetadataQuery mq, RexNode predi } public Double getSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) { - if (j.getJoinType().equals(JoinRelType.INNER)) { + if (j.getJoinType().equals(JoinRelType.INNER) || j.isSemiJoin()) { return computeInnerJoinSelectivity(j, mq, predicate); } else if (j.getJoinType().equals(JoinRelType.LEFT) || j.getJoinType().equals(JoinRelType.RIGHT)) { @@ -143,7 +142,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode ndvEstimate = exponentialBackoff(peLst, colStatMap); } - if (j instanceof SemiJoin) { + if (j.isSemiJoin()) { ndvEstimate = Math.min(mq.getRowCount(j.getLeft()), ndvEstimate); } else if (j instanceof HiveJoin) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java index 893cb9975c..fd3553d5f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java @@ -20,7 +20,6 @@ import java.util.List; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdSize; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -31,6 +30,7 @@ import org.apache.calcite.util.ImmutableNullableList; import 
org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; @@ -78,8 +78,7 @@ private HiveRelMdSize() {} return list.build(); } - @Override - public List averageColumnSizes(SemiJoin rel, RelMetadataQuery mq) { + public List averageColumnSizes(HiveSemiJoin rel, RelMetadataQuery mq) { final RelNode left = rel.getLeft(); final List lefts = mq.getAverageColumnSizes(left); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index c4c771e490..adcab91504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -155,11 +155,13 @@ public static ASTNode table(final RelNode scan) { return b.node(); } + //XXX: SJC is semiJoin easy to factor out? 
public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond, boolean semiJoin) { ASTBuilder b = null; switch (joinType) { + case SEMI: case INNER: if (semiJoin) { b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 6c4edeb905..213ed7b8ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -36,7 +36,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableFunctionScan; import org.apache.calcite.rel.core.TableScan; @@ -378,7 +377,7 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException QueryBlockInfo right = convertSource(join.getRight()); s = new Schema(left.schema, right.schema); ASTNode cond = join.getCondition().accept(new RexVisitor(s, false, r.getCluster().getRexBuilder())); - boolean semiJoin = join instanceof SemiJoin; + boolean semiJoin = join.isSemiJoin(); if (join.getRight() instanceof Join && !semiJoin) { // should not be done for semijoin since it will change the semantics // Invert join inputs; this is done because otherwise the SemanticAnalyzer diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index 165cb9efcf..509a070b84 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -37,7 +37,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -175,8 +174,8 @@ OpAttr dispatch(RelNode rn) throws SemanticException { return visit((HiveMultiJoin) rn); } else if (rn instanceof HiveJoin) { return visit((HiveJoin) rn); - } else if (rn instanceof SemiJoin) { - return visit((SemiJoin)rn); + } else if (rn instanceof HiveSemiJoin) { + return visit((HiveSemiJoin) rn); } else if (rn instanceof HiveFilter) { return visit((HiveFilter) rn); } else if (rn instanceof HiveSortLimit) { @@ -332,8 +331,7 @@ OpAttr visit(HiveJoin joinRel) throws SemanticException { return translateJoin(joinRel); } - - OpAttr visit(SemiJoin joinRel) throws SemanticException { + OpAttr visit(HiveSemiJoin joinRel) throws SemanticException { return translateJoin(joinRel); } @@ -365,8 +363,8 @@ private OpAttr translateJoin(RelNode joinRel) throws SemanticException { // 3. 
Virtual columns Set newVcolsInCalcite = new HashSet(); newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite); - if (joinRel instanceof HiveMultiJoin || - !(joinRel instanceof SemiJoin)) { + if (joinRel instanceof HiveMultiJoin || + !((joinRel instanceof Join) && ((Join) joinRel).isSemiJoin())) { int shift = inputs[0].inputs.get(0).getSchema().getSignature().size(); for (int i = 1; i < inputs.length; i++) { newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift)); @@ -904,7 +902,7 @@ private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressio noOuterJoin = !hmj.isOuterJoin(); } else { joinCondns = new JoinCondDesc[1]; - semiJoin = join instanceof SemiJoin; + semiJoin = (join instanceof Join) && ((Join) join).isSemiJoin(); JoinType joinType; if (semiJoin) { joinType = JoinType.LEFTSEMI; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 60cd71583f..9334a8ee19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2697,7 +2697,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r calciteJoinType = JoinRelType.FULL; break; case LEFTSEMI: - calciteJoinType = JoinRelType.INNER; + calciteJoinType = JoinRelType.SEMI; leftSemiJoin = true; break; case INNER: @@ -2731,8 +2731,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r ImmutableList.of(remainingEquiCond, nonEquiConds), false) : nonEquiConds; topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), - ImmutableIntList.copyOf(rightKeys)); + inputRels[0], inputRels[1], calciteJoinCond); // Create join RR: we need to check whether we need to update left RR in case // previous call to 
projectNonColumnEquiConditions updated it diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out index 99ed8de40b..6a0cd4c6ae 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -3277,7 +3277,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveProject(p_partkey=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(p_partkey=[$0], p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[pp]) @@ -3339,7 +3339,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]) - HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) HiveProject(p_partkey=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, part]], table:alias=[pp]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index 6aaf3a0c8d..4254e66a07 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(sales=[$0]) HiveUnion(all=[true]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($2, $7)], joinType=[inner]) + HiveSemiJoin(condition=[=($2, $7)], joinType=[semi]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) HiveAggregate(group=[{1}]) @@ -176,7 +176,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($3, $7)], joinType=[inner]) + HiveSemiJoin(condition=[=($3, $7)], joinType=[semi]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) HiveAggregate(group=[{1}]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out index 5ee3bfc644..474193229a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out @@ -164,7 +164,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) @@ -188,7 +188,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) @@ -212,7 +212,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], 
d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index dfa794d1b8..d196ce69c8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(sales=[$0]) HiveUnion(all=[true]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($3, $7)], joinType=[inner]) + HiveSemiJoin(condition=[=($3, $7)], joinType=[semi]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_customer_sk=[$0]) HiveAggregate(group=[{0}]) @@ -167,7 +167,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($2, $7)], joinType=[inner]) + HiveSemiJoin(condition=[=($2, $7)], joinType=[semi]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_customer_sk=[$0]) HiveAggregate(group=[{0}]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out index 6c3404d979..fe05a6e300 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -160,7 +160,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - 
HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) @@ -182,7 +182,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) @@ -204,7 +204,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out index c42332d964..b6d6559c30 100644 --- a/ql/src/test/results/clientpositive/spark/semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -2955,7 +2955,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveProject(p_partkey=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) HiveProject(p_partkey=[$0], p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[pp]) @@ -3017,7 +3017,7 @@ POSTHOOK: Input: default@part #### A masked 
pattern was here #### CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]) - HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) HiveProject(p_partkey=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, part]], table:alias=[pp])