diff --git a/pom.xml b/pom.xml
index 351df449d6..480369b06a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,7 +127,7 @@
1.12.0
1.8.2
0.8.0.RELEASE
- 1.19.0
+ 1.21.0
4.2.4
4.1.17
4.1.19
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 653a3c1170..c1ab64c90f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -61,6 +61,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMaxRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity;
@@ -83,6 +84,7 @@
new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(),
HiveRelMdSelectivity.SOURCE,
HiveRelMdRuntimeRowCount.SOURCE,
+ HiveRelMdMaxRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
@@ -154,6 +156,7 @@ private RelMetadataProvider init(HiveConf hiveConf) {
new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(),
HiveRelMdSelectivity.SOURCE,
HiveRelMdRowCount.SOURCE,
+ HiveRelMdMaxRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
index f50779d8ef..79b47cd0e7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
@@ -112,10 +112,23 @@ public RelBuilder filter(Iterable extends RexNode> predicates) {
*/
@Override
public RelBuilder empty() {
final RelNode input = build();
- final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort(
- input, RelCollations.of(), null, literal(0));
- return this.push(sort);
+ // The empty relation is now expressed as a constant-false filter over the result of build(),
+ // replacing the former sort that was created with literal(0).
+ final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, literal(false));
+ return this.push(filter);
+ }
+
+ @Override
+ public RelBuilder sort(int... fields) {
+ return super.sort(fields); // pure pass-through: forwards the arguments to the superclass unchanged
+ }
+ @Override
+ public RelBuilder sort(RexNode... nodes) {
+ return super.sort(nodes); // pure pass-through: forwards the arguments to the superclass unchanged
+ }
+ @Override
+ public RelBuilder sort(Iterable extends RexNode> nodes) {
+ return super.sort(nodes); // pure pass-through: forwards the arguments to the superclass unchanged
}
public static SqlFunction getFloorSqlFunction(TimeUnitRange flag) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index d96b1dc022..b3db3f987c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -123,8 +123,9 @@ public RelNode createProject(RelNode child,
* .
*/
private static class HiveFilterFactoryImpl implements FilterFactory {
+ // XXX: SJC variablesSet is ignored?
@Override
- public RelNode createFilter(RelNode child, RexNode condition) {
+ public RelNode createFilter(RelNode child, RexNode condition, Set variablesSet) {
RelOptCluster cluster = child.getCluster();
HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition);
return filter;
@@ -161,6 +162,11 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition,
Set variablesSet, JoinRelType joinType, boolean semiJoinDone) {
// According to calcite, it is going to be removed before Calcite-2.0
// TODO: to handle CorrelationId
+ if (joinType == JoinRelType.SEMI) {
+ // Semi joins are built as HiveSemiJoin (using the left input's cluster and trait set);
+ // every other join type falls through to HiveJoin below.
+ return HiveSemiJoin.getSemiJoin(left.getCluster(), left.getTraitSet(), left, right, condition);
+ }
return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType);
}
}
@@ -176,8 +182,7 @@ public RelNode createSemiJoin(RelNode left, RelNode right,
RexNode condition) {
final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
final RelOptCluster cluster = left.getCluster();
- return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition,
- joinInfo.leftKeys, joinInfo.rightKeys);
+ return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition);
}
}
@@ -197,15 +202,11 @@ public RelNode createSort(RelNode input, RelCollation collation, RexNode offset,
private static class HiveAggregateFactoryImpl implements AggregateFactory {
@Override
- public RelNode createAggregate(RelNode child, boolean indicator,
+ public RelNode createAggregate(RelNode child, // the 'indicator' flag is no longer part of the signature, so its guard is dropped too
ImmutableBitSet groupSet, ImmutableList groupSets,
List aggCalls) {
- if (indicator) {
- throw new IllegalStateException("Hive does not support indicator columns but Calcite "
- + "created an Aggregate operator containing them");
- }
- return new HiveAggregate(child.getCluster(), child.getTraitSet(), child,
- groupSet, groupSets, aggCalls);
+ return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, // new aggregate reuses the child's cluster and trait set
+ groupSet, groupSets, aggCalls);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
index 110136ddcd..25ee38fbd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java
@@ -108,7 +108,7 @@ public RelNode visit(HiveFilter filter) {
@Override
public RelNode visit(HiveJoin join) {
- if (join.getJoinType() != JoinRelType.INNER) {
+ if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) {
setAutomaticRewritingInvalidReason(join.getJoinType() + " join type is not supported by rewriting algorithm.");
}
checkExpr(join.getCondition());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index b8380d63cd..fdbcf4304d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -547,7 +547,7 @@ public static PKFKJoinInfo extractPKFKJoin(
final PKFKJoinInfo cannotExtract =
PKFKJoinInfo.of(false, null, null);
- if (joinType != JoinRelType.INNER) {
+ if (joinType != JoinRelType.INNER && !join.isSemiJoin()) {
// If it is not an inner, we transform it as the metadata
// providers for expressions do not pull information through
// outer join (as it would not be correct)
@@ -741,7 +741,7 @@ public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join,
final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() : join.getLeft();
final RewritablePKFKJoinInfo nonRewritable = RewritablePKFKJoinInfo.of(false, null);
- if (joinType != JoinRelType.INNER) {
+ if (joinType != JoinRelType.INNER && !join.isSemiJoin()) {
// If it is not an inner, we transform it as the metadata
// providers for expressions do not pull information through
// outer join (as it would not be correct)
@@ -848,7 +848,7 @@ public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join,
if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) &&
ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) {
if (foreignKeyColumnType.isNullable()) {
- if (joinType == JoinRelType.INNER) {
+ if (joinType == JoinRelType.INNER || join.isSemiJoin()) {
// If it is nullable and it is an INNER, we just need a IS NOT NULL filter
RexNode originalCondOp = refToRex.get(foreignKeyColumnRef);
assert originalCondOp != null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index 05d1dc6cf2..f4988e7d3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -50,7 +50,6 @@
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.server.CalciteServerStatement;
-import org.apache.calcite.sql.SemiJoinType;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -1141,11 +1140,10 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
}
if(createSemiJoin) {
join = correlateFactory.createCorrelate(left.rel, right.rel, id,
- requiredColumns, SemiJoinType.SEMI);
+ requiredColumns, JoinRelType.SEMI);
} else {
join = correlateFactory.createCorrelate(left.rel, right.rel, id,
- requiredColumns, SemiJoinType.of(joinType));
-
+ requiredColumns, joinType);
}
} else {
join = joinFactory.createJoin(left.rel, right.rel, condition,
@@ -1297,9 +1295,8 @@ private boolean allNull(Object[] values, int column, int columnCount) {
*/
public HiveSubQRemoveRelBuilder empty() {
final RelNode input = build();
- final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort(
- input, RelCollations.of(), null, literal(0));
- return this.push(sort);
+ final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, literal(false)); // constant-false filter over the result of build(), replacing the former sort created with literal(0)
+ return this.push(filter); // the filter becomes the builder's new top relation
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
index 50466e001b..6b841a5a29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -53,14 +53,9 @@ public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
@Override
public Aggregate copy(RelTraitSet traitSet, RelNode input,
- boolean indicator, ImmutableBitSet groupSet,
+ ImmutableBitSet groupSet, // the 'indicator' flag is no longer part of the signature, so its guard is dropped too
List groupSets, List aggCalls) {
- if (indicator) {
- throw new IllegalStateException("Hive does not support indicator columns but tried "
- + "to create an Aggregate operator containing them");
- }
- return new HiveAggregate(getCluster(), traitSet, input,
- groupSet, groupSets, aggCalls);
+ return new HiveAggregate(getCluster(), traitSet, input, groupSet, groupSets, aggCalls); // copy keeps this node's cluster and takes everything else from the arguments
}
@Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
index 7a8cf0aa32..705644700c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
@@ -225,7 +225,7 @@ public JoinPredicateInfo getJoinPredicateInfo() {
private boolean containsOuter() {
for (JoinRelType joinType : joinTypes) {
- if (joinType != JoinRelType.INNER) {
+ if (joinType != JoinRelType.INNER && joinType != JoinRelType.SEMI) {
return true;
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
index d70ead428d..23ff5e9c0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
@@ -24,9 +24,9 @@
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.InvalidRelException;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.ImmutableIntList;
@@ -35,8 +35,9 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
-public class HiveSemiJoin extends SemiJoin implements HiveRelNode {
+public class HiveSemiJoin extends Join implements HiveRelNode {
private final RexNode joinFilter;
@@ -46,12 +47,9 @@ public static HiveSemiJoin getSemiJoin(
RelTraitSet traitSet,
RelNode left,
RelNode right,
- RexNode condition,
- ImmutableIntList leftKeys,
- ImmutableIntList rightKeys) {
+ RexNode condition) {
try {
- HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right,
- condition, leftKeys, rightKeys);
+ HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, condition);
return semiJoin;
} catch (InvalidRelException | CalciteSemanticException e) {
throw new RuntimeException(e);
@@ -62,10 +60,9 @@ protected HiveSemiJoin(RelOptCluster cluster,
RelTraitSet traitSet,
RelNode left,
RelNode right,
- RexNode condition,
- ImmutableIntList leftKeys,
- ImmutableIntList rightKeys) throws InvalidRelException, CalciteSemanticException {
- super(cluster, traitSet, left, right, condition, leftKeys, rightKeys);
+ RexNode condition) throws InvalidRelException, CalciteSemanticException {
+// XXX: SJC: make sure the empty variablesSet passed as the last param is ok
+ super(cluster, traitSet, left, right, condition, JoinRelType.SEMI, Sets.newHashSet());
final List systemFieldList = ImmutableList.of();
List> joinKeyExprs = new ArrayList>();
List filterNulls = new ArrayList();
@@ -81,12 +78,11 @@ public RexNode getJoinFilter() {
}
@Override
- public SemiJoin copy(RelTraitSet traitSet, RexNode condition,
+ public HiveSemiJoin copy(RelTraitSet traitSet, RexNode condition,
RelNode left, RelNode right, JoinRelType joinType, boolean semiJoinDone) {
try {
final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
- HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition,
- joinInfo.leftKeys, joinInfo.rightKeys);
+ HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition);
// If available, copy state to registry for optimization rules
HiveRulesRegistry registry = semijoin.getCluster().getPlanner().getContext().unwrap(HiveRulesRegistry.class);
if (registry != null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index ed6659c6cc..b7f4ae8504 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -100,7 +100,7 @@ public void onMatch(RelOptRuleCall call) {
// If it is not an inner join, we do not push the
// aggregate operator
- if (join.getJoinType() != JoinRelType.INNER) {
+ if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) {
return;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
index 4e66de3812..b2ff255d7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
@@ -91,8 +91,8 @@ public void onMatch(RelOptRuleCall call) {
Set leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
Set rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
- boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER;
- boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER;
+ boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin();
+ boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin();
RexNode newLeftPredicate = getNewPredicate(join, registry, joinPredInfo, leftPushedPredicates, genPredOnLeft, 0);
RexNode newRightPredicate = getNewPredicate(join, registry, joinPredInfo, rightPushedPredicates, genPredOnRight, 1);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
index c735df81eb..a657d1313f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
@@ -117,7 +117,7 @@ public void onMatch(RelOptRuleCall call) {
// These boolean values represent corresponding left, right input which is potential FK
boolean leftInputPotentialFK = topRefs.intersects(leftBits);
boolean rightInputPotentialFK = topRefs.intersects(rightBits);
- if (leftInputPotentialFK && rightInputPotentialFK && joinType == JoinRelType.INNER) {
+ if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
// Both inputs are referenced. Before making a decision, try to swap
// references in join condition if it is an inner join, i.e. if a join
// condition column is referenced above the join, then we can just
@@ -198,6 +198,7 @@ public void onMatch(RelOptRuleCall call) {
final Mode mode;
switch (joinType) {
+ case SEMI:
case INNER:
if (leftInputPotentialFK && rightInputPotentialFK) {
// Bails out as it references columns from both sides (or no columns)
@@ -283,4 +284,4 @@ public void onMatch(RelOptRuleCall call) {
// Transforms LEFT/RIGHT outer join into INNER join
TRANSFORM
}
-}
\ No newline at end of file
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java
index ea5b06c0d4..efb96042ab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java
@@ -84,6 +84,7 @@ public void onMatch(RelOptRuleCall call) {
// 1. We try to merge this join with the left child
RelNode multiJoin = mergeJoin(join, left, right);
if (multiJoin != null) {
call.transformTo(multiJoin);
return;
}
@@ -113,6 +114,7 @@ public void onMatch(RelOptRuleCall call) {
topProject.getChildExps(),
topProject.getRowType().getFieldNames());
}
call.transformTo(multiJoin);
return;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
index 38759c0525..545255cf7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
@@ -24,7 +24,6 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.rules.PushProjector;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
@@ -77,7 +76,7 @@ public void onMatch(RelOptRuleCall call) {
Project origProj = call.rel(0);
final Join join = call.rel(1);
- if (join instanceof SemiJoin) {
+ if (join.isSemiJoin()) {
return; // TODO: support SemiJoin
}
// locate all fields referenced in the projection and join condition;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 86b79140c6..a89c05b203 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -76,7 +76,6 @@
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
-import org.apache.calcite.sql.SemiJoinType;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -503,9 +502,12 @@ public Frame decorrelateRel(Values rel) {
* @param rel Aggregate to rewrite
*/
public Frame decorrelateRel(Aggregate rel) throws SemanticException{
+ //XXX: SJC FIX THIS
+/*
if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
throw new AssertionError(Bug.CALCITE_461_FIXED);
}
+*/
//
// Rewrite logic:
//
@@ -688,9 +690,12 @@ private static RexLiteral projectedLiteral(RelNode rel, int i) {
}
public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{
+//XXX: SJC FIX THIS
+/*
if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
throw new AssertionError(Bug.CALCITE_461_FIXED);
}
+*/
//
// Rewrite logic:
//
@@ -1248,7 +1253,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException {
}
if(oldInput instanceof LogicalCorrelate
- && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI
&& !cm.mapRefRelToCorRef.containsKey(rel)) {
// this conditions need to be pushed into semi-join since this condition
// corresponds to IN
@@ -1261,7 +1266,7 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException {
RexUtil.composeConjunction(rexBuilder, conditions, false);
RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(),
- join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys());
+ join.getLeft(), join.getRight(), condition);
return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
}
@@ -1320,7 +1325,7 @@ public Frame decorrelateRel(Filter rel) {
}
if(oldInput instanceof LogicalCorrelate
- && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI
&& !cm.mapRefRelToCorRef.containsKey(rel)) {
// this conditions need to be pushed into semi-join since this condition
// corresponds to IN
@@ -1332,7 +1337,7 @@ public Frame decorrelateRel(Filter rel) {
final RexNode condition =
RexUtil.composeConjunction(rexBuilder, conditions, false);
RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(),
- join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys());
+ join.getLeft(), join.getRight(), condition);
return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
}
@@ -1455,14 +1460,13 @@ public Frame decorrelateRel(LogicalCorrelate rel) {
RelNode newJoin = null;
// this indicates original query was either correlated EXISTS or IN
- if(rel.getJoinType() == SemiJoinType.SEMI) {
+ if(rel.getJoinType() == JoinRelType.SEMI) {
final List leftKeys = new ArrayList();
final List rightKeys = new ArrayList();
RelNode[] inputRels = new RelNode[] {leftFrame.r, rightFrame.r};
newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(),
- rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r,
- condition, ImmutableIntList.copyOf(leftKeys), ImmutableIntList.copyOf(rightKeys));
+ rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition);
} else {
// Right input positions are shifted by newLeftFieldCount.
@@ -1473,7 +1477,7 @@ public Frame decorrelateRel(LogicalCorrelate rel) {
}
newJoin = relBuilder.push(leftFrame.r).push(rightFrame.r)
- .join(rel.getJoinType().toJoinType(), condition).build();
+ .join(rel.getJoinType(), condition).build();
}
valueGen.pop();
@@ -1720,7 +1724,7 @@ private RelNode aggregateCorrelatorOutput(
Project project,
Set isCount) {
final RelNode left = correlate.getLeft();
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// now create the new project
final List> newProjects = Lists.newArrayList();
@@ -2258,10 +2262,10 @@ public void onMatch(RelOptRuleCall call) {
// Aggregate (groupby (0) single_value())
// Project-A (may reference coVar)
// RightInputRel
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
@@ -2470,11 +2474,11 @@ public void onMatch(RelOptRuleCall call) {
return;
}
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
RexNode joinCond = rexBuilder.makeLiteral(true);
@@ -2877,11 +2881,11 @@ private void onMatch2(
return;
}
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- JoinRelType joinType = correlate.getJoinType().toJoinType();
+ JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
RexNode joinCond = rexBuilder.makeLiteral(true);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
index 4992e702f9..82704a2ebd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
@@ -51,9 +51,8 @@ public HiveRemoveGBYSemiJoinRule() {
@Override public void onMatch(RelOptRuleCall call) {
final HiveSemiJoin semijoin= call.rel(0);
- if(semijoin.getJoinType() != JoinRelType.INNER) {
- return;
- }
+ assert semijoin.getJoinType() == JoinRelType.SEMI;
+
final RelNode left = call.rel(1);
final Aggregate rightAggregate= call.rel(2);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
index e63f163b24..7842b12b52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
@@ -118,7 +118,7 @@ protected void perform(final RelOptRuleCall call, final ImmutableBitSet topRefs,
call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left)));
return;
}
- if (join.getJoinType() != JoinRelType.INNER) {
+ if (join.getJoinType() != JoinRelType.INNER && !join.isSemiJoin()) {
return;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java
index db955b9a9d..eefd2bd2c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java
@@ -109,7 +109,7 @@ public HiveJdbcImplementor(SqlDialect dialect, JavaTypeFactory typeFactory) {
SqlNode sqlCondition = null;
SqlLiteral condType = JoinConditionType.ON.symbol(POS);
JoinType joinType = joinType(e.getJoinType());
- if (e.getJoinType() == JoinRelType.INNER && e.getCondition().isAlwaysTrue()) {
+ if ((e.getJoinType() == JoinRelType.INNER || e.isSemiJoin()) && e.getCondition().isAlwaysTrue()) {
joinType = JoinType.COMMA;
condType = JoinConditionType.NONE.symbol(POS);
} else {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
index b2b2f3c2d8..63b4e8b730 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
@@ -25,7 +25,6 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
@@ -41,6 +40,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -86,15 +86,8 @@ public static Double getDistinctRowCount(RelNode r, RelMetadataQuery mq, int ind
public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
RexNode predicate) {
if (rel instanceof HiveJoin) {
- HiveJoin hjRel = (HiveJoin) rel;
- //TODO: Improve this
- if (rel instanceof SemiJoin) {
- return mq.getDistinctRowCount(hjRel.getLeft(), groupKey,
- rel.getCluster().getRexBuilder().makeLiteral(true));
- } else {
- return getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
- groupKey, predicate, true);
- }
+ return getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
+ groupKey, predicate, true);
}
return mq.getDistinctRowCount(rel, groupKey, predicate);
@@ -116,7 +109,7 @@ public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet
* otherwise use left NDV * right NDV.
* @return number of distinct rows
*/
- public static Double getJoinDistinctRowCount(RelMetadataQuery mq,
+ private static Double getJoinDistinctRowCount(RelMetadataQuery mq,
RelNode joinRel, JoinRelType joinType, ImmutableBitSet groupKey,
RexNode predicate, boolean useMaxNdv) {
Double distRowCount = null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java
new file mode 100644
index 0000000000..e95d6c4665
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdMaxRowCount;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.NumberUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Hive-specific handler for the maximum-row-count metadata.
+ *
+ * <p>Overrides the {@link Aggregate} case of {@link RelMdMaxRowCount}. The
+ * exported {@link #SOURCE} chains this handler ahead of
+ * {@link RelMdMaxRowCount#SOURCE}.
+ */
+public class HiveRelMdMaxRowCount extends RelMdMaxRowCount {
+
+ // Single shared handler; this is the object registered in SOURCE below.
+ private static final HiveRelMdMaxRowCount INSTANCE =
+ new HiveRelMdMaxRowCount();
+
+ public static final RelMetadataProvider SOURCE =
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.MAX_ROW_COUNT.method, INSTANCE),
+ RelMdMaxRowCount.SOURCE));
+
+ private HiveRelMdMaxRowCount() {
+ super();
+ }
+
+ /**
+ * Returns an upper bound on the number of rows an aggregate can produce.
+ *
+ * @param rel aggregate to analyze
+ * @param mq metadata query used to obtain the bound of the input
+ * @return {@code 1} when the group set is empty; otherwise the input's
+ * maximum row count multiplied by the number of grouping sets, or
+ * {@code null} when the input's maximum row count is unknown
+ */
+ @Override
+ public Double getMaxRowCount(Aggregate rel, RelMetadataQuery mq) {
+ if (rel.getGroupSet().isEmpty()) {
+ // Aggregate with no GROUP BY always returns 1 row (even on empty table).
+ return 1D;
+ }
+
+ final Double rowCount = mq.getMaxRowCount(rel.getInput());
+ if (rowCount == null) {
+ return null;
+ }
+ return rowCount * rel.getGroupSets().size();
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index a137bdf94f..4b01c776e5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -39,7 +39,6 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
@@ -316,7 +315,7 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
+ /**
+ * Creates the inference helper for a join. The semi-join flag handed to the
+ * delegated constructor is taken from {@link Join#isSemiJoin()}.
+ *
+ * @param joinRel join whose condition drives the inference
+ * @param lPreds predicates passed through for the left side
+ * @param rPreds predicates passed through for the right side
+ */
public JoinConditionBasedPredicateInference(Join joinRel,
RexNode lPreds, RexNode rPreds) {
- this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds);
+ // joinRel is already declared as Join, so no instanceof test or cast is needed.
+ this(joinRel, joinRel.isSemiJoin(), lPreds, rPreds);
}
private JoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin,
@@ -416,6 +415,7 @@ public RelOptPredicateList inferPredicates(
switch (joinType) {
case INNER:
case LEFT:
+ case SEMI:
infer(leftPreds, allExprsDigests, inferredPredicates,
nonFieldsPredicates, includeEqualityInference,
joinType == JoinRelType.LEFT ? rightFieldsBitSet
@@ -425,6 +425,7 @@ public RelOptPredicateList inferPredicates(
switch (joinType) {
case INNER:
case RIGHT:
+ case SEMI:
infer(rightPreds, allExprsDigests, inferredPredicates,
nonFieldsPredicates, includeEqualityInference,
joinType == JoinRelType.RIGHT ? leftFieldsBitSet
@@ -453,7 +454,7 @@ public RelOptPredicateList inferPredicates(
}
}
- if (joinType == JoinRelType.INNER && !nonFieldsPredicates.isEmpty()) {
+ if ((joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI) && !nonFieldsPredicates.isEmpty()) {
// Predicates without field references can be pushed to both inputs
final Set leftPredsSet = new HashSet(
Lists.transform(leftPreds, HiveCalciteUtil.REX_STR_FN));
@@ -471,15 +472,13 @@ public RelOptPredicateList inferPredicates(
switch (joinType) {
case INNER:
- Iterable pulledUpPredicates;
- if (isSemiJoin) {
- pulledUpPredicates = Iterables.concat(leftPreds, leftInferredPredicates);
- } else {
- pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
+ Iterable pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates);
- }
return RelOptPredicateList.of(rexBuilder,
pulledUpPredicates, leftInferredPredicates, rightInferredPredicates);
+ case SEMI:
+ return RelOptPredicateList.of(rexBuilder, Iterables.concat(leftPreds, leftInferredPredicates),
+ leftInferredPredicates, rightInferredPredicates);
case LEFT:
return RelOptPredicateList.of(rexBuilder,
leftPreds, EMPTY_LIST, rightInferredPredicates);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index d881fbd952..c0601c0685 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -30,7 +30,6 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
@@ -75,6 +74,9 @@ protected HiveRelMdRowCount() {
@Override
public Double getRowCount(Join join, RelMetadataQuery mq) {
+ if (join.isSemiJoin()) {
+ return getRowCountSemi(join, mq);
+ }
// Try to infer from constraints first
final Pair constraintBasedResult =
constraintsBasedAnalyzeJoinForPKFK(join, mq);
@@ -117,8 +119,7 @@ public Double getRowCount(Join join, RelMetadataQuery mq) {
return rowCount;
}
- @Override
- public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) {
+ public Double getRowCountSemi(Join rel, RelMetadataQuery mq) {
PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq);
if (pkfk != null) {
double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor;
@@ -249,7 +250,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery
// @todo: remove this. 8/28/14 hb
// for now adding because RelOptUtil.classifyFilters has an assertion about
// column counts that is not true for semiJoins.
- if (joinRel instanceof SemiJoin) {
+ if (joinRel.isSemiJoin()) {
return null;
}
@@ -356,7 +357,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery
*/
public static Pair constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) {
- if (join instanceof SemiJoin) {
+ if (join.isSemiJoin()) {
// TODO: Support semijoin
return null;
}
@@ -391,9 +392,9 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery
return null;
}
- boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT)
+ boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.RIGHT)
&& leftInputResult.isPkFkJoin;
- boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT)
+ boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.LEFT)
&& rightInputResult.isPkFkJoin;
if (!leftIsKey && !rightIsKey) {
// Nothing to do here, bail out
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
index f6a6cf4f03..1724ab1d21 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -25,7 +25,6 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdSelectivity;
import org.apache.calcite.rel.metadata.RelMdUtil;
@@ -65,7 +64,7 @@ public Double getSelectivity(HiveTableScan t, RelMetadataQuery mq, RexNode predi
}
public Double getSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
- if (j.getJoinType().equals(JoinRelType.INNER)) {
+ if (j.getJoinType().equals(JoinRelType.INNER) || j.isSemiJoin()) {
return computeInnerJoinSelectivity(j, mq, predicate);
} else if (j.getJoinType().equals(JoinRelType.LEFT) ||
j.getJoinType().equals(JoinRelType.RIGHT)) {
@@ -143,7 +142,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode
ndvEstimate = exponentialBackoff(peLst, colStatMap);
}
- if (j instanceof SemiJoin) {
+ if (j.isSemiJoin()) {
ndvEstimate = Math.min(mq.getRowCount(j.getLeft()),
ndvEstimate);
} else if (j instanceof HiveJoin) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
index 893cb9975c..fd3553d5f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
@@ -20,7 +20,6 @@
import java.util.List;
import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdSize;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -31,6 +30,7 @@
import org.apache.calcite.util.ImmutableNullableList;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -78,8 +78,7 @@ private HiveRelMdSize() {}
return list.build();
}
- @Override
- public List averageColumnSizes(SemiJoin rel, RelMetadataQuery mq) {
+ public List averageColumnSizes(HiveSemiJoin rel, RelMetadataQuery mq) {
final RelNode left = rel.getLeft();
final List lefts =
mq.getAverageColumnSizes(left);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index c4c771e490..adcab91504 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -155,11 +155,13 @@ public static ASTNode table(final RelNode scan) {
return b.node();
}
+ // XXX: SJC - can the semiJoin flag be factored out (is it redundant with JoinRelType.SEMI)?
public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
boolean semiJoin) {
ASTBuilder b = null;
switch (joinType) {
+ case SEMI:
case INNER:
if (semiJoin) {
b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 6c4edeb905..213ed7b8ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -36,7 +36,6 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableFunctionScan;
import org.apache.calcite.rel.core.TableScan;
@@ -378,7 +377,7 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException
QueryBlockInfo right = convertSource(join.getRight());
s = new Schema(left.schema, right.schema);
ASTNode cond = join.getCondition().accept(new RexVisitor(s, false, r.getCluster().getRexBuilder()));
- boolean semiJoin = join instanceof SemiJoin;
+ boolean semiJoin = join.isSemiJoin();
if (join.getRight() instanceof Join && !semiJoin) {
// should not be done for semijoin since it will change the semantics
// Invert join inputs; this is done because otherwise the SemanticAnalyzer
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 165cb9efcf..509a070b84 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -37,7 +37,6 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
@@ -175,8 +174,8 @@ OpAttr dispatch(RelNode rn) throws SemanticException {
return visit((HiveMultiJoin) rn);
} else if (rn instanceof HiveJoin) {
return visit((HiveJoin) rn);
- } else if (rn instanceof SemiJoin) {
- return visit((SemiJoin)rn);
+ } else if (rn instanceof HiveSemiJoin) {
+ return visit((HiveSemiJoin) rn);
} else if (rn instanceof HiveFilter) {
return visit((HiveFilter) rn);
} else if (rn instanceof HiveSortLimit) {
@@ -332,8 +331,7 @@ OpAttr visit(HiveJoin joinRel) throws SemanticException {
return translateJoin(joinRel);
}
-
- OpAttr visit(SemiJoin joinRel) throws SemanticException {
+ OpAttr visit(HiveSemiJoin joinRel) throws SemanticException {
return translateJoin(joinRel);
}
@@ -365,8 +363,8 @@ private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
// 3. Virtual columns
Set newVcolsInCalcite = new HashSet();
newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
- if (joinRel instanceof HiveMultiJoin ||
- !(joinRel instanceof SemiJoin)) {
+ if (joinRel instanceof HiveMultiJoin ||
+ !((joinRel instanceof Join) && ((Join) joinRel).isSemiJoin())) {
int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
for (int i = 1; i < inputs.length; i++) {
newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
@@ -904,7 +902,7 @@ private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressio
noOuterJoin = !hmj.isOuterJoin();
} else {
joinCondns = new JoinCondDesc[1];
- semiJoin = join instanceof SemiJoin;
+ semiJoin = (join instanceof Join) && ((Join) join).isSemiJoin();
JoinType joinType;
if (semiJoin) {
joinType = JoinType.LEFTSEMI;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 60cd71583f..9334a8ee19 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2697,7 +2697,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r
calciteJoinType = JoinRelType.FULL;
break;
case LEFTSEMI:
- calciteJoinType = JoinRelType.INNER;
+ calciteJoinType = JoinRelType.SEMI;
leftSemiJoin = true;
break;
case INNER:
@@ -2731,8 +2731,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r
ImmutableList.of(remainingEquiCond, nonEquiConds), false) :
nonEquiConds;
topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
- inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys),
- ImmutableIntList.copyOf(rightKeys));
+ inputRels[0], inputRels[1], calciteJoinCond);
// Create join RR: we need to check whether we need to update left RR in case
// previous call to projectNonColumnEquiConditions updated it
diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out
index 99ed8de40b..6a0cd4c6ae 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out
@@ -3277,7 +3277,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveProject(p_partkey=[$0])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(p_partkey=[$0], p_name=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
@@ -3339,7 +3339,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveSemiJoin(condition=[=($0, $1)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(p_partkey=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
index 9df533b6f0..e076022ffa 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
@@ -137,7 +137,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=
HiveFilter(condition=[OR(IS NOT NULL($14), IS NOT NULL($16))])
HiveJoin(condition=[=($0, $17)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $15)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $14)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $14)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
index dfb4c33864..69c49c0981 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
@@ -231,7 +231,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -328,7 +328,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -425,7 +425,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
index 8126e432b2..1bfd9d8855 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
@@ -74,7 +74,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index 6aaf3a0c8d..4254e66a07 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[$0])
HiveUnion(all=[true])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($2, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($2, $7)], joinType=[semi])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0])
HiveAggregate(group=[{1}])
@@ -176,7 +176,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($3, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($3, $7)], joinType=[semi])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0])
HiveAggregate(group=[{1}])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
index dcfc53be15..0eb3f7094c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
@@ -134,7 +134,7 @@ HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o
HiveFilter(condition=[OR(IS NOT NULL($11), IS NOT NULL($13))])
HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
index f0cca27177..4e617f99ed 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
@@ -115,7 +115,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], literalTrue=[$11], ws_bill_customer_sk0=[$12])
HiveFilter(condition=[IS NULL($11)])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
index 5ee3bfc644..474193229a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
@@ -164,7 +164,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -188,7 +188,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -212,7 +212,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
index 5cf486f480..b8521f4189 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
@@ -70,7 +70,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
index 2427cbd2ef..8633ba2e8e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
@@ -137,7 +137,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=
HiveFilter(condition=[OR(IS NOT NULL($14), IS NOT NULL($16))])
HiveJoin(condition=[=($0, $17)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $15)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $14)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $14)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
index 13c6477438..b40ca94ac6 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
@@ -74,7 +74,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
index dfa794d1b8..d196ce69c8 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
@@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[$0])
HiveUnion(all=[true])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($3, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($3, $7)], joinType=[semi])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0])
HiveAggregate(group=[{0}])
@@ -167,7 +167,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)])
HiveTableScan(table=[[default, item]], table:alias=[item])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($2, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($2, $7)], joinType=[semi])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0])
HiveAggregate(group=[{0}])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
index d3d6658280..2de681801d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
@@ -134,7 +134,7 @@ HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o
HiveFilter(condition=[OR(IS NOT NULL($11), IS NOT NULL($13))])
HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$9], ca_state=[$10], cd_demo_sk=[$3], cd_gender=[$4], cd_marital_status=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8])
HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
index 5b739ce310..8cb93b4271 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
@@ -115,7 +115,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], literalTrue=[$11], ws_bill_customer_sk0=[$12])
HiveFilter(condition=[IS NULL($11)])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out
index 346a3a9a46..347c4db60f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out
@@ -84,11 +84,11 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l
HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()])
HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 0)], $f4=[CASE(IS NOT NULL($25), 1, 0)])
HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], algorithm=[none], cost=[not available])
- HiveProject(cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$19], w_warehouse_name=[$20], i_item_sk=[$24], i_item_desc=[$25], cd_demo_sk=[$12], hd_demo_sk=[$13], d_date_sk=[$16], d_week_seq=[$17], +=[$18], d_date_sk0=[$14], d_week_seq0=[$15], d_date_sk1=[$22], CAST=[$23], p_promo_sk=[$21])
+ HiveProject(cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$20], w_warehouse_name=[$21], i_item_sk=[$24], i_item_desc=[$25], cd_demo_sk=[$12], hd_demo_sk=[$13], d_date_sk=[$16], d_week_seq=[$17], +=[$18], d_date_sk0=[$14], d_week_seq0=[$15], d_date_sk1=[$22], CAST=[$23], p_promo_sk=[$19])
HiveJoin(condition=[=($24, $8)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[AND(=($5, $22), >($23, $18))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($9, $21)], joinType=[left], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($19, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($20, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($9, $19)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[AND(=($0, $14), =($8, $1), <($3, $11))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3])
HiveFilter(condition=[IS NOT NULL($3)])
@@ -113,10 +113,10 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l
HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)])
HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4), IS NOT NULL(CAST($2):DOUBLE))])
HiveTableScan(table=[[default, date_dim]], table:alias=[d1])
- HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2])
- HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse])
- HiveProject(p_promo_sk=[$0])
- HiveTableScan(table=[[default, promotion]], table:alias=[promotion])
+ HiveProject(p_promo_sk=[$0])
+ HiveTableScan(table=[[default, promotion]], table:alias=[promotion])
+ HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2])
+ HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse])
HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE])
HiveFilter(condition=[IS NOT NULL(CAST($2):DOUBLE)])
HiveTableScan(table=[[default, date_dim]], table:alias=[d3])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
index 6c3404d979..fe05a6e300 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
@@ -160,7 +160,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -182,7 +182,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -204,7 +204,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
index 8efda1ebd2..ef108a6578 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
@@ -70,7 +70,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out
index c42332d964..b6d6559c30 100644
--- a/ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -2955,7 +2955,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveProject(p_partkey=[$0])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(p_partkey=[$0], p_name=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
@@ -3017,7 +3017,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveSemiJoin(condition=[=($0, $1)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(p_partkey=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, part]], table:alias=[pp])