diff --git a/pom.xml b/pom.xml
index 3c06188..2bda92f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -109,7 +109,7 @@
3.4
1.7.7
0.8.0.RELEASE
- 1.5.0
+ 1.6.0
4.2.1
4.1.6
4.1.7
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 1c15012..58a7cff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -47,6 +47,7 @@
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexOver;
import org.apache.calcite.rex.RexRangeRef;
+import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitor;
import org.apache.calcite.rex.RexVisitorImpl;
@@ -666,7 +667,7 @@ public String apply(RexNode r) {
// Note: this is the last step, trying to avoid the expensive call to the metadata provider
// if possible
Set predicatesInSubtree = Sets.newHashSet();
- for (RexNode pred : RelMetadataQuery.getPulledUpPredicates(inp).pulledUpPredicates) {
+ for (RexNode pred : RelMetadataQuery.instance().getPulledUpPredicates(inp).pulledUpPredicates) {
predicatesInSubtree.add(pred.toString());
predicatesInSubtree.addAll(Lists.transform(RelOptUtil.conjunctions(pred), REX_STR_FN));
}
@@ -935,6 +936,12 @@ public Boolean visitFieldAccess(RexFieldAccess fieldAccess) {
// ".FIELD" is constant iff "" is constant.
return fieldAccess.getReferenceExpr().accept(this);
}
+
+ @Override
+ public Boolean visitSubQuery(RexSubQuery subQuery) {
+ // it seems that it is not used by anything.
+ return false;
+ }
}
public static Set getInputRefs(RexNode expr) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index eeec44e..83205bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -28,6 +28,7 @@
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.RelFactories.AggregateFactory;
@@ -147,6 +148,14 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRe
Set variablesStopped, boolean semiJoinDone) {
return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, false);
}
+
+ @Override
+ public RelNode createJoin(RelNode left, RelNode right, RexNode condition,
+ Set variablesSet, JoinRelType joinType, boolean semiJoinDone) {
+ // According to calcite, it is going to be removed before Calcite-2.0
+ // TODO: to handle CorrelationId
+ return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, semiJoinDone);
+ }
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java
index 6840418..6522714 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java
@@ -200,7 +200,7 @@ public double computeSMBMapJoinIOCost(
}
public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) {
- Double currentMemory = RelMetadataQuery.cumulativeMemoryWithinPhase(input);
+ Double currentMemory = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(input);
if (currentMemory != null) {
if(currentMemory / buckets > maxSize) {
return false;
@@ -311,11 +311,12 @@ public static Double getJoinMemory(HiveJoin join) {
public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) {
Double memory = 0.0;
+ RelMetadataQuery mq = RelMetadataQuery.instance();
if (streamingSide == MapJoinStreamingRelation.NONE ||
streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) {
// Left side
- final Double leftAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
- final Double leftRowCount = RelMetadataQuery.getRowCount(join.getLeft());
+ final Double leftAvgRowSize = mq.getAverageRowSize(join.getLeft());
+ final Double leftRowCount = mq.getRowCount(join.getLeft());
if (leftAvgRowSize == null || leftRowCount == null) {
return null;
}
@@ -324,8 +325,8 @@ public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation strea
if (streamingSide == MapJoinStreamingRelation.NONE ||
streamingSide == MapJoinStreamingRelation.LEFT_RELATION) {
// Right side
- final Double rightAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight());
- final Double rightRowCount = RelMetadataQuery.getRowCount(join.getRight());
+ final Double rightAvgRowSize = mq.getAverageRowSize(join.getRight());
+ final Double rightRowCount = mq.getRowCount(join.getRight());
if (rightAvgRowSize == null || rightRowCount == null) {
return null;
}
@@ -338,8 +339,9 @@ public static Integer getSplitCountWithRepartition(HiveJoin join) {
final Double maxSplitSize = join.getCluster().getPlanner().getContext().
unwrap(HiveAlgorithmsConf.class).getMaxSplitSize();
// We repartition: new number of splits
- final Double averageRowSize = RelMetadataQuery.getAverageRowSize(join);
- final Double rowCount = RelMetadataQuery.getRowCount(join);
+ RelMetadataQuery mq = RelMetadataQuery.instance();
+ final Double averageRowSize = mq.getAverageRowSize(join);
+ final Double rowCount = mq.getRowCount(join);
if (averageRowSize == null || rowCount == null) {
return null;
}
@@ -357,7 +359,7 @@ public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
} else {
return null;
}
- return RelMetadataQuery.splitCount(largeInput);
+ return RelMetadataQuery.instance().splitCount(largeInput);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java
index 6669d32..badb8ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java
@@ -84,8 +84,9 @@ public boolean isExecutable(HiveJoin join) {
@Override
public RelOptCost getCost(HiveJoin join) {
- double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
- double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ RelMetadataQuery mq = RelMetadataQuery.instance();
+ double leftRCount = mq.getRowCount(join.getLeft());
+ double rightRCount = mq.getRowCount(join.getRight());
return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java
index 61a3a64..a4233b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java
@@ -78,7 +78,7 @@ public RelOptCost getDefaultCost() {
@Override
public RelOptCost getScanCost(HiveTableScan ts) {
- return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.getAverageRowSize(ts));
+ return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.instance().getAverageRowSize(ts));
}
@Override
@@ -86,8 +86,9 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) {
if (aggregate.isBucketedInput()) {
return HiveCost.FACTORY.makeZeroCost();
} else {
+ RelMetadataQuery mq = RelMetadataQuery.instance();
// 1. Sum of input cardinalities
- final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput());
+ final Double rCount = mq.getRowCount(aggregate.getInput());
if (rCount == null) {
return null;
}
@@ -96,7 +97,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) {
// 3. IO cost = cost of writing intermediary results to local FS +
// cost of reading from local FS for transferring to GBy +
// cost of transferring map outputs to GBy operator
- final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput());
+ final Double rAverageSize = mq.getAverageRowSize(aggregate.getInput());
if (rAverageSize == null) {
return null;
}
@@ -128,9 +129,10 @@ public boolean isExecutable(HiveJoin join) {
@Override
public RelOptCost getCost(HiveJoin join) {
+ RelMetadataQuery mq = RelMetadataQuery.instance();
// 1. Sum of input cardinalities
- final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
- final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ final Double leftRCount = mq.getRowCount(join.getLeft());
+ final Double rightRCount = mq.getRowCount(join.getRight());
if (leftRCount == null || rightRCount == null) {
return null;
}
@@ -151,8 +153,8 @@ public RelOptCost getCost(HiveJoin join) {
// 3. IO cost = cost of writing intermediary results to local FS +
// cost of reading from local FS for transferring to join +
// cost of transferring map outputs to Join operator
- final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
- final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ final Double leftRAverageSize = mq.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = mq.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
@@ -187,8 +189,8 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE);
final Double memoryWithinPhase =
- RelMetadataQuery.cumulativeMemoryWithinPhase(join);
- final Integer splitCount = RelMetadataQuery.splitCount(join);
+ RelMetadataQuery.instance().cumulativeMemoryWithinPhase(join);
+ final Integer splitCount = RelMetadataQuery.instance().splitCount(join);
join.setJoinAlgorithm(oldAlgo);
if (memoryWithinPhase == null || splitCount == null) {
@@ -238,9 +240,10 @@ public boolean isExecutable(HiveJoin join) {
@Override
public RelOptCost getCost(HiveJoin join) {
+ RelMetadataQuery mq = RelMetadataQuery.instance();
// 1. Sum of input cardinalities
- final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
- final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
+ final Double leftRCount = mq.getRowCount(join.getLeft());
+ final Double rightRCount = mq.getRowCount(join.getRight());
if (leftRCount == null || rightRCount == null) {
return null;
}
@@ -251,7 +254,7 @@ public RelOptCost getCost(HiveJoin join) {
add(leftRCount).
add(rightRCount).
build();
- ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
+ ImmutableBitSet.Builder streamingBuilder = ImmutableBitSet.builder();
switch (join.getStreamingSide()) {
case LEFT_RELATION:
streamingBuilder.set(0);
@@ -266,8 +269,8 @@ public RelOptCost getCost(HiveJoin join) {
final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming);
// 3. IO cost = cost of transferring small tables to join node *
// degree of parallelism
- final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
- final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
+ final Double leftRAverageSize = mq.getAverageRowSize(join.getLeft());
+ final Double rightRAverageSize = mq.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
@@ -277,8 +280,8 @@ public RelOptCost getCost(HiveJoin join) {
build();
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE);
- final int parallelism = RelMetadataQuery.splitCount(join) == null
- ? 1 : RelMetadataQuery.splitCount(join);
+ final int parallelism = mq.splitCount(join) == null
+ ? 1 : mq.splitCount(join);
join.setJoinAlgorithm(oldAlgo);
final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism);
// 4. Result
@@ -322,7 +325,7 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
return null;
}
// If simple map join, the whole relation goes in memory
- return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput);
+ return RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput);
}
@Override
@@ -376,7 +379,7 @@ public boolean isExecutable(HiveJoin join) {
// What we need is a way to get buckets not splits
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE);
- Integer buckets = RelMetadataQuery.splitCount(smallInput);
+ Integer buckets = RelMetadataQuery.instance().splitCount(smallInput);
join.setJoinAlgorithm(oldAlgo);
if (buckets == null) {
@@ -388,7 +391,7 @@ public boolean isExecutable(HiveJoin join) {
for (int i=0; i joinKeysInChildren = new ArrayList();
@@ -203,8 +204,8 @@ public ImmutableBitSet getSortedInputs() throws CalciteSemanticException {
for (int i=0; i exps, Rel
}
@Override
- public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return RelMetadataQuery.getNonCumulativeCost(this);
+ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
+ return mq.getNonCumulativeCost(this);
}
@Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
index 4fac13e..d899667 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
@@ -108,8 +108,8 @@ public void implement(Implementor implementor) {
}
@Override
- public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return RelMetadataQuery.getNonCumulativeCost(this);
+ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
+ return mq.getNonCumulativeCost(this);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
index 5788805..e9e9d0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
@@ -122,8 +122,8 @@ public HiveTableScan copy(RelDataType newRowtype) {
}
@Override
- public RelOptCost computeSelfCost(RelOptPlanner planner) {
- return RelMetadataQuery.getNonCumulativeCost(this);
+ public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
+ return mq.getNonCumulativeCost(this);
}
@Override public RelWriter explainTerms(RelWriter pw) {
@@ -147,7 +147,7 @@ public void implement(Implementor implementor) {
}
@Override
- public double getRows() {
+ public double estimateRowCount(RelMetadataQuery mq) {
return ((RelOptHiveTable) table).getRowCount();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index 070c7ea..e9a4d88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -119,9 +119,10 @@ public void onMatch(RelOptRuleCall call) {
}
// Do the columns used by the join appear in the output of the aggregate?
+ RelMetadataQuery mq = RelMetadataQuery.instance();
final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
- RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates);
+ mq.getPulledUpPredicates(join).pulledUpPredicates);
final ImmutableBitSet joinColumns =
RelOptUtil.InputFinder.bits(join.getCondition());
final boolean allColumnsInAggregate =
@@ -179,7 +180,7 @@ public void onMatch(RelOptRuleCall call) {
unique = true;
} else {
final Boolean unique0 =
- RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey);
+ mq.areColumnsUnique(joinInput, belowAggregateKey);
unique = unique0 != null && unique0;
}
if (unique) {
@@ -299,8 +300,8 @@ public Integer apply(Integer a0) {
}
// Make a cost based decision to pick cheaper plan
- RelOptCost afterCost = RelMetadataQuery.getCumulativeCost(r);
- RelOptCost beforeCost = RelMetadataQuery.getCumulativeCost(aggregate);
+ RelOptCost afterCost = mq.getCumulativeCost(r);
+ RelOptCost beforeCost = mq.getCumulativeCost(aggregate);
if (afterCost.isLt(beforeCost)) {
call.transformTo(r);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
index 7d7631b..7d4411a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java
@@ -112,7 +112,7 @@ public void onMatch(RelOptRuleCall call) {
// arguments then we can use a more efficient form.
if ((nonDistinctCount == 0) && (argListSets.size() == 1)) {
for (Integer arg : argListSets.iterator().next()) {
- Set colOrigs = RelMetadataQuery.getColumnOrigins(aggregate, arg);
+ Set colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, arg);
if (null != colOrigs) {
for (RelColumnOrigin colOrig : colOrigs) {
RelOptHiveTable hiveTbl = (RelOptHiveTable)colOrig.getOriginTable();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
index 07928d8..994af97 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
@@ -80,7 +80,7 @@ public HiveJoinPushTransitivePredicatesRule(Class extends Join> clazz,
public void onMatch(RelOptRuleCall call) {
Join join = call.rel(0);
- RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join);
+ RelOptPredicateList preds = RelMetadataQuery.instance().getPulledUpPredicates(join);
HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
assert registry != null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
index 50e139b..d736f21 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
@@ -128,7 +128,7 @@ public FilterReduceExpressionsRule(Class extends Filter> filterClass,
RexNode newConditionExp;
boolean reduced;
final RelOptPredicateList predicates =
- RelMetadataQuery.getPulledUpPredicates(filter.getInput());
+ RelMetadataQuery.instance().getPulledUpPredicates(filter.getInput());
if (reduceExpressions(filter, expList, predicates)) {
assert expList.size() == 1;
newConditionExp = expList.get(0);
@@ -242,7 +242,7 @@ public boolean matches(RelOptRuleCall call) {
registry.registerVisited(this, project);
}
final RelOptPredicateList predicates =
- RelMetadataQuery.getPulledUpPredicates(project.getInput());
+ RelMetadataQuery.instance().getPulledUpPredicates(project.getInput());
final List expList =
Lists.newArrayList(project.getProjects());
if (reduceExpressions(project, expList, predicates)) {
@@ -273,10 +273,11 @@ public JoinReduceExpressionsRule(Class extends HiveJoin> joinClass,
final HiveJoin join = call.rel(0);
final List expList = Lists.newArrayList(join.getCondition());
final int fieldCount = join.getLeft().getRowType().getFieldCount();
+ RelMetadataQuery mq = RelMetadataQuery.instance();
final RelOptPredicateList leftPredicates =
- RelMetadataQuery.getPulledUpPredicates(join.getLeft());
+ mq.getPulledUpPredicates(join.getLeft());
final RelOptPredicateList rightPredicates =
- RelMetadataQuery.getPulledUpPredicates(join.getRight());
+ mq.getPulledUpPredicates(join.getRight());
final RelOptPredicateList predicates =
leftPredicates.union(rightPredicates.shift(fieldCount));
if (!reduceExpressions(join, expList, predicates)) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 9f7f8a4..18145ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -23,17 +23,35 @@
import java.util.List;
import java.util.Set;
+import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexPermuteInputsShuttle;
+import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitor;
import org.apache.calcite.sql.validate.SqlValidator;
+import org.apache.calcite.sql2rel.CorrelationReferenceFinder;
import org.apache.calcite.sql2rel.RelFieldTrimmer;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Stacks;
+import org.apache.calcite.util.Util;
import org.apache.calcite.util.mapping.IntPair;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
@@ -41,16 +59,20 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
public class HiveRelFieldTrimmer extends RelFieldTrimmer {
protected static final Log LOG = LogFactory.getLog(HiveRelFieldTrimmer.class);
+ private RelBuilder relBuilder;
public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) {
super(validator, relBuilder);
+ this.relBuilder = relBuilder;
}
/**
@@ -153,4 +175,187 @@ public TrimResult trimFields(
return new TrimResult(newJoin, mapping);
}
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.core.Sort}.
+ */
+ public TrimResult trimFields(
+ HiveSortLimit sort,
+ ImmutableBitSet fieldsUsed,
+ Set extraFields) {
+ final RelDataType rowType = sort.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RelCollation collation = sort.getCollation();
+ final RelNode input = sort.getInput();
+ RelOptCluster cluster = sort.getCluster();
+
+ // We use the fields used by the consumer, plus any fields used as sort
+ // keys.
+ final ImmutableBitSet.Builder inputFieldsUsed =
+ ImmutableBitSet.builder(fieldsUsed);
+ for (RelFieldCollation field : collation.getFieldCollations()) {
+ inputFieldsUsed.set(field.getFieldIndex());
+ }
+
+ // Create input with trimmed columns.
+ final Set inputExtraFields = Collections.emptySet();
+ TrimResult trimResult =
+ trimChild(sort, input, inputFieldsUsed.build(), inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input
+ && inputMapping.isIdentity()
+ && fieldsUsed.cardinality() == fieldCount) {
+ return result(sort, Mappings.createIdentity(fieldCount));
+ }
+
+ relBuilder.push(newInput);
+ final int offset =
+ sort.offset == null ? 0 : RexLiteral.intValue(sort.offset);
+ final int fetch =
+ sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch);
+ final ImmutableList fields =
+ relBuilder.fields(RexUtil.apply(inputMapping, collation));
+
+ // The result has the same mapping as the input gave us. Sometimes we
+ // return fields that the consumer didn't ask for, because the filter
+ // needs them for its condition.
+ // TODO: Calcite will return empty LogicalValues when offset == 0 && fetch == 0.
+ // However, Hive ASTConverter can not deal with LogicalValues.
+ sortLimit(cluster, relBuilder, offset, fetch, fields);
+ return result(relBuilder.build(), inputMapping);
+ }
+
+ private List projects(RelDataType inputRowType, RelOptCluster cluster) {
+ final List exprList = new ArrayList<>();
+ for (RelDataTypeField field : inputRowType.getFieldList()) {
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ exprList.add(rexBuilder.makeInputRef(field.getType(), field.getIndex()));
+ }
+ return exprList;
+ }
+
+ private static RelFieldCollation collation(RexNode node,
+ RelFieldCollation.Direction direction,
+ RelFieldCollation.NullDirection nullDirection, List extraNodes) {
+ switch (node.getKind()) {
+ case INPUT_REF:
+ return new RelFieldCollation(((RexInputRef) node).getIndex(), direction,
+ Util.first(nullDirection, direction.defaultNullDirection()));
+ case DESCENDING:
+ return collation(((RexCall) node).getOperands().get(0),
+ RelFieldCollation.Direction.DESCENDING,
+ nullDirection, extraNodes);
+ case NULLS_FIRST:
+ return collation(((RexCall) node).getOperands().get(0), direction,
+ RelFieldCollation.NullDirection.FIRST, extraNodes);
+ case NULLS_LAST:
+ return collation(((RexCall) node).getOperands().get(0), direction,
+ RelFieldCollation.NullDirection.LAST, extraNodes);
+ default:
+ final int fieldIndex = extraNodes.size();
+ extraNodes.add(node);
+ return new RelFieldCollation(fieldIndex, direction,
+ Util.first(nullDirection, direction.defaultNullDirection()));
+ }
+ }
+
+ private void sortLimit(RelOptCluster cluster, RelBuilder relBuilder, int offset, int fetch,
+ Iterable extends RexNode> nodes) {
+ final List fieldCollations = new ArrayList<>();
+ final RelDataType inputRowType = relBuilder.peek().getRowType();
+ final List extraNodes = projects(inputRowType, cluster);
+ final List originalExtraNodes = ImmutableList.copyOf(extraNodes);
+ for (RexNode node : nodes) {
+ fieldCollations.add(
+ collation(node, RelFieldCollation.Direction.ASCENDING, null,
+ extraNodes));
+ }
+ final RexNode offsetNode = offset <= 0 ? null : relBuilder.literal(offset);
+ final RexNode fetchNode = fetch < 0 ? null : relBuilder.literal(fetch);
+ if (offsetNode == null && fetchNode == null && fieldCollations.isEmpty()) {
+ return; // sort is trivial
+ }
+
+ final boolean addedFields = extraNodes.size() > originalExtraNodes.size();
+ if (fieldCollations.isEmpty()) {
+ assert !addedFields;
+ RelNode top = relBuilder.peek();
+ if (top instanceof Sort) {
+ final Sort sort2 = (Sort) top;
+ if (sort2.offset == null && sort2.fetch == null) {
+ relBuilder.build();
+ relBuilder.push(sort2.getInput());
+ final RelNode sort =
+ HiveSortLimit.create(relBuilder.build(), sort2.collation,
+ offsetNode, fetchNode);
+ relBuilder.push(sort);
+ return;
+ }
+ }
+ if (top instanceof Project) {
+ final Project project = (Project) top;
+ if (project.getInput() instanceof Sort) {
+ final Sort sort2 = (Sort) project.getInput();
+ if (sort2.offset == null && sort2.fetch == null) {
+ relBuilder.build();
+ relBuilder.push(sort2.getInput());
+ final RelNode sort =
+ HiveSortLimit.create(relBuilder.build(), sort2.collation,
+ offsetNode, fetchNode);
+ relBuilder.push(sort);
+ relBuilder.project(project.getProjects());
+ return;
+ }
+ }
+ }
+ }
+ if (addedFields) {
+ relBuilder.project(extraNodes);
+ }
+ final RelNode sort =
+ HiveSortLimit.create(relBuilder.build(), RelCollations.of(fieldCollations),
+ offsetNode, fetchNode);
+ relBuilder.push(sort);
+ if (addedFields) {
+ relBuilder.project(originalExtraNodes);
+ }
+ return;
+ }
+
+ private TrimResult result(RelNode r, final Mapping mapping) {
+ final RexBuilder rexBuilder = relBuilder.getRexBuilder();
+ for (final CorrelationId correlation : r.getVariablesSet()) {
+ r = r.accept(
+ new CorrelationReferenceFinder() {
+ @Override
+ protected RexNode handle(RexFieldAccess fieldAccess) {
+ final RexCorrelVariable v =
+ (RexCorrelVariable) fieldAccess.getReferenceExpr();
+ if (v.id.equals(correlation)
+ && v.getType().getFieldCount() == mapping.getSourceCount()) {
+ final int old = fieldAccess.getField().getIndex();
+ final int new_ = mapping.getTarget(old);
+ final RelDataTypeFactory.FieldInfoBuilder typeBuilder =
+ relBuilder.getTypeFactory().builder();
+ for (int target : Util.range(mapping.getTargetCount())) {
+ typeBuilder.add(
+ v.getType().getFieldList().get(mapping.getSource(target)));
+ }
+ final RexNode newV =
+ rexBuilder.makeCorrel(typeBuilder.build(), v.id);
+ if (old != new_) {
+ return rexBuilder.makeFieldAccess(newV, new_);
+ }
+ }
+ return fieldAccess;
+ }
+
+ });
+ }
+ return new TrimResult(r, mapping);
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
index 0af60e8..2f2297d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
@@ -96,7 +96,7 @@ public boolean matches(RelOptRuleCall call) {
// Finally, if we do not reduce the input size, we bail out
final int offset = sortLimit.offset == null ? 0 : RexLiteral.intValue(sortLimit.offset);
if (offset + RexLiteral.intValue(sortLimit.fetch)
- >= RelMetadataQuery.getRowCount(reducedInput)) {
+ >= RelMetadataQuery.instance().getRowCount(reducedInput)) {
return false;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
index 618c717..573b75a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
@@ -59,7 +59,7 @@ public boolean matches(RelOptRuleCall call) {
// Finally, if we do not reduce the size input enough, we bail out
int limit = RexLiteral.intValue(sortLimit.fetch);
- Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput());
+ Double rowCount = RelMetadataQuery.instance().getRowCount(sortLimit.getInput());
if (rowCount != null && limit <= reductionProportion * rowCount &&
rowCount - limit >= reductionTuples) {
return false;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
index 0ec8bf1..04b94c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
@@ -80,7 +80,7 @@ public void onMatch(RelOptRuleCall call) {
final int offset = sort.offset == null ? 0 : RexLiteral.intValue(sort.offset);
for (RelNode input : union.getInputs()) {
// If we do not reduce the input size, we bail out
- if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.getRowCount(input)) {
+ if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.instance().getRowCount(input)) {
finishPushSortPastUnion = false;
// Here we do some query rewrite. We first get the new fetchRN, which is
// a sum of offset and fetch.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index c04060f..6f26d7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -47,7 +47,7 @@
protected FilterSelectivityEstimator(RelNode childRel) {
super(true);
this.childRel = childRel;
- this.childCardinality = RelMetadataQuery.getRowCount(childRel);
+ this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel);
}
public Double estimateSelectivity(RexNode predicate) {
@@ -251,10 +251,10 @@ private Double getMaxNDV(RexCall call) {
double tmpNDV;
double maxNDV = 1.0;
InputReferencedVisitor irv;
-
+ RelMetadataQuery mq = RelMetadataQuery.instance();
for (RexNode op : call.getOperands()) {
if (op instanceof RexInputRef) {
- tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, mq,
((RexInputRef) op).getIndex());
if (tmpNDV > maxNDV)
maxNDV = tmpNDV;
@@ -262,7 +262,8 @@ private Double getMaxNDV(RexCall call) {
irv = new InputReferencedVisitor();
irv.apply(op);
for (Integer childProjIndx : irv.inputPosReferenced) {
- tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx);
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
+ mq, childProjIndx);
if (tmpNDV > maxNDV)
maxNDV = tmpNDV;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java
index 84fa518..18fe650 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java
@@ -24,6 +24,7 @@
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdCollation;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -46,7 +47,7 @@ private HiveRelMdCollation() {}
//~ Methods ----------------------------------------------------------------
- public ImmutableList collations(HiveAggregate aggregate) {
+ public ImmutableList collations(HiveAggregate aggregate, RelMetadataQuery mq) {
// Compute collations
ImmutableList.Builder collationListBuilder =
new ImmutableList.Builder();
@@ -60,7 +61,7 @@ private HiveRelMdCollation() {}
new HiveRelCollation(collationListBuilder.build())));
}
- public ImmutableList collations(HiveJoin join) {
+ public ImmutableList collations(HiveJoin join, RelMetadataQuery mq) {
return join.getCollation();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
index 1220401..e6384a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
@@ -58,19 +58,19 @@ private HiveRelMdDistinctRowCount() {
// Catch-all rule when none of the others apply.
@Override
- public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey,
+ public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
RexNode predicate) {
if (rel instanceof HiveTableScan) {
- return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate);
+ return getDistinctRowCount((HiveTableScan) rel, mq, groupKey, predicate);
}
/*
* For now use Calcite' default formulas for propagating NDVs up the Query
* Tree.
*/
- return super.getDistinctRowCount(rel, groupKey, predicate);
+ return super.getDistinctRowCount(rel, mq, groupKey, predicate);
}
- private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey,
+ private Double getDistinctRowCount(HiveTableScan htRel, RelMetadataQuery mq, ImmutableBitSet groupKey,
RexNode predicate) {
List projIndxLst = HiveCalciteUtil
.translateBitSetToProjIndx(groupKey);
@@ -83,39 +83,39 @@ private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey
return Math.min(noDistinctRows, htRel.getRows());
}
- public static Double getDistinctRowCount(RelNode r, int indx) {
+ public static Double getDistinctRowCount(RelNode r, RelMetadataQuery mq, int indx) {
ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx);
- return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r
+ return mq.getDistinctRowCount(r, bitSetOfRqdProj, r
.getCluster().getRexBuilder().makeLiteral(true));
}
@Override
- public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey,
+ public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
RexNode predicate) {
if (rel instanceof HiveJoin) {
HiveJoin hjRel = (HiveJoin) rel;
//TODO: Improve this
if (hjRel.isLeftSemiJoin()) {
- return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey,
+ return mq.getDistinctRowCount(hjRel.getLeft(), groupKey,
rel.getCluster().getRexBuilder().makeLiteral(true));
} else {
- return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(),
+ return RelMdUtil.getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
groupKey, predicate, true);
}
}
- return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate);
+ return mq.getDistinctRowCount(rel, groupKey, predicate);
}
/*
* Favor Broad Plans over Deep Plans.
*/
- public RelOptCost getCumulativeCost(HiveJoin rel) {
- RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);
+ public RelOptCost getCumulativeCost(HiveJoin rel, RelMetadataQuery mq) {
+ RelOptCost cost = mq.getNonCumulativeCost(rel);
List inputs = rel.getInputs();
RelOptCost maxICost = HiveCost.ZERO;
for (RelNode input : inputs) {
- RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input);
+ RelOptCost iCost = mq.getCumulativeCost(input);
if (maxICost.isLt(iCost)) {
maxICost = iCost;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java
index b83f240..62d3ead 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java
@@ -22,6 +22,7 @@
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdDistribution;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -35,8 +36,7 @@
ChainedRelMetadataProvider.of(
ImmutableList.of(
ReflectiveRelMetadataProvider.reflectiveSource(
- BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()),
- RelMdDistribution.SOURCE));
+ BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution())));
//~ Constructors -----------------------------------------------------------
@@ -44,12 +44,12 @@ private HiveRelMdDistribution() {}
//~ Methods ----------------------------------------------------------------
- public RelDistribution distribution(HiveAggregate aggregate) {
+ public RelDistribution distribution(HiveAggregate aggregate, RelMetadataQuery mq) {
return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED,
aggregate.getGroupSet().asList());
}
- public RelDistribution distribution(HiveJoin join) {
+ public RelDistribution distribution(HiveJoin join, RelMetadataQuery mq) {
return join.getDistribution();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java
index bea5943..923738a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java
@@ -47,24 +47,24 @@ private HiveRelMdMemory() {}
//~ Methods ----------------------------------------------------------------
- public Double memory(HiveTableScan tableScan) {
+ public Double memory(HiveTableScan tableScan, RelMetadataQuery mq) {
return 0.0d;
}
- public Double memory(HiveAggregate aggregate) {
- final Double avgRowSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput());
- final Double rowCount = RelMetadataQuery.getRowCount(aggregate.getInput());
+ public Double memory(HiveAggregate aggregate, RelMetadataQuery mq) {
+ final Double avgRowSize = mq.getAverageRowSize(aggregate.getInput());
+ final Double rowCount = mq.getRowCount(aggregate.getInput());
if (avgRowSize == null || rowCount == null) {
return null;
}
return avgRowSize * rowCount;
}
- public Double memory(HiveFilter filter) {
+ public Double memory(HiveFilter filter, RelMetadataQuery mq) {
return 0.0;
}
- public Double memory(HiveJoin join) {
+ public Double memory(HiveJoin join, RelMetadataQuery mq) {
return join.getMemory();
}
@@ -72,15 +72,15 @@ public Double cumulativeMemoryWithinPhaseSplit(HiveJoin join) {
return join.getCumulativeMemoryWithinPhaseSplit();
}
- public Double memory(HiveProject project) {
+ public Double memory(HiveProject project, RelMetadataQuery mq) {
return 0.0;
}
- public Double memory(HiveSortLimit sort) {
+ public Double memory(HiveSortLimit sort, RelMetadataQuery mq) {
if (sort.getCollation() != RelCollations.EMPTY) {
// It sorts
- final Double avgRowSize = RelMetadataQuery.getAverageRowSize(sort.getInput());
- final Double rowCount = RelMetadataQuery.getRowCount(sort.getInput());
+ final Double avgRowSize = mq.getAverageRowSize(sort.getInput());
+ final Double rowCount = mq.getRowCount(sort.getInput());
if (avgRowSize == null || rowCount == null) {
return null;
}
@@ -90,7 +90,7 @@ public Double memory(HiveSortLimit sort) {
return 0.0;
}
- public Double memory(HiveUnion union) {
+ public Double memory(HiveUnion union, RelMetadataQuery mq) {
return 0.0;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
index 2f51d3b..1c7842c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java
@@ -48,21 +48,21 @@ public RelMetadataProvider getMetadataProvider() {
//~ Methods ----------------------------------------------------------------
- public Boolean isPhaseTransition(HiveJoin join) {
+ public Boolean isPhaseTransition(HiveJoin join, RelMetadataQuery mq) {
return join.isPhaseTransition();
}
- public Boolean isPhaseTransition(HiveSortLimit sort) {
+ public Boolean isPhaseTransition(HiveSortLimit sort, RelMetadataQuery mq) {
// As Exchange operator is introduced later on, we make a
// sort operator create a new stage for the moment
return true;
}
- public Integer splitCount(HiveJoin join) {
+ public Integer splitCount(HiveJoin join, RelMetadataQuery mq) {
return join.getSplitCount();
}
- public Integer splitCount(HiveTableScan scan) {
+ public Integer splitCount(HiveTableScan scan, RelMetadataQuery mq) {
Integer splitCount;
RelOptHiveTable table = (RelOptHiveTable) scan.getTable();
@@ -70,7 +70,7 @@ public Integer splitCount(HiveTableScan scan) {
if (bucketCols != null && !bucketCols.isEmpty()) {
splitCount = table.getHiveTableMD().getNumBuckets();
} else {
- splitCount = splitCountRepartition(scan);
+ splitCount = splitCountRepartition(scan, mq);
if (splitCount == null) {
throw new RuntimeException("Could not get split count for table: "
+ scan.getTable().getQualifiedName());
@@ -80,8 +80,8 @@ public Integer splitCount(HiveTableScan scan) {
return splitCount;
}
- public Integer splitCount(RelNode rel) {
- Boolean newPhase = RelMetadataQuery.isPhaseTransition(rel);
+ public Integer splitCount(RelNode rel, RelMetadataQuery mq) {
+ Boolean newPhase = mq.isPhaseTransition(rel);
if (newPhase == null) {
return null;
@@ -89,21 +89,21 @@ public Integer splitCount(RelNode rel) {
if (newPhase) {
// We repartition: new number of splits
- return splitCountRepartition(rel);
+ return splitCountRepartition(rel, mq);
}
// We do not repartition: take number of splits from children
Integer splitCount = 0;
for (RelNode input : rel.getInputs()) {
- splitCount += RelMetadataQuery.splitCount(input);
+ splitCount += mq.splitCount(input);
}
return splitCount;
}
- public Integer splitCountRepartition(RelNode rel) {
+ public Integer splitCountRepartition(RelNode rel, RelMetadataQuery mq) {
// We repartition: new number of splits
- final Double averageRowSize = RelMetadataQuery.getAverageRowSize(rel);
- final Double rowCount = RelMetadataQuery.getRowCount(rel);
+ final Double averageRowSize = mq.getAverageRowSize(rel);
+ final Double rowCount = mq.getRowCount(rel);
if (averageRowSize == null || rowCount == null) {
return null;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index b7244fd..36d0b45 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -99,10 +99,10 @@
*
*/
@Override
- public RelOptPredicateList getPredicates(Project project) {
+ public RelOptPredicateList getPredicates(Project project, RelMetadataQuery mq) {
RelNode child = project.getInput();
final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
- RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child);
+ RelOptPredicateList childInfo = mq.getPulledUpPredicates(child);
List projectPullUpPredicates = new ArrayList();
HashMultimap inpIndxToOutIndxMap = HashMultimap.create();
@@ -151,13 +151,13 @@ public RelOptPredicateList getPredicates(Project project) {
/** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */
@Override
- public RelOptPredicateList getPredicates(Join join) {
+ public RelOptPredicateList getPredicates(Join join, RelMetadataQuery mq) {
RexBuilder rB = join.getCluster().getRexBuilder();
RelNode left = join.getInput(0);
RelNode right = join.getInput(1);
- RelOptPredicateList leftInfo = RelMetadataQuery.getPulledUpPredicates(left);
- RelOptPredicateList rightInfo = RelMetadataQuery.getPulledUpPredicates(right);
+ RelOptPredicateList leftInfo = mq.getPulledUpPredicates(left);
+ RelOptPredicateList rightInfo = mq.getPulledUpPredicates(right);
HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join,
RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index caf8978..7bba80b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -62,8 +62,8 @@ protected HiveRelMdRowCount() {
super();
}
- public Double getRowCount(Join join) {
- PKFKRelationInfo pkfk = analyzeJoinForPKFK(join);
+ public Double getRowCount(Join join, RelMetadataQuery mq) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq);
if (pkfk != null) {
double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
selectivity = Math.min(1.0, selectivity);
@@ -76,8 +76,8 @@ public Double getRowCount(Join join) {
}
@Override
- public Double getRowCount(SemiJoin rel) {
- PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel);
+ public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq);
if (pkfk != null) {
double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
selectivity = Math.min(1.0, selectivity);
@@ -86,12 +86,12 @@ public Double getRowCount(SemiJoin rel) {
}
return pkfk.fkInfo.rowCount * selectivity;
}
- return super.getRowCount(rel);
+ return super.getRowCount(rel, mq);
}
@Override
- public Double getRowCount(Sort rel) {
- final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput());
+ public Double getRowCount(Sort rel, RelMetadataQuery mq) {
+ final Double rowCount = mq.getRowCount(rel.getInput());
if (rowCount != null && rel.fetch != null) {
final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset);
final int limit = RexLiteral.intValue(rel.fetch);
@@ -176,7 +176,7 @@ public String toString() {
* or Fact roj Dim b) The selectivity factor applied on the Fact Table should
* be 1.
*/
- public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
+ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery mq) {
RelNode left = joinRel.getInputs().get(0);
RelNode right = joinRel.getInputs().get(1);
@@ -229,16 +229,16 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
*/
boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
.getJoinType() == JoinRelType.RIGHT)
- && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left);
+ && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left, mq);
boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
- .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right);
+ .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, mq);
if (!leftIsKey && !rightIsKey) {
return null;
}
- double leftRowCount = RelMetadataQuery.getRowCount(left);
- double rightRowCount = RelMetadataQuery.getRowCount(right);
+ double leftRowCount = mq.getRowCount(left);
+ double rightRowCount = mq.getRowCount(right);
if (leftIsKey && rightIsKey) {
if (rightRowCount < leftRowCount) {
@@ -251,10 +251,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
boolean isPKSideSimpleTree = pkSide != -1 ?
IsSimpleTreeOnJoinKey.check(
pkSide == 0 ? left : right,
- pkSide == 0 ? leftColIdx : rightColIdx) : false;
+ pkSide == 0 ? leftColIdx : rightColIdx, mq) : false;
- double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1;
- double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1;
+ double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1;
+ double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1;
/*
* If the ndv of the PK - FK side don't match, and the PK side is a filter
@@ -285,7 +285,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
if (pkSide == 0) {
FKSideInfo fkInfo = new FKSideInfo(rightRowCount,
rightNDV);
- double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount);
+ double pkSelectivity = pkSelectivity(joinRel, mq, true, left, leftRowCount);
PKSideInfo pkInfo = new PKSideInfo(leftRowCount,
leftNDV,
joinRel.getJoinType().generatesNullsOnRight() ? 1.0 :
@@ -297,7 +297,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
if (pkSide == 1) {
FKSideInfo fkInfo = new FKSideInfo(leftRowCount,
leftNDV);
- double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount);
+ double pkSelectivity = pkSelectivity(joinRel, mq, false, right, rightRowCount);
PKSideInfo pkInfo = new PKSideInfo(rightRowCount,
rightNDV,
joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 :
@@ -309,7 +309,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
return null;
}
- private static double pkSelectivity(Join joinRel, boolean leftChild,
+ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean leftChild,
RelNode child,
double childRowCount) {
if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) ||
@@ -318,7 +318,7 @@ private static double pkSelectivity(Join joinRel, boolean leftChild,
} else {
HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
if (tScan != null) {
- double tRowCount = RelMetadataQuery.getRowCount(tScan);
+ double tRowCount = mq.getRowCount(tScan);
return childRowCount / tRowCount;
} else {
return 1.0;
@@ -326,9 +326,9 @@ private static double pkSelectivity(Join joinRel, boolean leftChild,
}
}
- private static boolean isKey(ImmutableBitSet c, RelNode rel) {
+ private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) {
boolean isKey = false;
- Set keys = RelMetadataQuery.getUniqueKeys(rel);
+ Set keys = mq.getUniqueKeys(rel);
if (keys != null) {
for (ImmutableBitSet key : keys) {
if (key.equals(c)) {
@@ -402,16 +402,18 @@ private static boolean isKey(ImmutableBitSet c, RelNode rel) {
int joinKey;
boolean simpleTree;
+ RelMetadataQuery mq;
- static boolean check(RelNode r, int joinKey) {
- IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey);
+ static boolean check(RelNode r, int joinKey, RelMetadataQuery mq) {
+ IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey, mq);
v.go(r);
return v.simpleTree;
}
- IsSimpleTreeOnJoinKey(int joinKey) {
+ IsSimpleTreeOnJoinKey(int joinKey, RelMetadataQuery mq) {
super();
this.joinKey = joinKey;
+ this.mq = mq;
simpleTree = true;
}
@@ -427,7 +429,7 @@ public void visit(RelNode node, int ordinal, RelNode parent) {
} else if (node instanceof Project) {
simpleTree = isSimple((Project) node);
} else if (node instanceof Filter) {
- simpleTree = isSimple((Filter) node);
+ simpleTree = isSimple((Filter) node, mq);
} else {
simpleTree = false;
}
@@ -446,9 +448,9 @@ private boolean isSimple(Project project) {
return false;
}
- private boolean isSimple(Filter filter) {
+ private boolean isSimple(Filter filter, RelMetadataQuery mq) {
ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition());
- return isKey(condBits, filter);
+ return isKey(condBits, filter, mq);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
index a0eb83d..0d03ebb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -49,7 +49,7 @@ protected HiveRelMdSelectivity() {
super();
}
- public Double getSelectivity(HiveTableScan t, RexNode predicate) {
+ public Double getSelectivity(HiveTableScan t, RelMetadataQuery mq, RexNode predicate) {
if (predicate != null) {
FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t);
return filterSelEstmator.estimateSelectivity(predicate);
@@ -58,15 +58,15 @@ public Double getSelectivity(HiveTableScan t, RexNode predicate) {
return 1.0;
}
- public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException {
+ public Double getSelectivity(HiveJoin j, RelMetadataQuery mq, RexNode predicate) throws CalciteSemanticException {
if (j.getJoinType().equals(JoinRelType.INNER)) {
- return computeInnerJoinSelectivity(j, predicate);
+ return computeInnerJoinSelectivity(j, mq, predicate);
} else if (j.getJoinType().equals(JoinRelType.LEFT) ||
j.getJoinType().equals(JoinRelType.RIGHT)) {
- double left = RelMetadataQuery.getRowCount(j.getLeft());
- double right = RelMetadataQuery.getRowCount(j.getRight());
+ double left = mq.getRowCount(j.getLeft());
+ double right = mq.getRowCount(j.getRight());
double product = left * right;
- double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate);
+ double innerJoinSelectivity = computeInnerJoinSelectivity(j, mq, predicate);
if (j.getJoinType().equals(JoinRelType.LEFT)) {
return Math.max(innerJoinSelectivity, left/product);
}
@@ -75,7 +75,7 @@ public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemant
return 1.0;
}
- private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException {
+ private Double computeInnerJoinSelectivity(HiveJoin j, RelMetadataQuery mq, RexNode predicate) throws CalciteSemanticException {
double ndvCrossProduct = 1;
Pair predInfo =
getCombinedPredicateForJoin(j, predicate);
@@ -97,14 +97,14 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws
// Join which are part of join keys
for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
colStatMapBuilder.put(ljk,
- HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk));
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
}
// 2. Update Col Stats Map with col stats for columns from right side of
// Join which are part of join keys
for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
colStatMapBuilder.put(rjk + rightOffSet,
- HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk));
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
}
colStatMap = colStatMapBuilder.build();
@@ -116,11 +116,11 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws
ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
if (j.isLeftSemiJoin())
- ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+ ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()),
ndvCrossProduct);
else
- ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
- * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+ ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft())
+ * mq.getRowCount(j.getRight()), ndvCrossProduct);
}
// 4. Join Selectivity = 1/NDV
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
index 3224039..31adb41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
@@ -50,7 +50,7 @@ private HiveRelMdSize() {}
//~ Methods ----------------------------------------------------------------
- public List averageColumnSizes(HiveTableScan scan) {
+ public List averageColumnSizes(HiveTableScan scan, RelMetadataQuery mq) {
List neededcolsLst = scan.getNeededColIndxsFrmReloptHT();
List columnStatistics = ((RelOptHiveTable) scan.getTable())
.getColStat(neededcolsLst, true);
@@ -77,14 +77,14 @@ private HiveRelMdSize() {}
return list.build();
}
- public List averageColumnSizes(HiveJoin rel) {
+ public List averageColumnSizes(HiveJoin rel, RelMetadataQuery mq) {
final RelNode left = rel.getLeft();
final RelNode right = rel.getRight();
final List lefts =
- RelMetadataQuery.getAverageColumnSizes(left);
+ mq.getAverageColumnSizes(left);
List rights = null;
if (!rel.isLeftSemiJoin()) {
- rights = RelMetadataQuery.getAverageColumnSizes(right);
+ rights = mq.getAverageColumnSizes(right);
}
if (lefts == null && rights == null) {
return null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
index 7c22c33..0718150 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
@@ -34,6 +34,7 @@
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdUniqueKeys;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.BitSets;
@@ -59,15 +60,12 @@
* Inferring Uniqueness for all columns is very expensive right now. The flip
* side of doing this is, it only works post Field Trimming.
*/
- public Set getUniqueKeys(Project rel, boolean ignoreNulls) {
+ public Set getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) {
HiveTableScan tScan = getTableScan(rel.getInput(), false);
- if ( tScan == null ) {
- Function fn = RelMdUniqueKeys.SOURCE.apply(
- rel.getClass(), BuiltInMetadata.UniqueKeys.class);
- return ((BuiltInMetadata.UniqueKeys) fn.apply(rel))
- .getUniqueKeys(ignoreNulls);
+ if (tScan == null) {
+ return mq.getUniqueKeys(rel, ignoreNulls);
}
Map posMap = new HashMap();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index a0e374c..b841315 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -430,8 +430,8 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
// 1.a. Extract order for each column from collation
// Generate sortCols and order
- ImmutableBitSet.Builder sortColsPosBuilder = new ImmutableBitSet.Builder();
- ImmutableBitSet.Builder sortOutputColsPosBuilder = new ImmutableBitSet.Builder();
+ ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
+ ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
Map obRefToCallMap = sortRel.getInputRefToCallMap();
List sortCols = new ArrayList();
StringBuilder order = new StringBuilder();
diff --git a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out
index d4bc93c..ba83ace 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out
@@ -593,7 +593,7 @@ PREHOOK: Input: default@dept
PREHOOK: Input: default@emp
PREHOOK: Input: default@project
PREHOOK: Output: default@tgt
-{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
+{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
PREHOOK: query: drop table if exists dest_l2
PREHOOK: type: DROPTABLE
PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile
diff --git a/ql/src/test/results/clientpositive/lineage2.q.out b/ql/src/test/results/clientpositive/lineage2.q.out
index ec64c10..be0b564 100644
--- a/ql/src/test/results/clientpositive/lineage2.q.out
+++ b/ql/src/test/results/clientpositive/lineage2.q.out
@@ -593,7 +593,7 @@ PREHOOK: Input: default@dept
PREHOOK: Input: default@emp
PREHOOK: Input: default@project
PREHOOK: Output: default@tgt
-{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = e.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"e.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
+{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = e.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"e.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]}
PREHOOK: query: drop table if exists dest_l2
PREHOOK: type: DROPTABLE
PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 120894d..b39d2fc 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -1257,10 +1257,10 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_14]
- Select Operator [SEL_13] (rows=8 width=101)
+ Select Operator [SEL_13] (rows=12 width=101)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_12] (rows=8 width=101)
- predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2) and ((_col1 > 0) or (_col6 >= 0)))
+ Filter Operator [FIL_12] (rows=12 width=101)
+ predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2))
Merge Join Operator [MERGEJOIN_19] (rows=72 width=101)
Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"]
<-Map 1 [SIMPLE_EDGE]