diff --git a/pom.xml b/pom.xml index 3c06188..2bda92f 100644 --- a/pom.xml +++ b/pom.xml @@ -109,7 +109,7 @@ 3.4 1.7.7 0.8.0.RELEASE - 1.5.0 + 1.6.0 4.2.1 4.1.6 4.1.7 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 1c15012..396b9b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -47,6 +47,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexRangeRef; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.rex.RexVisitorImpl; @@ -666,7 +667,7 @@ public String apply(RexNode r) { // Note: this is the last step, trying to avoid the expensive call to the metadata provider // if possible Set predicatesInSubtree = Sets.newHashSet(); - for (RexNode pred : RelMetadataQuery.getPulledUpPredicates(inp).pulledUpPredicates) { + for (RexNode pred : RelMetadataQuery.instance().getPulledUpPredicates(inp).pulledUpPredicates) { predicatesInSubtree.add(pred.toString()); predicatesInSubtree.addAll(Lists.transform(RelOptUtil.conjunctions(pred), REX_STR_FN)); } @@ -935,6 +936,12 @@ public Boolean visitFieldAccess(RexFieldAccess fieldAccess) { // ".FIELD" is constant iff "" is constant. return fieldAccess.getReferenceExpr().accept(this); } + + @Override + public Boolean visitSubQuery(RexSubQuery subQuery) { + // it seems that it is not used by anything. + return null; + } } public static Set getInputRefs(RexNode expr) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index eeec44e..83205bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.AggregateFactory; @@ -147,6 +148,14 @@ public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRe Set variablesStopped, boolean semiJoinDone) { return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, false); } + + @Override + public RelNode createJoin(RelNode left, RelNode right, RexNode condition, + Set variablesSet, JoinRelType joinType, boolean semiJoinDone) { + // According to calcite, it is going to be removed before Calcite-2.0 + // TODO: to handle CorrelationId + return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType, semiJoinDone); + } } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java index 6840418..8c00322 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java @@ -200,7 +200,7 @@ public double computeSMBMapJoinIOCost( } public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) { - Double currentMemory = RelMetadataQuery.cumulativeMemoryWithinPhase(input); + Double currentMemory = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(input); if (currentMemory != null) { if(currentMemory / buckets > maxSize) { return false; @@ -314,8 +314,8 @@ public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation strea if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { // Left side - final Double leftAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double leftRowCount = RelMetadataQuery.getRowCount(join.getLeft()); + final Double leftAvgRowSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double leftRowCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); if (leftAvgRowSize == null || leftRowCount == null) { return null; } @@ -324,8 +324,8 @@ public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation strea if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.LEFT_RELATION) { // Right side - final Double rightAvgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight()); - final Double rightRowCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double rightAvgRowSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); + final Double rightRowCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (rightAvgRowSize == null || rightRowCount == null) { return null; } @@ -338,8 +338,8 @@ public static Integer getSplitCountWithRepartition(HiveJoin join) { final Double maxSplitSize = join.getCluster().getPlanner().getContext(). unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); // We repartition: new number of splits - final Double averageRowSize = RelMetadataQuery.getAverageRowSize(join); - final Double rowCount = RelMetadataQuery.getRowCount(join); + final Double averageRowSize = RelMetadataQuery.instance().getAverageRowSize(join); + final Double rowCount = RelMetadataQuery.instance().getRowCount(join); if (averageRowSize == null || rowCount == null) { return null; } @@ -357,7 +357,7 @@ public static Integer getSplitCountWithoutRepartition(HiveJoin join) { } else { return null; } - return RelMetadataQuery.splitCount(largeInput); + return RelMetadataQuery.instance().splitCount(largeInput); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java index 6669d32..f1037e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java @@ -84,8 +84,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { - double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java index 61a3a64..c0086f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java @@ -78,7 +78,7 @@ public RelOptCost getDefaultCost() { @Override public RelOptCost getScanCost(HiveTableScan ts) { - return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.getAverageRowSize(ts)); + return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.instance().getAverageRowSize(ts)); } @Override @@ -87,7 +87,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) { return HiveCost.FACTORY.makeZeroCost(); } else { // 1. Sum of input cardinalities - final Double rCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + final Double rCount = RelMetadataQuery.instance().getRowCount(aggregate.getInput()); if (rCount == null) { return null; } @@ -96,7 +96,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) { // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to GBy + // cost of transferring map outputs to GBy operator - final Double rAverageSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); + final Double rAverageSize = RelMetadataQuery.instance().getAverageRowSize(aggregate.getInput()); if (rAverageSize == null) { return null; } @@ -129,8 +129,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { // 1. Sum of input cardinalities - final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (leftRCount == null || rightRCount == null) { return null; } @@ -151,8 +151,8 @@ public RelOptCost getCost(HiveJoin join) { // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to join + // cost of transferring map outputs to Join operator - final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double leftRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); if (leftRAverageSize == null || rightRAverageSize == null) { return null; } @@ -187,8 +187,8 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = - RelMetadataQuery.cumulativeMemoryWithinPhase(join); - final Integer splitCount = RelMetadataQuery.splitCount(join); + RelMetadataQuery.instance().cumulativeMemoryWithinPhase(join); + final Integer splitCount = RelMetadataQuery.instance().splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { @@ -239,8 +239,8 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { // 1. Sum of input cardinalities - final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft()); - final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight()); + final Double leftRCount = RelMetadataQuery.instance().getRowCount(join.getLeft()); + final Double rightRCount = RelMetadataQuery.instance().getRowCount(join.getRight()); if (leftRCount == null || rightRCount == null) { return null; } @@ -251,7 +251,7 @@ public RelOptCost getCost(HiveJoin join) { add(leftRCount). add(rightRCount). build(); - ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder(); + ImmutableBitSet.Builder streamingBuilder = ImmutableBitSet.builder(); switch (join.getStreamingSide()) { case LEFT_RELATION: streamingBuilder.set(0); @@ -266,8 +266,8 @@ public RelOptCost getCost(HiveJoin join) { final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming); // 3. IO cost = cost of transferring small tables to join node * // degree of parallelism - final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft()); - final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight()); + final Double leftRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getLeft()); + final Double rightRAverageSize = RelMetadataQuery.instance().getAverageRowSize(join.getRight()); if (leftRAverageSize == null || rightRAverageSize == null) { return null; } @@ -277,8 +277,8 @@ public RelOptCost getCost(HiveJoin join) { build(); JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE); - final int parallelism = RelMetadataQuery.splitCount(join) == null - ? 1 : RelMetadataQuery.splitCount(join); + final int parallelism = RelMetadataQuery.instance().splitCount(join) == null + ? 1 : RelMetadataQuery.instance().splitCount(join); join.setJoinAlgorithm(oldAlgo); final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism); // 4. Result @@ -322,7 +322,7 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { return null; } // If simple map join, the whole relation goes in memory - return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput); + return RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); } @Override @@ -376,7 +376,7 @@ public boolean isExecutable(HiveJoin join) { // What we need is a way to get buckets not splits JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE); - Integer buckets = RelMetadataQuery.splitCount(smallInput); + Integer buckets = RelMetadataQuery.instance().splitCount(smallInput); join.setJoinAlgorithm(oldAlgo); if (buckets == null) { @@ -388,7 +388,7 @@ public boolean isExecutable(HiveJoin join) { for (int i=0; i joinKeysInChildren = new ArrayList(); @@ -203,8 +203,8 @@ public ImmutableBitSet getSortedInputs() throws CalciteSemanticException { for (int i=0; i exps, Rel } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java index 4fac13e..11a0096 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java @@ -108,8 +108,8 @@ public void implement(Implementor implementor) { } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java index 5788805..78d6c40 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java @@ -122,8 +122,8 @@ public HiveTableScan copy(RelDataType newRowtype) { } @Override - public RelOptCost computeSelfCost(RelOptPlanner planner) { - return RelMetadataQuery.getNonCumulativeCost(this); + public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery relMetadataQuery) { + return relMetadataQuery.getNonCumulativeCost(this); } @Override public RelWriter explainTerms(RelWriter pw) { @@ -147,7 +147,7 @@ public void implement(Implementor implementor) { } @Override - public double getRows() { + public double estimateRowCount(RelMetadataQuery mq) { return ((RelOptHiveTable) table).getRowCount(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index 070c7ea..fea7711 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -121,7 +121,7 @@ public void onMatch(RelOptRuleCall call) { // Do the columns used by the join appear in the output of the aggregate? final ImmutableBitSet aggregateColumns = aggregate.getGroupSet(); final ImmutableBitSet keyColumns = keyColumns(aggregateColumns, - RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates); + RelMetadataQuery.instance().getPulledUpPredicates(join).pulledUpPredicates); final ImmutableBitSet joinColumns = RelOptUtil.InputFinder.bits(join.getCondition()); final boolean allColumnsInAggregate = @@ -179,7 +179,7 @@ public void onMatch(RelOptRuleCall call) { unique = true; } else { final Boolean unique0 = - RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey); + RelMetadataQuery.instance().areColumnsUnique(joinInput, belowAggregateKey); unique = unique0 != null && unique0; } if (unique) { @@ -299,8 +299,8 @@ public Integer apply(Integer a0) { } // Make a cost based decision to pick cheaper plan - RelOptCost afterCost = RelMetadataQuery.getCumulativeCost(r); - RelOptCost beforeCost = RelMetadataQuery.getCumulativeCost(aggregate); + RelOptCost afterCost = RelMetadataQuery.instance().getCumulativeCost(r); + RelOptCost beforeCost = RelMetadataQuery.instance().getCumulativeCost(aggregate); if (afterCost.isLt(beforeCost)) { call.transformTo(r); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java index 7d7631b..7d4411a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java @@ -112,7 +112,7 @@ public void onMatch(RelOptRuleCall call) { // arguments then we can use a more efficient form. if ((nonDistinctCount == 0) && (argListSets.size() == 1)) { for (Integer arg : argListSets.iterator().next()) { - Set colOrigs = RelMetadataQuery.getColumnOrigins(aggregate, arg); + Set colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, arg); if (null != colOrigs) { for (RelColumnOrigin colOrig : colOrigs) { RelOptHiveTable hiveTbl = (RelOptHiveTable)colOrig.getOriginTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 07928d8..994af97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -80,7 +80,7 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, public void onMatch(RelOptRuleCall call) { Join join = call.rel(0); - RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join); + RelOptPredicateList preds = RelMetadataQuery.instance().getPulledUpPredicates(join); HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); assert registry != null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 50e139b..6958993 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -128,7 +128,7 @@ public FilterReduceExpressionsRule(Class filterClass, RexNode newConditionExp; boolean reduced; final RelOptPredicateList predicates = - RelMetadataQuery.getPulledUpPredicates(filter.getInput()); + RelMetadataQuery.instance().getPulledUpPredicates(filter.getInput()); if (reduceExpressions(filter, expList, predicates)) { assert expList.size() == 1; newConditionExp = expList.get(0); @@ -242,7 +242,7 @@ public boolean matches(RelOptRuleCall call) { registry.registerVisited(this, project); } final RelOptPredicateList predicates = - RelMetadataQuery.getPulledUpPredicates(project.getInput()); + RelMetadataQuery.instance().getPulledUpPredicates(project.getInput()); final List expList = Lists.newArrayList(project.getProjects()); if (reduceExpressions(project, expList, predicates)) { @@ -274,9 +274,9 @@ public JoinReduceExpressionsRule(Class joinClass, final List expList = Lists.newArrayList(join.getCondition()); final int fieldCount = join.getLeft().getRowType().getFieldCount(); final RelOptPredicateList leftPredicates = - RelMetadataQuery.getPulledUpPredicates(join.getLeft()); + RelMetadataQuery.instance().getPulledUpPredicates(join.getLeft()); final RelOptPredicateList rightPredicates = - RelMetadataQuery.getPulledUpPredicates(join.getRight()); + RelMetadataQuery.instance().getPulledUpPredicates(join.getRight()); final RelOptPredicateList predicates = leftPredicates.union(rightPredicates.shift(fieldCount)); if (!reduceExpressions(join, expList, predicates)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 9f7f8a4..18145ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -23,17 +23,35 @@ import java.util.List; import java.util.Set; +import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexPermuteInputsShuttle; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.CorrelationReferenceFinder; import org.apache.calcite.sql2rel.RelFieldTrimmer; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Stacks; +import org.apache.calcite.util.Util; import org.apache.calcite.util.mapping.IntPair; import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; @@ -41,16 +59,20 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; public class HiveRelFieldTrimmer extends RelFieldTrimmer { protected static final Log LOG = LogFactory.getLog(HiveRelFieldTrimmer.class); + private RelBuilder relBuilder; public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) { super(validator, relBuilder); + this.relBuilder = relBuilder; } /** @@ -153,4 +175,187 @@ public TrimResult trimFields( return new TrimResult(newJoin, mapping); } + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.core.Sort}. + */ + public TrimResult trimFields( + HiveSortLimit sort, + ImmutableBitSet fieldsUsed, + Set extraFields) { + final RelDataType rowType = sort.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final RelCollation collation = sort.getCollation(); + final RelNode input = sort.getInput(); + RelOptCluster cluster = sort.getCluster(); + + // We use the fields used by the consumer, plus any fields used as sort + // keys. + final ImmutableBitSet.Builder inputFieldsUsed = + ImmutableBitSet.builder(fieldsUsed); + for (RelFieldCollation field : collation.getFieldCollations()) { + inputFieldsUsed.set(field.getFieldIndex()); + } + + // Create input with trimmed columns. + final Set inputExtraFields = Collections.emptySet(); + TrimResult trimResult = + trimChild(sort, input, inputFieldsUsed.build(), inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // If the input is unchanged, and we need to project all columns, + // there's nothing we can do. + if (newInput == input + && inputMapping.isIdentity() + && fieldsUsed.cardinality() == fieldCount) { + return result(sort, Mappings.createIdentity(fieldCount)); + } + + relBuilder.push(newInput); + final int offset = + sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); + final int fetch = + sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch); + final ImmutableList fields = + relBuilder.fields(RexUtil.apply(inputMapping, collation)); + + // The result has the same mapping as the input gave us. Sometimes we + // return fields that the consumer didn't ask for, because the filter + // needs them for its condition. + // TODO: Calcite will return empty LogicalValues when offset == 0 && fetch == 0. + // However, Hive ASTConverter can not deal with LogicalValues. + sortLimit(cluster, relBuilder, offset, fetch, fields); + return result(relBuilder.build(), inputMapping); + } + + private List projects(RelDataType inputRowType, RelOptCluster cluster) { + final List exprList = new ArrayList<>(); + for (RelDataTypeField field : inputRowType.getFieldList()) { + final RexBuilder rexBuilder = cluster.getRexBuilder(); + exprList.add(rexBuilder.makeInputRef(field.getType(), field.getIndex())); + } + return exprList; + } + + private static RelFieldCollation collation(RexNode node, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection, List extraNodes) { + switch (node.getKind()) { + case INPUT_REF: + return new RelFieldCollation(((RexInputRef) node).getIndex(), direction, + Util.first(nullDirection, direction.defaultNullDirection())); + case DESCENDING: + return collation(((RexCall) node).getOperands().get(0), + RelFieldCollation.Direction.DESCENDING, + nullDirection, extraNodes); + case NULLS_FIRST: + return collation(((RexCall) node).getOperands().get(0), direction, + RelFieldCollation.NullDirection.FIRST, extraNodes); + case NULLS_LAST: + return collation(((RexCall) node).getOperands().get(0), direction, + RelFieldCollation.NullDirection.LAST, extraNodes); + default: + final int fieldIndex = extraNodes.size(); + extraNodes.add(node); + return new RelFieldCollation(fieldIndex, direction, + Util.first(nullDirection, direction.defaultNullDirection())); + } + } + + private void sortLimit(RelOptCluster cluster, RelBuilder relBuilder, int offset, int fetch, + Iterable nodes) { + final List fieldCollations = new ArrayList<>(); + final RelDataType inputRowType = relBuilder.peek().getRowType(); + final List extraNodes = projects(inputRowType, cluster); + final List originalExtraNodes = ImmutableList.copyOf(extraNodes); + for (RexNode node : nodes) { + fieldCollations.add( + collation(node, RelFieldCollation.Direction.ASCENDING, null, + extraNodes)); + } + final RexNode offsetNode = offset <= 0 ? null : relBuilder.literal(offset); + final RexNode fetchNode = fetch < 0 ? null : relBuilder.literal(fetch); + if (offsetNode == null && fetchNode == null && fieldCollations.isEmpty()) { + return; // sort is trivial + } + + final boolean addedFields = extraNodes.size() > originalExtraNodes.size(); + if (fieldCollations.isEmpty()) { + assert !addedFields; + RelNode top = relBuilder.peek(); + if (top instanceof Sort) { + final Sort sort2 = (Sort) top; + if (sort2.offset == null && sort2.fetch == null) { + relBuilder.build(); + relBuilder.push(sort2.getInput()); + final RelNode sort = + HiveSortLimit.create(relBuilder.build(), sort2.collation, + offsetNode, fetchNode); + relBuilder.push(sort); + return; + } + } + if (top instanceof Project) { + final Project project = (Project) top; + if (project.getInput() instanceof Sort) { + final Sort sort2 = (Sort) project.getInput(); + if (sort2.offset == null && sort2.fetch == null) { + relBuilder.build(); + relBuilder.push(sort2.getInput()); + final RelNode sort = + HiveSortLimit.create(relBuilder.build(), sort2.collation, + offsetNode, fetchNode); + relBuilder.push(sort); + relBuilder.project(project.getProjects()); + return; + } + } + } + } + if (addedFields) { + relBuilder.project(extraNodes); + } + final RelNode sort = + HiveSortLimit.create(relBuilder.build(), RelCollations.of(fieldCollations), + offsetNode, fetchNode); + relBuilder.push(sort); + if (addedFields) { + relBuilder.project(originalExtraNodes); + } + return; + } + + private TrimResult result(RelNode r, final Mapping mapping) { + final RexBuilder rexBuilder = relBuilder.getRexBuilder(); + for (final CorrelationId correlation : r.getVariablesSet()) { + r = r.accept( + new CorrelationReferenceFinder() { + @Override + protected RexNode handle(RexFieldAccess fieldAccess) { + final RexCorrelVariable v = + (RexCorrelVariable) fieldAccess.getReferenceExpr(); + if (v.id.equals(correlation) + && v.getType().getFieldCount() == mapping.getSourceCount()) { + final int old = fieldAccess.getField().getIndex(); + final int new_ = mapping.getTarget(old); + final RelDataTypeFactory.FieldInfoBuilder typeBuilder = + relBuilder.getTypeFactory().builder(); + for (int target : Util.range(mapping.getTargetCount())) { + typeBuilder.add( + v.getType().getFieldList().get(mapping.getSource(target))); + } + final RexNode newV = + rexBuilder.makeCorrel(typeBuilder.build(), v.id); + if (old != new_) { + return rexBuilder.makeFieldAccess(newV, new_); + } + } + return fieldAccess; + } + + }); + } + return new TrimResult(r, mapping); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java index 0af60e8..2f2297d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java @@ -96,7 +96,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the input size, we bail out final int offset = sortLimit.offset == null ? 0 : RexLiteral.intValue(sortLimit.offset); if (offset + RexLiteral.intValue(sortLimit.fetch) - >= RelMetadataQuery.getRowCount(reducedInput)) { + >= RelMetadataQuery.instance().getRowCount(reducedInput)) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java index 618c717..573b75a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java @@ -59,7 +59,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the size input enough, we bail out int limit = RexLiteral.intValue(sortLimit.fetch); - Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput()); + Double rowCount = RelMetadataQuery.instance().getRowCount(sortLimit.getInput()); if (rowCount != null && limit <= reductionProportion * rowCount && rowCount - limit >= reductionTuples) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java index 0ec8bf1..04b94c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java @@ -80,7 +80,7 @@ public void onMatch(RelOptRuleCall call) { final int offset = sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); for (RelNode input : union.getInputs()) { // If we do not reduce the input size, we bail out - if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.getRowCount(input)) { + if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.instance().getRowCount(input)) { finishPushSortPastUnion = false; // Here we do some query rewrite. We first get the new fetchRN, which is // a sum of offset and fetch. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index c04060f..b5962df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -47,7 +47,7 @@ protected FilterSelectivityEstimator(RelNode childRel) { super(true); this.childRel = childRel; - this.childCardinality = RelMetadataQuery.getRowCount(childRel); + this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel); } public Double estimateSelectivity(RexNode predicate) { @@ -254,7 +254,7 @@ private Double getMaxNDV(RexCall call) { for (RexNode op : call.getOperands()) { if (op instanceof RexInputRef) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, RelMetadataQuery.instance(), ((RexInputRef) op).getIndex()); if (tmpNDV > maxNDV) maxNDV = tmpNDV; @@ -262,7 +262,8 @@ private Double getMaxNDV(RexCall call) { irv = new InputReferencedVisitor(); irv.apply(op); for (Integer childProjIndx : irv.inputPosReferenced) { - tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx); + tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, + RelMetadataQuery.instance(), childProjIndx); if (tmpNDV > maxNDV) maxNDV = tmpNDV; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java index 84fa518..66bc148 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdCollation.java @@ -24,6 +24,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdCollation; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.BuiltInMethod; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -46,7 +47,7 @@ private HiveRelMdCollation() {} //~ Methods ---------------------------------------------------------------- - public ImmutableList collations(HiveAggregate aggregate) { + public ImmutableList collations(HiveAggregate aggregate, RelMetadataQuery relMetadataQuery) { // Compute collations ImmutableList.Builder collationListBuilder = new ImmutableList.Builder(); @@ -60,7 +61,7 @@ private HiveRelMdCollation() {} new HiveRelCollation(collationListBuilder.build()))); } - public ImmutableList collations(HiveJoin join) { + public ImmutableList collations(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getCollation(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java index 1220401..749dee6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ -58,19 +58,19 @@ private HiveRelMdDistinctRowCount() { // Catch-all rule when none of the others apply. @Override - public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey, + public Double getDistinctRowCount(RelNode rel, RelMetadataQuery relMetadataQuery, ImmutableBitSet groupKey, RexNode predicate) { if (rel instanceof HiveTableScan) { - return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate); + return getDistinctRowCount((HiveTableScan) rel, relMetadataQuery, groupKey, predicate); } /* * For now use Calcite' default formulas for propagating NDVs up the Query * Tree. */ - return super.getDistinctRowCount(rel, groupKey, predicate); + return super.getDistinctRowCount(rel, relMetadataQuery, groupKey, predicate); } - private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey, + private Double getDistinctRowCount(HiveTableScan htRel, RelMetadataQuery relMetadataQuery, ImmutableBitSet groupKey, RexNode predicate) { List projIndxLst = HiveCalciteUtil .translateBitSetToProjIndx(groupKey); @@ -83,39 +83,39 @@ private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey return Math.min(noDistinctRows, htRel.getRows()); } - public static Double getDistinctRowCount(RelNode r, int indx) { + public static Double getDistinctRowCount(RelNode r, RelMetadataQuery relMetadataQuery, int indx) { ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx); - return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r + return relMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r .getCluster().getRexBuilder().makeLiteral(true)); } @Override - public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey, + public Double getDistinctRowCount(Join rel, RelMetadataQuery relMetadataQuery, ImmutableBitSet groupKey, RexNode predicate) { if (rel instanceof HiveJoin) { HiveJoin hjRel = (HiveJoin) rel; //TODO: Improve this if (hjRel.isLeftSemiJoin()) { - return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, + return relMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey, rel.getCluster().getRexBuilder().makeLiteral(true)); } else { - return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(), + return RelMdUtil.getJoinDistinctRowCount(relMetadataQuery, rel, rel.getJoinType(), groupKey, predicate, true); } } - return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); + return relMetadataQuery.getDistinctRowCount(rel, groupKey, predicate); } /* * Favor Broad Plans over Deep Plans. */ - public RelOptCost getCumulativeCost(HiveJoin rel) { - RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel); + public RelOptCost getCumulativeCost(HiveJoin rel, RelMetadataQuery relMetadataQuery) { + RelOptCost cost = relMetadataQuery.getNonCumulativeCost(rel); List inputs = rel.getInputs(); RelOptCost maxICost = HiveCost.ZERO; for (RelNode input : inputs) { - RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input); + RelOptCost iCost = relMetadataQuery.getCumulativeCost(input); if (maxICost.isLt(iCost)) { maxICost = iCost; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java index b83f240..306df1a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistribution.java @@ -22,6 +22,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdDistribution; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.BuiltInMethod; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -35,8 +36,7 @@ ChainedRelMetadataProvider.of( ImmutableList.of( ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()), - RelMdDistribution.SOURCE)); + BuiltInMethod.DISTRIBUTION.method, new HiveRelMdDistribution()))); //~ Constructors ----------------------------------------------------------- @@ -44,12 +44,12 @@ private HiveRelMdDistribution() {} //~ Methods ---------------------------------------------------------------- - public RelDistribution distribution(HiveAggregate aggregate) { + public RelDistribution distribution(HiveAggregate aggregate, RelMetadataQuery relMetadataQuery) { return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, aggregate.getGroupSet().asList()); } - public RelDistribution distribution(HiveJoin join) { + public RelDistribution distribution(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getDistribution(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java index bea5943..68619ff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMemory.java @@ -47,24 +47,24 @@ private HiveRelMdMemory() {} //~ Methods ---------------------------------------------------------------- - public Double memory(HiveTableScan tableScan) { + public Double memory(HiveTableScan tableScan, RelMetadataQuery relMetadataQuery) { return 0.0d; } - public Double memory(HiveAggregate aggregate) { - final Double avgRowSize = RelMetadataQuery.getAverageRowSize(aggregate.getInput()); - final Double rowCount = RelMetadataQuery.getRowCount(aggregate.getInput()); + public Double memory(HiveAggregate aggregate, RelMetadataQuery relMetadataQuery) { + final Double avgRowSize = relMetadataQuery.getAverageRowSize(aggregate.getInput()); + final Double rowCount = relMetadataQuery.getRowCount(aggregate.getInput()); if (avgRowSize == null || rowCount == null) { return null; } return avgRowSize * rowCount; } - public Double memory(HiveFilter filter) { + public Double memory(HiveFilter filter, RelMetadataQuery relMetadataQuery) { return 0.0; } - public Double memory(HiveJoin join) { + public Double memory(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getMemory(); } @@ -72,15 +72,15 @@ public Double cumulativeMemoryWithinPhaseSplit(HiveJoin join) { return join.getCumulativeMemoryWithinPhaseSplit(); } - public Double memory(HiveProject project) { + public Double memory(HiveProject project, RelMetadataQuery relMetadataQuery) { return 0.0; } - public Double memory(HiveSortLimit sort) { + public Double memory(HiveSortLimit sort, RelMetadataQuery relMetadataQuery) { if (sort.getCollation() != RelCollations.EMPTY) { // It sorts - final Double avgRowSize = RelMetadataQuery.getAverageRowSize(sort.getInput()); - final Double rowCount = RelMetadataQuery.getRowCount(sort.getInput()); + final Double avgRowSize = relMetadataQuery.getAverageRowSize(sort.getInput()); + final Double rowCount = relMetadataQuery.getRowCount(sort.getInput()); if (avgRowSize == null || rowCount == null) { return null; } @@ -90,7 +90,7 @@ public Double memory(HiveSortLimit sort) { return 0.0; } - public Double memory(HiveUnion union) { + public Double memory(HiveUnion union, RelMetadataQuery relMetadataQuery) { return 0.0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java index 2f51d3b..c8c9ee5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdParallelism.java @@ -48,21 +48,21 @@ public RelMetadataProvider getMetadataProvider() { //~ Methods ---------------------------------------------------------------- - public Boolean isPhaseTransition(HiveJoin join) { + public Boolean isPhaseTransition(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.isPhaseTransition(); } - public Boolean isPhaseTransition(HiveSortLimit sort) { + public Boolean isPhaseTransition(HiveSortLimit sort, RelMetadataQuery relMetadataQuery) { // As Exchange operator is introduced later on, we make a // sort operator create a new stage for the moment return true; } - public Integer splitCount(HiveJoin join) { + public Integer splitCount(HiveJoin join, RelMetadataQuery relMetadataQuery) { return join.getSplitCount(); } - public Integer splitCount(HiveTableScan scan) { + public Integer splitCount(HiveTableScan scan, RelMetadataQuery relMetadataQuery) { Integer splitCount; RelOptHiveTable table = (RelOptHiveTable) scan.getTable(); @@ -70,7 +70,7 @@ public Integer splitCount(HiveTableScan scan) { if (bucketCols != null && !bucketCols.isEmpty()) { splitCount = table.getHiveTableMD().getNumBuckets(); } else { - splitCount = splitCountRepartition(scan); + splitCount = splitCountRepartition(scan, relMetadataQuery); if (splitCount == null) { throw new RuntimeException("Could not get split count for table: " + scan.getTable().getQualifiedName()); @@ -80,8 +80,8 @@ public Integer splitCount(HiveTableScan scan) { return splitCount; } - public Integer splitCount(RelNode rel) { - Boolean newPhase = RelMetadataQuery.isPhaseTransition(rel); + public Integer splitCount(RelNode rel, RelMetadataQuery relMetadataQuery) { + Boolean newPhase = relMetadataQuery.isPhaseTransition(rel); if (newPhase == null) { return null; @@ -89,21 +89,21 @@ public Integer splitCount(RelNode rel) { if (newPhase) { // We repartition: new number of splits - return splitCountRepartition(rel); + return splitCountRepartition(rel, relMetadataQuery); } // We do not repartition: take number of splits from children Integer splitCount = 0; for (RelNode input : rel.getInputs()) { - splitCount += RelMetadataQuery.splitCount(input); + splitCount += relMetadataQuery.splitCount(input); } return splitCount; } - public Integer splitCountRepartition(RelNode rel) { + public Integer splitCountRepartition(RelNode rel, RelMetadataQuery relMetadataQuery) { // We repartition: new number of splits - final Double averageRowSize = RelMetadataQuery.getAverageRowSize(rel); - final Double rowCount = RelMetadataQuery.getRowCount(rel); + final Double averageRowSize = relMetadataQuery.getAverageRowSize(rel); + final Double rowCount = relMetadataQuery.getRowCount(rel); if (averageRowSize == null || rowCount == null) { return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index b7244fd..f22c8f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -99,10 +99,10 @@ * */ @Override - public RelOptPredicateList getPredicates(Project project) { + public RelOptPredicateList getPredicates(Project project, RelMetadataQuery relMetadataQuery) { RelNode child = project.getInput(); final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); - RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child); + RelOptPredicateList childInfo = relMetadataQuery.getPulledUpPredicates(child); List projectPullUpPredicates = new ArrayList(); HashMultimap inpIndxToOutIndxMap = HashMultimap.create(); @@ -151,13 +151,13 @@ public RelOptPredicateList getPredicates(Project project) { /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ @Override - public RelOptPredicateList getPredicates(Join join) { + public RelOptPredicateList getPredicates(Join join, RelMetadataQuery relMetadataQuery) { RexBuilder rB = join.getCluster().getRexBuilder(); RelNode left = join.getInput(0); RelNode right = join.getInput(1); - RelOptPredicateList leftInfo = RelMetadataQuery.getPulledUpPredicates(left); - RelOptPredicateList rightInfo = RelMetadataQuery.getPulledUpPredicates(right); + RelOptPredicateList leftInfo = relMetadataQuery.getPulledUpPredicates(left); + RelOptPredicateList rightInfo = relMetadataQuery.getPulledUpPredicates(right); HiveJoinConditionBasedPredicateInference jI = new HiveJoinConditionBasedPredicateInference(join, RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index caf8978..fe31f08 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -62,8 +62,8 @@ protected HiveRelMdRowCount() { super(); } - public Double getRowCount(Join join) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(join); + public Double getRowCount(Join join, RelMetadataQuery relMetadataQuery) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, relMetadataQuery); if (pkfk != null) { double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); selectivity = Math.min(1.0, selectivity); @@ -76,8 +76,8 @@ public Double getRowCount(Join join) { } @Override - public Double getRowCount(SemiJoin rel) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel); + public Double getRowCount(SemiJoin rel, RelMetadataQuery relMetadataQuery) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, relMetadataQuery); if (pkfk != null) { double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); selectivity = Math.min(1.0, selectivity); @@ -86,12 +86,12 @@ public Double getRowCount(SemiJoin rel) { } return pkfk.fkInfo.rowCount * selectivity; } - return super.getRowCount(rel); + return super.getRowCount(rel, relMetadataQuery); } @Override - public Double getRowCount(Sort rel) { - final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput()); + public Double getRowCount(Sort rel, RelMetadataQuery relMetadataQuery) { + final Double rowCount = relMetadataQuery.getRowCount(rel.getInput()); if (rowCount != null && rel.fetch != null) { final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset); final int limit = RexLiteral.intValue(rel.fetch); @@ -176,7 +176,7 @@ public String toString() { * or Fact roj Dim b) The selectivity factor applied on the Fact Table should * be 1. */ - public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { + public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery relMetadataQuery) { RelNode left = joinRel.getInputs().get(0); RelNode right = joinRel.getInputs().get(1); @@ -229,16 +229,16 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { */ boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel .getJoinType() == JoinRelType.RIGHT) - && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left); + && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left, relMetadataQuery); boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right); + .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, relMetadataQuery); if (!leftIsKey && !rightIsKey) { return null; } - double leftRowCount = RelMetadataQuery.getRowCount(left); - double rightRowCount = RelMetadataQuery.getRowCount(right); + double leftRowCount = relMetadataQuery.getRowCount(left); + double rightRowCount = relMetadataQuery.getRowCount(right); if (leftIsKey && rightIsKey) { if (rightRowCount < leftRowCount) { @@ -251,10 +251,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { boolean isPKSideSimpleTree = pkSide != -1 ? IsSimpleTreeOnJoinKey.check( pkSide == 0 ? left : right, - pkSide == 0 ? leftColIdx : rightColIdx) : false; + pkSide == 0 ? leftColIdx : rightColIdx, relMetadataQuery) : false; - double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1; - double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1; + double leftNDV = isPKSideSimpleTree ? relMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? relMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1; /* * If the ndv of the PK - FK side don't match, and the PK side is a filter @@ -285,7 +285,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { if (pkSide == 0) { FKSideInfo fkInfo = new FKSideInfo(rightRowCount, rightNDV); - double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount); + double pkSelectivity = pkSelectivity(joinRel, relMetadataQuery, true, left, leftRowCount); PKSideInfo pkInfo = new PKSideInfo(leftRowCount, leftNDV, joinRel.getJoinType().generatesNullsOnRight() ? 1.0 : @@ -297,7 +297,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { if (pkSide == 1) { FKSideInfo fkInfo = new FKSideInfo(leftRowCount, leftNDV); - double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount); + double pkSelectivity = pkSelectivity(joinRel, relMetadataQuery, false, right, rightRowCount); PKSideInfo pkInfo = new PKSideInfo(rightRowCount, rightNDV, joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : @@ -309,7 +309,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) { return null; } - private static double pkSelectivity(Join joinRel, boolean leftChild, + private static double pkSelectivity(Join joinRel, RelMetadataQuery relMetadataQuery, boolean leftChild, RelNode child, double childRowCount) { if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) || @@ -318,7 +318,7 @@ private static double pkSelectivity(Join joinRel, boolean leftChild, } else { HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true); if (tScan != null) { - double tRowCount = RelMetadataQuery.getRowCount(tScan); + double tRowCount = relMetadataQuery.getRowCount(tScan); return childRowCount / tRowCount; } else { return 1.0; @@ -326,9 +326,9 @@ private static double pkSelectivity(Join joinRel, boolean leftChild, } } - private static boolean isKey(ImmutableBitSet c, RelNode rel) { + private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery relMetadataQuery) { boolean isKey = false; - Set keys = RelMetadataQuery.getUniqueKeys(rel); + Set keys = relMetadataQuery.getUniqueKeys(rel); if (keys != null) { for (ImmutableBitSet key : keys) { if (key.equals(c)) { @@ -402,16 +402,18 @@ private static boolean isKey(ImmutableBitSet c, RelNode rel) { int joinKey; boolean simpleTree; + RelMetadataQuery relMetadataQuery; - static boolean check(RelNode r, int joinKey) { - IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey); + static boolean check(RelNode r, int joinKey, RelMetadataQuery relMetadataQuery) { + IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey, relMetadataQuery); v.go(r); return v.simpleTree; } - IsSimpleTreeOnJoinKey(int joinKey) { + IsSimpleTreeOnJoinKey(int joinKey, RelMetadataQuery relMetadataQuery) { super(); this.joinKey = joinKey; + this.relMetadataQuery = relMetadataQuery; simpleTree = true; } @@ -427,7 +429,7 @@ public void visit(RelNode node, int ordinal, RelNode parent) { } else if (node instanceof Project) { simpleTree = isSimple((Project) node); } else if (node instanceof Filter) { - simpleTree = isSimple((Filter) node); + simpleTree = isSimple((Filter) node, relMetadataQuery); } else { simpleTree = false; } @@ -446,9 +448,9 @@ private boolean isSimple(Project project) { return false; } - private boolean isSimple(Filter filter) { + private boolean isSimple(Filter filter, RelMetadataQuery relMetadataQuery) { ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); - return isKey(condBits, filter); + return isKey(condBits, filter, relMetadataQuery); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index a0eb83d..9b1dfa5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -49,7 +49,7 @@ protected HiveRelMdSelectivity() { super(); } - public Double getSelectivity(HiveTableScan t, RexNode predicate) { + public Double getSelectivity(HiveTableScan t, RelMetadataQuery relMetadataQuery, RexNode predicate) { if (predicate != null) { FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t); return filterSelEstmator.estimateSelectivity(predicate); @@ -58,15 +58,15 @@ public Double getSelectivity(HiveTableScan t, RexNode predicate) { return 1.0; } - public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { + public Double getSelectivity(HiveJoin j, RelMetadataQuery relMetadataQuery, RexNode predicate) throws CalciteSemanticException { if (j.getJoinType().equals(JoinRelType.INNER)) { - return computeInnerJoinSelectivity(j, predicate); + return computeInnerJoinSelectivity(j, relMetadataQuery, predicate); } else if (j.getJoinType().equals(JoinRelType.LEFT) || j.getJoinType().equals(JoinRelType.RIGHT)) { - double left = RelMetadataQuery.getRowCount(j.getLeft()); - double right = RelMetadataQuery.getRowCount(j.getRight()); + double left = relMetadataQuery.getRowCount(j.getLeft()); + double right = relMetadataQuery.getRowCount(j.getRight()); double product = left * right; - double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate); + double innerJoinSelectivity = computeInnerJoinSelectivity(j, relMetadataQuery, predicate); if (j.getJoinType().equals(JoinRelType.LEFT)) { return Math.max(innerJoinSelectivity, left/product); } @@ -75,7 +75,7 @@ public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemant return 1.0; } - private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { + private Double computeInnerJoinSelectivity(HiveJoin j, RelMetadataQuery relMetadataQuery, RexNode predicate) throws CalciteSemanticException { double ndvCrossProduct = 1; Pair predInfo = getCombinedPredicateForJoin(j, predicate); @@ -97,14 +97,14 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws // Join which are part of join keys for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) { colStatMapBuilder.put(ljk, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk)); + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), relMetadataQuery, ljk)); } // 2. Update Col Stats Map with col stats for columns from right side of // Join which are part of join keys for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) { colStatMapBuilder.put(rjk + rightOffSet, - HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk)); + HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), relMetadataQuery, rjk)); } colStatMap = colStatMapBuilder.build(); @@ -116,11 +116,11 @@ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws ndvCrossProduct = exponentialBackoff(peLst, colStatMap); if (j.isLeftSemiJoin()) - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()), + ndvCrossProduct = Math.min(relMetadataQuery.getRowCount(j.getLeft()), ndvCrossProduct); else - ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()) - * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct); + ndvCrossProduct = Math.min(relMetadataQuery.getRowCount(j.getLeft()) + * relMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct); } // 4. Join Selectivity = 1/NDV diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java index 3224039..b04d992 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java @@ -50,7 +50,7 @@ private HiveRelMdSize() {} //~ Methods ---------------------------------------------------------------- - public List averageColumnSizes(HiveTableScan scan) { + public List averageColumnSizes(HiveTableScan scan, RelMetadataQuery relMetadataQuery) { List neededcolsLst = scan.getNeededColIndxsFrmReloptHT(); List columnStatistics = ((RelOptHiveTable) scan.getTable()) .getColStat(neededcolsLst, true); @@ -77,14 +77,14 @@ private HiveRelMdSize() {} return list.build(); } - public List averageColumnSizes(HiveJoin rel) { + public List averageColumnSizes(HiveJoin rel, RelMetadataQuery relMetadataQuery) { final RelNode left = rel.getLeft(); final RelNode right = rel.getRight(); final List lefts = - RelMetadataQuery.getAverageColumnSizes(left); + relMetadataQuery.getAverageColumnSizes(left); List rights = null; if (!rel.isLeftSemiJoin()) { - rights = RelMetadataQuery.getAverageColumnSizes(right); + rights = relMetadataQuery.getAverageColumnSizes(right); } if (lefts == null && rights == null) { return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 7c22c33..da45bfa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -34,6 +34,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdUniqueKeys; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.BitSets; @@ -59,12 +60,13 @@ * Inferring Uniqueness for all columns is very expensive right now. The flip * side of doing this is, it only works post Field Trimming. */ - public Set getUniqueKeys(Project rel, boolean ignoreNulls) { + public Set getUniqueKeys(Project rel, RelMetadataQuery relMetadataQuery, boolean ignoreNulls) { HiveTableScan tScan = getTableScan(rel.getInput(), false); if ( tScan == null ) { - Function fn = RelMdUniqueKeys.SOURCE.apply( + @SuppressWarnings("unchecked") + Function fn = (Function) RelMdUniqueKeys.SOURCE.apply( rel.getClass(), BuiltInMetadata.UniqueKeys.class); return ((BuiltInMetadata.UniqueKeys) fn.apply(rel)) .getUniqueKeys(ignoreNulls); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index a0e374c..b841315 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -430,8 +430,8 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { // 1.a. Extract order for each column from collation // Generate sortCols and order - ImmutableBitSet.Builder sortColsPosBuilder = new ImmutableBitSet.Builder(); - ImmutableBitSet.Builder sortOutputColsPosBuilder = new ImmutableBitSet.Builder(); + ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder(); + ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder(); Map obRefToCallMap = sortRel.getInputRefToCallMap(); List sortCols = new ArrayList(); StringBuilder order = new StringBuilder();