diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java index 4ba10e2..28c7362 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java @@ -60,8 +60,8 @@ * Double.parseDouble(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_READ)); } - public static RelOptCost computeCardinalityBasedCost(HiveRelNode hr) { - return new HiveCost(hr.getRows(), 0, 0); + public static RelOptCost computeCardinalityBasedCost(HiveRelNode hr, RelMetadataQuery mq) { + return new HiveCost(mq.getRowCount(hr), 0, 0); } public HiveCost computeScanCost(double cardinality, double avgTupleSize) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java index 4af1f8d..3783d5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java @@ -23,13 +23,14 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelDistribution; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import com.google.common.collect.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Cost model interface. @@ -49,7 +50,7 @@ public HiveCostModel(Set joinAlgorithms) { public abstract RelOptCost getAggregateCost(HiveAggregate aggregate); - public abstract RelOptCost getScanCost(HiveTableScan ts); + public abstract RelOptCost getScanCost(HiveTableScan ts, RelMetadataQuery mq); public RelOptCost getJoinCost(HiveJoin join) { // Select algorithm with min cost diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java index 40f2cef..11b6aa3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java @@ -55,7 +55,7 @@ public RelOptCost getDefaultCost() { } @Override - public RelOptCost getScanCost(HiveTableScan ts) { + public RelOptCost getScanCost(HiveTableScan ts, RelMetadataQuery mq) { return HiveCost.FACTORY.makeZeroCost(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java index af5fc5e..2dbfd8f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java @@ -78,8 +78,8 @@ public RelOptCost getDefaultCost() { } @Override - public RelOptCost getScanCost(HiveTableScan ts) { - return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.instance().getAverageRowSize(ts)); + public RelOptCost getScanCost(HiveTableScan ts, RelMetadataQuery mq) { + return algoUtils.computeScanCost(mq.getRowCount(ts), mq.getAverageRowSize(ts)); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java index cbea307..c6b8ce3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveRelMdCost.java @@ -67,7 +67,7 @@ public RelOptCost getNonCumulativeCost(HiveJoin join, RelMetadataQuery mq) { } public RelOptCost getNonCumulativeCost(HiveTableScan ts, RelMetadataQuery mq) { - return hiveCostModel.getScanCost(ts); + return hiveCostModel.getScanCost(ts, mq); } // Default case diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index a25b58b..9b8e6a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -44,11 +44,13 @@ public class FilterSelectivityEstimator extends RexVisitorImpl { private final RelNode childRel; private final double childCardinality; + private final RelMetadataQuery mq; - protected FilterSelectivityEstimator(RelNode childRel) { + protected FilterSelectivityEstimator(RelNode childRel, RelMetadataQuery mq) { super(true); + this.mq = mq; this.childRel = childRel; - this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel); + this.childCardinality = mq.getRowCount(childRel); } public Double estimateSelectivity(RexNode predicate) { @@ -91,7 +93,7 @@ public Double visitCall(RexCall call) { case IS_NOT_NULL: { if (childRel instanceof HiveTableScan) { double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel); - double totalNoOfTuples = childRel.getRows(); + double totalNoOfTuples = mq.getRowCount(childRel); if (totalNoOfTuples >= noOfNulls) { selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1); } else { @@ -252,7 +254,6 @@ private Double getMaxNDV(RexCall call) { double tmpNDV; double maxNDV = 1.0; InputReferencedVisitor irv; - RelMetadataQuery mq = RelMetadataQuery.instance(); for (RexNode op : call.getOperands()) { if (op instanceof RexInputRef) { tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, mq, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java index 77f7aa8..be2d5b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java @@ -81,7 +81,7 @@ private Double getDistinctRowCount(HiveTableScan htRel, RelMetadataQuery mq, Imm noDistinctRows *= cStat.getCountDistint(); } - return Math.min(noDistinctRows, htRel.getRows()); + return Math.min(noDistinctRows, mq.getRowCount(htRel)); } public static Double getDistinctRowCount(RelNode r, RelMetadataQuery mq, int indx) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 7bba80b..9199d52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -72,7 +72,8 @@ public Double getRowCount(Join join, RelMetadataQuery mq) { } return pkfk.fkInfo.rowCount * selectivity; } - return join.getRows(); + // Do not call mq.getRowCount(join), will trigger CyclicMetadataException + return join.estimateRowCount(mq); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 7192684..046f51b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -56,7 +56,7 @@ private HiveRelMdSelectivity() {} public Double getSelectivity(HiveTableScan t, RelMetadataQuery mq, RexNode predicate) { if (predicate != null) { - FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t); + FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t, mq); return filterSelEstmator.estimateSelectivity(predicate); } @@ -85,7 +85,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode getCombinedPredicateForJoin(j, predicate); if (!predInfo.getKey()) { return - new FilterSelectivityEstimator(j). + new FilterSelectivityEstimator(j, mq). estimateSelectivity(predInfo.getValue()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 9a5a2ba..aa4d8a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -138,7 +138,7 @@ projectPos++; } - double numRows = tScan.getRows(); + double numRows = mq.getRowCount(tScan); List colStats = tScan.getColStat(BitSets .toList(projectedCols)); Set keys = new HashSet();