diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index e339d0a..28e242a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -683,7 +683,7 @@ public String apply(RexNode r) { // Note: this is the last step, trying to avoid the expensive call to the metadata provider // if possible Set<String> predicatesInSubtree = Sets.newHashSet(); - for (RexNode pred : RelMetadataQuery.instance().getPulledUpPredicates(inp).pulledUpPredicates) { + for (RexNode pred : HiveRelMetadataQueryProvider.get().getPulledUpPredicates(inp).pulledUpPredicates) { predicatesInSubtree.add(pred.toString()); predicatesInSubtree.addAll(Lists.transform(RelOptUtil.conjunctions(pred), REX_STR_FN)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelMetadataQueryProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelMetadataQueryProvider.java new file mode 100644 index 0000000..534505e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelMetadataQueryProvider.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.function.Supplier; +import org.apache.calcite.rel.metadata.RelMetadataQuery; + +/** + * Provides the current compiling query RelMetadataQuery instance + */ +public class HiveRelMetadataQueryProvider { + private static final ThreadLocal<RelMetadataQuery> THREADS_RELMDQUERY = + ThreadLocal.withInitial(new Supplier<RelMetadataQuery>() { + @Override + public RelMetadataQuery get() { + return RelMetadataQuery.instance(); + } + }); + + /** + * Private CTOR, Suppress accidental instantiation + */ + private HiveRelMetadataQueryProvider() { + } + + public static RelMetadataQuery get() { + return THREADS_RELMDQUERY.get(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java index 4ba10e2..acc2574 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveAlgorithmsUtil.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import com.google.common.collect.ImmutableList; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; public class HiveAlgorithmsUtil { @@ -199,7 +200,7 @@ public double computeSMBMapJoinIOCost( } public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) { - Double currentMemory = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(input); + Double currentMemory = HiveRelMetadataQueryProvider.get().cumulativeMemoryWithinPhase(input); if (currentMemory != null) { if(currentMemory / buckets > maxSize) { return false; @@ -310,7 +311,7 @@ public static Double getJoinMemory(HiveJoin join) { public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation 
streamingSide) { Double memory = 0.0; - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { // Left side @@ -338,7 +339,7 @@ public static Integer getSplitCountWithRepartition(HiveJoin join) { final Double maxSplitSize = join.getCluster().getPlanner().getContext(). unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); // We repartition: new number of splits - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); final Double averageRowSize = mq.getAverageRowSize(join); final Double rowCount = mq.getRowCount(join); if (averageRowSize == null || rowCount == null) { @@ -358,7 +359,7 @@ public static Integer getSplitCountWithoutRepartition(HiveJoin join) { } else { return null; } - return RelMetadataQuery.instance().splitCount(largeInput); + return HiveRelMetadataQueryProvider.get().splitCount(largeInput); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java index 40f2cef..47759d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveDefaultCostModel.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Default implementation of the cost model. 
@@ -85,7 +86,7 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); double leftRCount = mq.getRowCount(join.getLeft()); double rightRCount = mq.getRowCount(join.getRight()); return HiveCost.FACTORY.makeCost(leftRCount + rightRCount, 0.0, 0.0); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java index af5fc5e..43ea6bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java @@ -42,6 +42,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Cost model for Tez execution engine. @@ -79,7 +80,7 @@ public RelOptCost getDefaultCost() { @Override public RelOptCost getScanCost(HiveTableScan ts) { - return algoUtils.computeScanCost(ts.getRows(), RelMetadataQuery.instance().getAverageRowSize(ts)); + return algoUtils.computeScanCost(ts.getRows(), HiveRelMetadataQueryProvider.get().getAverageRowSize(ts)); } @Override @@ -87,7 +88,7 @@ public RelOptCost getAggregateCost(HiveAggregate aggregate) { if (aggregate.isBucketedInput()) { return HiveCost.FACTORY.makeZeroCost(); } else { - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); // 1. Sum of input cardinalities final Double rCount = mq.getRowCount(aggregate.getInput()); if (rCount == null) { @@ -130,7 +131,7 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); // 1. 
Sum of input cardinalities final Double leftRCount = mq.getRowCount(join.getLeft()); final Double rightRCount = mq.getRowCount(join.getRight()); @@ -190,8 +191,8 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = - RelMetadataQuery.instance().cumulativeMemoryWithinPhase(join); - final Integer splitCount = RelMetadataQuery.instance().splitCount(join); + HiveRelMetadataQueryProvider.get().cumulativeMemoryWithinPhase(join); + final Integer splitCount = HiveRelMetadataQueryProvider.get().splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { @@ -241,7 +242,7 @@ public boolean isExecutable(HiveJoin join) { @Override public RelOptCost getCost(HiveJoin join) { - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); // 1. Sum of input cardinalities final Double leftRCount = mq.getRowCount(join.getLeft()); final Double rightRCount = mq.getRowCount(join.getRight()); @@ -332,7 +333,7 @@ public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { return null; } // If simple map join, the whole relation goes in memory - return RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); + return HiveRelMetadataQueryProvider.get().cumulativeMemoryWithinPhase(inMemoryInput); } @Override @@ -386,7 +387,7 @@ public boolean isExecutable(HiveJoin join) { // What we need is a way to get buckets not splits JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE); - Integer buckets = RelMetadataQuery.instance().splitCount(smallInput); + Integer buckets = HiveRelMetadataQueryProvider.get().splitCount(smallInput); join.setJoinAlgorithm(oldAlgo); if (buckets == null) { @@ -398,7 +399,7 @@ public boolean isExecutable(HiveJoin join) { for (int i=0; i colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, 
arg); + Set colOrigs = HiveRelMetadataQueryProvider.get().getColumnOrigins(aggregate, arg); if (null != colOrigs) { for (RelColumnOrigin colOrig : colOrigs) { RelOptHiveTable hiveTbl = (RelOptHiveTable)colOrig.getOriginTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 65a19e1..3a25eb5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -48,6 +48,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Planner rule that infers predicates from on a @@ -80,7 +81,7 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, public void onMatch(RelOptRuleCall call) { Join join = call.rel(0); - RelOptPredicateList preds = RelMetadataQuery.instance().getPulledUpPredicates(join); + RelOptPredicateList preds = HiveRelMetadataQueryProvider.get().getPulledUpPredicates(join); HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); assert registry != null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 6efc731..a242f7d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Collection 
of planner rules that apply various simplifying transformations on @@ -109,7 +110,7 @@ public FilterReduceExpressionsRule(Class filterClass, Lists.newArrayList(filter.getCondition()); RexNode newConditionExp; boolean reduced; - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); final RelOptPredicateList predicates = mq.getPulledUpPredicates(filter.getInput()); if (reduceExpressions(filter, expList, predicates, true)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java index 0644f0c..7327790 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java @@ -46,6 +46,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * This rule simplifies the condition in Filter operators using the @@ -82,7 +83,7 @@ public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); - final RelMetadataQuery metadataProvider = RelMetadataQuery.instance(); + final RelMetadataQuery metadataProvider = HiveRelMetadataQueryProvider.get(); // 1. 
Recompose filter possibly by pulling out common elements from DNF // expressions diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 05d2b28..df75e9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -122,6 +122,7 @@ import java.util.TreeMap; import java.util.TreeSet; import javax.annotation.Nonnull; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * NOTE: this whole logic is replicated from Calcite's RelDecorrelator @@ -2190,7 +2191,7 @@ public void onMatch(RelOptRuleCall call) { // The join filters out the nulls. So, it's ok if there are // nulls in the join keys. - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, right, rightJoinKeys)) { //SQL2REL_LOGGER.fine(rightJoinKeys.toString() @@ -2405,7 +2406,7 @@ public void onMatch(RelOptRuleCall call) { // The join filters out the nulls. So, it's ok if there are // nulls in the join keys. - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); if (!RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, left, correlatedInputRefJoinKeys)) { //SQL2REL_LOGGER.fine(correlatedJoinKeys.toString() @@ -2484,7 +2485,7 @@ public void onMatch(RelOptRuleCall call) { // leftInputRel contains unique keys // i.e. 
each row is distinct and can group by on all the left // fields - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); if (!RelMdUtil.areColumnsDefinitelyUnique(mq, left, allCols)) { //SQL2REL_LOGGER.fine("There are no unique keys for " + left); return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java index 2f2297d..c6f17a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexLiteral; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -96,7 +97,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the input size, we bail out final int offset = sortLimit.offset == null ? 
0 : RexLiteral.intValue(sortLimit.offset); if (offset + RexLiteral.intValue(sortLimit.fetch) - >= RelMetadataQuery.instance().getRowCount(reducedInput)) { + >= HiveRelMetadataQueryProvider.get().getRowCount(reducedInput)) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java index 54874e3..16807e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java @@ -47,6 +47,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Planner rule that pulls up constant keys through a SortLimit operator. @@ -84,7 +85,7 @@ public void onMatch(RelOptRuleCall call) { } final RexBuilder rexBuilder = sort.getCluster().getRexBuilder(); - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); final RelOptPredicateList predicates = mq.getPulledUpPredicates(sort.getInput()); if (predicates == null) { return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java index 573b75a..8ea1c42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptRuleOperand; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexLiteral; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; /** @@ -59,7 +60,7 @@ public boolean matches(RelOptRuleCall call) { // Finally, if we do not reduce the size input enough, we bail out int limit = RexLiteral.intValue(sortLimit.fetch); - Double rowCount = RelMetadataQuery.instance().getRowCount(sortLimit.getInput()); + Double rowCount = HiveRelMetadataQueryProvider.get().getRowCount(sortLimit.getInput()); if (rowCount != null && limit <= reductionProportion * rowCount && rowCount - limit >= reductionTuples) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java index 04b94c3..94c79de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; @@ -80,7 +81,7 @@ public void onMatch(RelOptRuleCall call) { final int offset = sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); for (RelNode input : union.getInputs()) { // If we do not reduce the input size, we bail out - if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.instance().getRowCount(input)) { + if (RexLiteral.intValue(sort.fetch) + offset < HiveRelMetadataQueryProvider.get().getRowCount(input)) { finishPushSortPastUnion = false; // Here we do some query rewrite. We first get the new fetchRN, which is // a sum of offset and fetch. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionPullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionPullUpConstantsRule.java index 4f6c610..8e4ca84 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionPullUpConstantsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionPullUpConstantsRule.java @@ -44,6 +44,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; /** * Planner rule that pulls up constants through a Union operator. @@ -76,7 +77,7 @@ public void onMatch(RelOptRuleCall call) { } final RexBuilder rexBuilder = union.getCluster().getRexBuilder(); - final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); final RelOptPredicateList predicates = mq.getPulledUpPredicates(union); if (predicates == null) { return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index a25b58b..3a72b6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -37,6 +37,7 @@ import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; @@ -48,7 +49,7 @@ protected 
FilterSelectivityEstimator(RelNode childRel) { super(true); this.childRel = childRel; - this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel); + this.childCardinality = HiveRelMetadataQueryProvider.get().getRowCount(childRel); } public Double estimateSelectivity(RexNode predicate) { @@ -252,7 +253,7 @@ private Double getMaxNDV(RexCall call) { double tmpNDV; double maxNDV = 1.0; InputReferencedVisitor irv; - RelMetadataQuery mq = RelMetadataQuery.instance(); + RelMetadataQuery mq = HiveRelMetadataQueryProvider.get(); for (RexNode op : call.getOperands()) { if (op instanceof RexInputRef) { tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, mq, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 7bba80b..f8201b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -19,7 +19,9 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.WeakHashMap; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.hep.HepRelVertex; @@ -58,51 +60,76 @@ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount()); + protected Map<RelNode, Double> memoMap = new WeakHashMap<>(); + protected HiveRelMdRowCount() { super(); } public Double getRowCount(Join join, RelMetadataQuery mq) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq); - if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); - selectivity = Math.min(1.0, selectivity); - if (LOG.isDebugEnabled()) { - LOG.debug("Identified Primary - Foreign Key relation: {} {}",RelOptUtil.toString(join), pkfk); + Double ret = 
memoMap.get(join); + if (ret == null) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq); + if (pkfk != null) { + double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + selectivity = Math.min(1.0, selectivity); + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation: {} {}",RelOptUtil.toString(join), pkfk); + } + return pkfk.fkInfo.rowCount * selectivity; + } + ret = join.getRows(); + if (ret != null) { + memoMap.put(join, ret); } - return pkfk.fkInfo.rowCount * selectivity; } - return join.getRows(); + return ret; } @Override public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); - if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); - selectivity = Math.min(1.0, selectivity); - if (LOG.isDebugEnabled()) { - LOG.debug("Identified Primary - Foreign Key relation: {} {}", RelOptUtil.toString(rel), pkfk); + Double ret = memoMap.get(rel); + if (ret == null) { + PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); + if (pkfk != null) { + double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + selectivity = Math.min(1.0, selectivity); + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation: {} {}", RelOptUtil.toString(rel), pkfk); + } + ret = pkfk.fkInfo.rowCount * selectivity; + } else { + ret = super.getRowCount(rel, mq); + } + if (ret != null) { + memoMap.put(rel, ret); } - return pkfk.fkInfo.rowCount * selectivity; } - return super.getRowCount(rel, mq); + return ret; } @Override public Double getRowCount(Sort rel, RelMetadataQuery mq) { - final Double rowCount = mq.getRowCount(rel.getInput()); - if (rowCount != null && rel.fetch != null) { - final int offset = rel.offset == null ? 
0 : RexLiteral.intValue(rel.offset); - final int limit = RexLiteral.intValue(rel.fetch); - final Double offsetLimit = new Double(offset + limit); - // offsetLimit is smaller than rowCount of the input operator - // thus, we return the offsetLimit - if (offsetLimit < rowCount) { - return offsetLimit; + Double ret = memoMap.get(rel); + if (ret == null) { + final Double rowCount = mq.getRowCount(rel.getInput()); + if (rowCount != null && rel.fetch != null) { + final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset); + final int limit = RexLiteral.intValue(rel.fetch); + final Double offsetLimit = new Double(offset + limit); + // offsetLimit is smaller than rowCount of the input operator + // thus, we return the offsetLimit + if (offsetLimit < rowCount) { + ret = offsetLimit; + } + } else { + ret = rowCount; + } + if (ret != null) { + return ret; } } - return rowCount; + return ret; } static class PKFKRelationInfo { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 721dac8..ac7b038 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -261,6 +261,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelMetadataQueryProvider; public class CalcitePlanner extends SemanticAnalyzer { @@ -288,11 +289,16 @@ public void resetCalciteConfiguration() { @Override @SuppressWarnings("nls") public void analyzeInternal(ASTNode ast) throws SemanticException { - if (runCBO) { - PreCboCtx cboCtx = new PreCboCtx(); - super.analyzeInternal(ast, cboCtx); - } else { - super.analyzeInternal(ast); + try { + if (runCBO) { + PreCboCtx cboCtx = new PreCboCtx(); + super.analyzeInternal(ast, cboCtx); + } else { + super.analyzeInternal(ast); + 
} + } finally { + // Cleanup any residual Calcite memoize references + HiveRelMetadataQueryProvider.get().map.clear(); } }