diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index c1ab64c90f..653a3c1170 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -61,7 +61,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; -import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMaxRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; @@ -84,7 +83,6 @@ new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRuntimeRowCount.SOURCE, - HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, @@ -156,7 +154,6 @@ private RelMetadataProvider init(HiveConf hiveConf) { new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, - HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java index 858aa1a9ec..0867add92f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java @@ -19,6 +19,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rex.RexLiteral; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -41,8 +42,17 @@ public boolean matches(RelOptRuleCall call) { final HiveSortLimit sortLimit = call.rel(0); Double maxRowCount = call.getMetadataQuery().getMaxRowCount(sortLimit.getInput()); - if (maxRowCount != null &&(maxRowCount <= 1)) { - return true; + if (maxRowCount != null) { + if(sortLimit.getFetchExpr() != null) { + // we have LIMIT + int limit = RexLiteral.intValue(sortLimit.getFetchExpr()); + if(maxRowCount <= limit) { + return true; + } + } else if(maxRowCount <= 1){ + // No LIMIT only ORDER BY + return true; + } } return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java deleted file mode 100644 index b45d7652a6..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* TODO: CALC-2991 created some optimizations. This file bypasses - the change for now (see HIVE-22408) -*/ -package org.apache.hadoop.hive.ql.optimizer.calcite.stats; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.calcite.plan.RelOptCost; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; -import org.apache.calcite.rel.metadata.RelMdUtil; -import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdMaxRowCount; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.util.BuiltInMethod; -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.NumberUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.plan.ColStatistics; - -import com.google.common.collect.ImmutableList; - -public class HiveRelMdMaxRowCount extends RelMdMaxRowCount { - - private static final HiveRelMdMaxRowCount INSTANCE = - new HiveRelMdMaxRowCount(); - - public static final RelMetadataProvider SOURCE = - ChainedRelMetadataProvider.of( - ImmutableList.of( - ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.MAX_ROW_COUNT.method, new HiveRelMdMaxRowCount()), - RelMdMaxRowCount.SOURCE)); - - private HiveRelMdMaxRowCount() { - super(); - } - - @Override - public Double getMaxRowCount(Aggregate rel, RelMetadataQuery mq) { - if (rel.getGroupSet().isEmpty()) { - // Aggregate with no GROUP BY always returns 1 row (even on empty table). - return 1D; - } - - final Double rowCount = mq.getMaxRowCount(rel.getInput()); - if (rowCount == null) { - return null; - } - return rowCount * rel.getGroupSets().size(); - } - -} diff --git a/ql/src/test/queries/clientpositive/cbo_limit.q b/ql/src/test/queries/clientpositive/cbo_limit.q index d0b1bc9351..336f9e246e 100644 --- a/ql/src/test/queries/clientpositive/cbo_limit.q +++ b/ql/src/test/queries/clientpositive/cbo_limit.q @@ -34,6 +34,9 @@ select count(*) cs from cbo_t1 where c_int > 1 LIMIT 100; explain cbo select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int; select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int; +explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0; +select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0; + -- prune un-necessary aggregates explain cbo select count(*) from cbo_t1 order by sum(c_int), count(*); select count(*) from cbo_t1 order by sum(c_int), count(*); diff --git a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out index ee62b31ba4..ab53e94df3 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out @@ -192,6 +192,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +PREHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[0]) + HiveProject(_o__c0=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[true]) + HiveFilter(condition=[>($3, 1.0E0)]) + HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) + +PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### PREHOOK: query: explain cbo select count(*) from cbo_t1 order by sum(c_int), count(*) PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1