diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/CountDistinctRewriteProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/CountDistinctRewriteProc.java index 32edacba7c..44077dbebc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/CountDistinctRewriteProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/CountDistinctRewriteProc.java @@ -141,8 +141,8 @@ public CountDistinctProcessor(ParseContext pGraphContext) { // Check if we can process it or not by the index of distinct protected int checkCountDistinct(GroupByOperator mGby, ReduceSinkOperator rs, GroupByOperator rGby) { - // Position of distinct column in aggregator list of map Gby before rewrite. - int indexOfDist = -1; + int indexOfDist = -1; // Position of distinct column in aggregator list of map Gby before rewrite. + boolean isNondistinctCountUsed = false; List<ExprNodeDesc> keys = mGby.getConf().getKeys(); if (!(mGby.getConf().getMode() == GroupByDesc.Mode.HASH && !mGby.getConf().isGroupingSetsPresent() && rs.getConf().getKeyCols().size() == 1 @@ -174,10 +174,14 @@ protected int checkCountDistinct(GroupByOperator mGby, ReduceSinkOperator rs, return -1; } } + } else if (aggr.getGenericUDAFName().equalsIgnoreCase("count")) { + isNondistinctCountUsed = true; } } if (indexOfDist == -1) { return -1; + } else if (isNondistinctCountUsed) { + pGraphContext.setSkipGroupByReduceDeduplication(true); } // check if it is potential to trigger nullscan if (pGraphContext.getConf().getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index ce44bd38ef..1779b1f53f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -69,6 +69,10 @@ public ParseContext transform(ParseContext 
pctx) throws SemanticException { pGraphContext = pctx; + if (pctx.getSkipGroupByReduceDeduplication()) { + return pctx; + } + // generate pruned column list for all relevant operators ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 6c363750c5..20e5daa4a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -143,6 +143,8 @@ private boolean disableMapJoin; private Multimap<Class<? extends Operator<?>>, ReduceSinkOperator> terminalOpToRSMap; + private boolean skipGroupByReduceDeduplication; + public ParseContext() { } @@ -717,4 +719,12 @@ public void setTerminalOpToRSMap(Multimap<Class<? extends Operator<?>>, ReduceSinkOperator> public Multimap<Class<? extends Operator<?>>, ReduceSinkOperator> getTerminalOpToRSMap() { return terminalOpToRSMap; } + + public boolean getSkipGroupByReduceDeduplication() { + return skipGroupByReduceDeduplication; + } + + public void setSkipGroupByReduceDeduplication(boolean skipGroupByReduceDeduplication) { + this.skipGroupByReduceDeduplication = skipGroupByReduceDeduplication; + } }