diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 32fba6c8ff..6ab8d701dd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1382,7 +1382,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { // Case 3: column stats, hash aggregation, NO grouping sets cardinality = Math.min(parentNumRows/2, StatsUtils.safeMult(ndvProduct, parallelism)); - long orgParentNumRows = getParentNumRows(gop, gop.getConf().getKeys(), conf); + long orgParentNumRows = StatsUtils.safeMult(getParentNumRows(gop, gop.getConf().getKeys(), conf) , + parallelism); cardinality = Math.min(cardinality, orgParentNumRows); if (LOG.isDebugEnabled()) { @@ -1410,7 +1411,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // in reduce side GBY, we don't know if the grouping set was present or not. so get it // from map side GBY - GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); + GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(parent, GroupByOperator.class); if (mGop != null) { containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); } @@ -1425,7 +1426,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { // Case 9: column stats, NO grouping sets - cardinality = Math.min(parentNumRows, ndvProduct); + // to get to the source number of rows we should be using original group by + long orgParentNumRows = getParentNumRows(mGop, mGop.getConf().getKeys(), conf) ; + cardinality = Math.min(orgParentNumRows, ndvProduct); if (LOG.isDebugEnabled()) { LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality);