diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 9d78048..6622b5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -826,6 +826,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Map joinedColStats = Maps.newHashMap(); Map> joinKeys = Maps.newHashMap(); + List rowCounts = Lists.newArrayList(); // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { @@ -845,6 +846,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (String tabAlias : tableAliases) { rowCountParents.put(tabAlias, parentStats.getNumRows()); } + rowCounts.add(parentStats.getNumRows()); // multi-attribute join key if (keyExprs.size() > 1) { @@ -945,8 +947,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // update join statistics stats.setColumnStats(outColStats); - long newRowCount = computeNewRowCount( - Lists.newArrayList(rowCountParents.values()), denom); + long newRowCount = computeNewRowCount(rowCounts, denom); updateStatsForJoinType(stats, newRowCount, jop, rowCountParents, outInTabAlias); diff --git ql/src/test/results/clientpositive/union20.q.out ql/src/test/results/clientpositive/union20.q.out index 631d158..98a99df 100644 --- ql/src/test/results/clientpositive/union20.q.out +++ ql/src/test/results/clientpositive/union20.q.out @@ -132,14 +132,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1632 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 36 Data size: 9792 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat