diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 8bba7b6..ba6e7ed 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1589,6 +1589,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (satisfyPrecondition(parentStats)) { Statistics stats = parentStats.clone(); + List colStats = StatsUtils.getColStatisticsFromRowSchema( + parentStats, lop.getSchema()); + stats.setColumnStats(colStats); // if limit is greater than available rows then do not update // statistics diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 30f63a2..892da5f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -20,7 +20,6 @@ import com.google.common.base.Joiner; import com.google.common.collect.Lists; -import com.google.common.math.DoubleMath; import com.google.common.math.LongMath; import org.apache.commons.logging.Log; @@ -1020,8 +1019,6 @@ public static long getWritableSize(ObjectInspector oi, Object value) { if (colStat != null) { colStat.setColumnName(outColName); colStat.setTableAlias(outTabAlias); - } - if (colStat != null) { cs.add(colStat); } } @@ -1041,6 +1038,39 @@ public static long getWritableSize(ObjectInspector oi, Object value) { } /** + * Get column statistics from parent statistics given the + * row schema of its child. 
+ * @param parentStats + * - parent statistics + * @param rowSchema + * - row schema + * @return column statistics + */ + public static List getColStatisticsFromRowSchema( + Statistics parentStats, RowSchema rowSchema) { /* For every column in rowSchema's signature: look up the parent's statistics by internal column name, clone them, stamp the clone with the schema column's table alias, and collect it. */ + + List cs = Lists.newArrayList(); + + for (ColumnInfo ci : rowSchema.getSignature()) { + String outColName = ci.getInternalName(); /* lookup key into parentStats */ + ColStatistics colStat = parentStats.getColumnStatisticsFromColName(outColName); + if (colStat != null) { + try { + colStat = colStat.clone(); + } catch (CloneNotSupportedException e) { /* not reported: a column whose statistics cannot be cloned is left out of the result */ + colStat = null; + } + } + if (colStat != null) { /* also skips columns the parent has no statistics for */ + colStat.setTableAlias(ci.getTabAlias()); + cs.add(colStat); + } + } + + return cs; /* empty when no column passed the checks above */ + } + + /** + * Get column statistics expression nodes * @param conf * - hive conf diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index 8984d02..a4c8c41 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -1062,17 +1062,17 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), 11.0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE 
Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat