diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index d8acf94..7a15904 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -283,8 +283,8 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa // add partition column stats addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList, emptyStats); - stats.addToColumnStats(emptyStats); + stats.addToDataSize(getDataSizeFromColumnStats(nr, emptyStats)); stats.updateColumnStatsState(deriveStatType(emptyStats, referencedColumns)); } else { List colStats = aggrStats.getColStats(); diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 131cf6a..df42f36 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -56,11 +56,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging @@ -98,7 +98,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 6 Data size: 780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 1884 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -156,11 +156,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 7 Data size: 1966 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -239,7 +239,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -262,7 +262,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -287,14 +287,14 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- partition level partial column statistics @@ -371,7 +371,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: year (type: string) outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/stats_partial_size.q.out ql/src/test/results/clientpositive/stats_partial_size.q.out index 31adec7..ee9040d 100644 --- ql/src/test/results/clientpositive/stats_partial_size.q.out +++ ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sample_partitioned - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: x (type: int), y (type: int) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/stats_ppr_all.q.out ql/src/test/results/clientpositive/stats_ppr_all.q.out index d8da399..de6cb31 100644 --- ql/src/test/results/clientpositive/stats_ppr_all.q.out +++ ql/src/test/results/clientpositive/stats_ppr_all.q.out @@ -122,22 +122,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ss - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (UDFToDouble((((year * 10000) + (month * 100)) + day)) = 2015010.0) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: order_amount (type: float) outputColumnNames: order_amount - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: sum(order_amount) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: double) Reduce Operator Tree: Group By Operator @@ -272,7 +272,7 @@ STAGE PLANS: Processor Tree: TableScan alias: ss - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(((201500 + (month * 10)) + day)) > 201511.0) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE