diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 1feb1fd..ddd4971 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -150,25 +150,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Statistics parentStats = parent.getStatistics(); AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); + Statistics stats = null; - // SELECT (*) does not change the statistics. Just pass on the parent statistics - if (sop.getConf().isSelectStar()) { + if (parentStats != null) { try { - if (parentStats != null) { - sop.setStatistics(parentStats.clone()); - } + stats = parentStats.clone(); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } - return null; } try { if (satisfyPrecondition(parentStats)) { - Statistics stats = parentStats.clone(); - List colStats = - StatsUtils.getColStatisticsFromExprMap(conf, parentStats, sop.getColumnExprMap(), - sop.getSchema()); + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + sop.getColumnExprMap(), sop.getSchema()); long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); stats.setColumnStats(colStats); stats.setDataSize(setMaxIfInvalid(dataSize)); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index d42ede4..87dbe6f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1020,6 +1020,15 @@ public static long getWritableSize(ObjectInspector oi, Object value) { } cs.add(colStat); } + + return cs; + } + + // In cases where column expression map or row schema is missing, just pass on the parent column + // stats. This could happen in cases like TS -> FIL where FIL does not map input column names to + // internal names. + if (colExprMap == null || rowSchema == null) { + cs.addAll(parentStats.getColumnStats()); } return cs; } diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 70df189..e8cd06d 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -264,10 +264,10 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), null (type: void), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -417,7 +417,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 @@ -480,7 +480,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 8 rawDataSize: 804 @@ -541,7 +541,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 @@ -723,10 +723,10 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), null (type: void) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 1459b44..34f723c 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -158,7 +158,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() keys: state (type: string), locid (type: int) @@ -260,7 +260,7 @@ STAGE PLANS: Select Operator expressions: year (type: int) outputColumnNames: year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: year (type: int) mode: hash @@ -317,7 +317,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int) mode: hash @@ -374,7 +374,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -431,7 +431,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -488,7 +488,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -545,7 +545,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -602,7 +602,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -659,7 +659,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -720,7 +720,7 @@ STAGE PLANS: Select Operator expressions: year (type: int) outputColumnNames: year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: year (type: int) mode: hash @@ -777,7 +777,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), locid (type: int), '0' (type: string) mode: hash @@ -836,7 +836,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), zip (type: bigint) mode: hash diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index f991191..fbd0fb3 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -210,7 +210,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), country (type: string) outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), country (type: string) mode: hash @@ -269,7 +269,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), votes (type: bigint) outputColumnNames: state, votes - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), votes (type: bigint) mode: hash @@ -326,7 +326,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), country (type: string) outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), country (type: string), '0' (type: string) mode: hash @@ -383,7 +383,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), country (type: string) outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), country (type: string) mode: hash @@ -440,7 +440,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), country (type: string) outputColumnNames: state, country - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), country (type: string), '0' (type: string) mode: hash diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out index b61a597..5f8b6f8 100644 --- ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 4 rawDataSize: 396 @@ -103,7 +103,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 4 Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE @@ -131,10 +131,10 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 16 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 @@ -157,7 +157,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index b767a32..3d705d4 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -102,7 +102,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- partition level analyze statistics for specific parition @@ -162,7 +162,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -249,7 +249,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -272,7 +272,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- both partitions will be pruned @@ -475,7 +475,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 2192 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: locid (type: int), '2001' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index 8ef4964..61490de 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -142,7 +142,7 @@ STAGE PLANS: Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 8 @@ -708,7 +708,7 @@ STAGE PLANS: alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -762,7 +762,7 @@ STAGE PLANS: alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index eed5daa..c91a353 100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -159,7 +159,7 @@ STAGE PLANS: Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- all selected columns have statistics @@ -217,7 +217,7 @@ STAGE PLANS: Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index e0e1504..f4dca28 100644 --- ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -164,7 +164,7 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1592 @@ -187,16 +187,16 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -207,16 +207,16 @@ STAGE PLANS: Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat