From cf1048e919b58f8d226eae34b222756175e0d59b Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 28 Jul 2016 17:13:22 -0700 Subject: [PATCH] HIVE-14378 : Data size may be estimated as 0 if no columns are being projected after an operator --- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 13 +++- .../clientpositive/annotate_stats_part.q.out | 20 +++--- .../results/clientpositive/cbo_rp_auto_join1.q.out | 4 +- .../columnStatsUpdateForStatsOptimizer_1.q.out | 4 +- .../results/clientpositive/stats_only_null.q.out | 4 +- .../clientpositive/stats_partial_size.q.out | 4 +- .../tez/vector_left_outer_join.q.out | 28 ++++---- .../clientpositive/tez/vector_outer_join3.q.out | 84 +++++++++++----------- .../clientpositive/tez/vector_outer_join4.q.out | 56 +++++++-------- 9 files changed, 112 insertions(+), 105 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 7a15904..7458cab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -197,7 +197,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns); long betterDS = getDataSizeFromColumnStats(nr, colStats); - ds = betterDS < 1 ? ds : betterDS; + ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } stats.setDataSize(ds); // infer if any column can be primary key based on column statistics @@ -298,7 +298,7 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList, columnStats); long betterDS = getDataSizeFromColumnStats(nr, columnStats); - stats.setDataSize(betterDS < 1 ? ds : betterDS); + stats.setDataSize((betterDS < 1 || columnStats.isEmpty()) ? ds : betterDS); // infer if any column can be primary key based on column statistics inferAndSetPrimaryKey(stats.getNumRows(), columnStats); @@ -1449,10 +1449,17 @@ public static long getBasicStatForTable(Table table, String statType) { public static long getDataSizeFromColumnStats(long numRows, List colStats) { long result = 0; - if (numRows <= 0 || colStats == null || colStats.isEmpty()) { + if (numRows <= 0 || colStats == null) { return result; } + if (colStats.isEmpty()) { + // this may happen if we are not projecting any column from current operator + // think count(*) where we are projecting rows without any columns + // in such a case we estimate empty row to be of size of empty java object. + return numRows * JavaDataModel.JAVA64_REF; + } + for (ColStatistics cs : colStats) { if (cs != null) { String colTypeLowerCase = cs.getColumnType().toLowerCase(); diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out index 50fc633..77fbd3a 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -133,11 +133,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 372 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE @@ -179,11 +179,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 734 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 734 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for all partitions @@ -216,11 +216,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -325,11 +325,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 838 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: zip (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 838 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL @@ -442,11 +442,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 934eb5d..89bc7af 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -301,9 +301,9 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_1.q.out index 00f3776..56a5184 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_1.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_1.q.out @@ -661,11 +661,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: calendarp - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: year (type: int) outputColumnNames: year - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(year) mode: hash diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index b47fe64..88c2114 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -132,11 +132,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: stats_null_part - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(a), count(b), count(c), count(d) mode: hash diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out index ee9040d..8ee4b11 100644 --- a/ql/src/test/results/clientpositive/stats_partial_size.q.out +++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -83,10 +83,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out index 01e3040..21e50d3 100644 --- a/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out @@ -33,11 +33,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 73384 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 73384 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -47,7 +47,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26150 Data size: 104600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -56,46 +56,46 @@ STAGE PLANS: 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3418417 Data size: 27347336 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36692 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -104,10 +104,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join3.q.out index 7d19da9..115a5ee 100644 --- a/ql/src/test/results/clientpositive/tez/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_outer_join3.q.out @@ -261,11 +261,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -275,7 +275,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: @@ -285,47 +285,47 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 24 Data size: 5526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -334,10 +334,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,11 +409,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cstring2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2850 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -423,7 +423,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 1869 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: @@ -433,47 +433,47 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 24 Data size: 5526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -482,10 +482,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -557,11 +557,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3050 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -571,7 +571,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: @@ -581,47 +581,47 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 24 Data size: 5526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cstring2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 20 Data size: 4568 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -630,10 +630,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out index 8ae89e8..af80b0d 100644 --- a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out @@ -274,11 +274,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6940 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -288,11 +288,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 - Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 81 Data size: 49734 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,16 +302,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized @@ -421,11 +421,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -435,11 +435,11 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 450 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -449,16 +449,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Stage: Stage-0 @@ -930,11 +930,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -944,7 +944,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 81 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: @@ -954,47 +954,47 @@ STAGE PLANS: 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 36 Data size: 8273 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1215 Data size: 9720 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -1003,10 +1003,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -- 1.7.12.4 (Apple Git-37)