diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 17d9f2df0a..778c918d2e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -35,6 +35,7 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -277,7 +278,28 @@ public static long getNumRows(HiveConf conf, List schema, Table tabl } } - private static long getNumRows(HiveConf conf, List schema, List neededColumns, Table table, long ds) { + private static void estimateStatsForMissingCols(List neededColumns, List columnStats, + Table table, HiveConf conf, long nr, List schema) { + + Set neededCols = new HashSet<>(neededColumns); + Set colsWithStats = new HashSet<>(); + + for (ColStatistics cstats : columnStats) { + colsWithStats.add(cstats.getColumnName()); + } + + List missingColStats = new ArrayList(Sets.difference(neededCols, colsWithStats)); + + if(missingColStats.size() > 0) { + List estimatedColStats = estimateStats(table, schema, missingColStats, conf, nr); + for (ColStatistics estColStats : estimatedColStats) { + columnStats.add(estColStats); + } + } + } + + private static long getNumRows(HiveConf conf, List schema, List neededColumns, + Table table, long ds) { long nr = getNumRows(table); // number of rows -1 means that statistics from metastore is not reliable // and 0 means statistics gathering is disabled @@ -322,9 +344,13 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p List colStats = Lists.newArrayList(); if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); - if(colStats == null || colStats.size() < 1) { - colStats = estimateStats(table,schema,neededColumns, conf, nr); + if(colStats == null) { + colStats = Lists.newArrayList(); } + estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); + + // we should have stats for all columns (estimated or actual) + assert(neededColumns.size() == colStats.size()); long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } @@ -457,15 +483,11 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p aggrStats.getColStats() != null && aggrStats.getColStatsSize() != 0; if (neededColumns.size() == 0 || (neededColsToRetrieve.size() > 0 && !statsRetrieved)) { + estimateStatsForMissingCols(neededColsToRetrieve, columnStats, table, conf, nr, schema); // There are some partitions with no state (or we didn't fetch any state). // Update the stats with empty list to reflect that in the // state/initialize structures. - if(columnStats.isEmpty()) { - // estimate stats - columnStats = estimateStats(table, schema, neededColumns, conf, nr); - } - // add partition column stats addPartitionColumnStats(conf, partitionColsToRetrieve, schema, table, partList, columnStats); diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index f9a1eb8312..cd4b0ad3a6 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,7 +129,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) @@ -743,30 +743,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed147f..49868793ca 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -196,30 +196,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: location - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), votes (type: bigint) outputColumnNames: state, votes - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), votes (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index cdb9cead4c..a73e34d724 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -559,19 +559,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL TableScan alias: ss Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE @@ -595,10 +595,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index dec7f402ad..7f5b83244a 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -132,11 +132,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select bo1 from alltypes_orc @@ -670,11 +670,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select i1 from (select i1 from alltypes_orc limit 10) temp diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index 5d443f1381..ff7b403da3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -139,11 +139,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select deptid from emp_orc diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index 88b5d8464f..a603cc6072 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,7 +129,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) @@ -767,30 +767,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b6363f4..9d00ce6786 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -38,22 +38,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false @@ -70,13 +70,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -129,16 +129,16 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g, $hdt$_1:m] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -150,8 +150,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -165,31 +165,31 @@ STAGE PLANS: TableScan GatherStats: false Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: false TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -203,8 +203,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -213,8 +213,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -274,12 +274,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index e43f736a54..e5ba529d05 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -1804,34 +1804,36 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) - Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:key is not null - TableScan [TS_3] (rows=242 width=10) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=10) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=242 width=10) - Output:["_col1"] - Filter Operator [FIL_24] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Select Operator [SEL_15] (rows=292 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) + Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242 width=10) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:key is not null + TableScan [TS_0] (rows=242 width=10) + default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242 width=10) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index 1bff9eab19..ecc2246b81 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -44,23 +44,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + alias: g + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: true Execution mode: llap @@ -116,7 +116,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] + /filter_join_breaktask/ds=2008-04-08 [g] Map 4 Map Operator Tree: TableScan @@ -132,13 +132,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -197,23 +197,23 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: g - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + alias: f + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: true Execution mode: llap @@ -269,7 +269,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] + /filter_join_breaktask/ds=2008-04-08 [f] Reducer 2 Execution mode: llap Needs Tagging: false @@ -278,19 +278,19 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Position of Big Table: 1 - Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Position of Big Table: 0 + Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: true Reducer 3 Execution mode: llap @@ -300,21 +300,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index 738424bb5b..c94002f37d 100644 --- a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -257,25 +257,25 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 PREHOOK: type: QUERY POSTHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: foo is not null (type: boolean) + predicate: ((bar = 2) and foo is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) @@ -287,10 +287,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((bar = 2) and foo is not null) (type: boolean) + predicate: foo is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) @@ -308,24 +308,23 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: d Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -339,6 +338,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -346,10 +350,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col2 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 718346f83a..320a52ee3d 100644 --- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -43,22 +43,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false @@ -113,7 +113,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] Map 4 Map Operator Tree: TableScan @@ -129,13 +129,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -192,22 +192,22 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -262,7 +262,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -270,18 +270,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: false Reducer 3 Needs Tagging: true @@ -290,12 +290,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index 54d1ce3b8e..546ae600ae 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -776,34 +776,36 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) - Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=230/242 width=3) - Output:["_col0"] - Filter Operator [FIL_23] (rows=230/242 width=3) - predicate:key is not null - TableScan [TS_3] (rows=242/242 width=3) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=218/242 width=179) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=218/242 width=179) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=179) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=230/242 width=175) - Output:["_col1"] - Filter Operator [FIL_24] (rows=230/242 width=175) - predicate:value is not null - TableScan [TS_6] (rows=242/242 width=175) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Select Operator [SEL_15] (rows=278/1166 width=3) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) + Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=218/242 width=179) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=218/242 width=179) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242/242 width=179) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=230/242 width=3) + Output:["_col0"] + Filter Operator [FIL_22] (rows=230/242 width=3) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=3) + default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=230/242 width=175) + Output:["_col1"] + Filter Operator [FIL_24] (rows=230/242 width=175) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=175) + default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL