From 01b39c7e343fe01a214efaabd70a23a15c7ad8e6 Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Sun, 1 Nov 2015 11:33:00 -0800
Subject: [PATCH] HIVE-12313 : Turn hive.optimize.union.remove on by default

---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |    2 +-
 .../apache/hadoop/hive/ql/exec/FetchOperator.java  |    3 +-
 .../results/clientpositive/dynamic_rdd_cache.q.out |  116 +-
 .../results/clientpositive/groupby_sort_1_23.q.out |  404 +++----
 .../clientpositive/groupby_sort_skew_1_23.q.out    |  534 ++++-----
 ql/src/test/results/clientpositive/input25.q.out   |   39 +-
 ql/src/test/results/clientpositive/input26.q.out   |   59 +-
 ql/src/test/results/clientpositive/input41.q.out   |    2 -
 ql/src/test/results/clientpositive/lineage1.q.out  |   83 +-
 .../llap/dynamic_partition_pruning_2.q.out         |   22 +-
 .../results/clientpositive/llap/tez_union.q.out    |   35 +-
 .../results/clientpositive/load_dyn_part14.q.out   |  101 +-
 .../results/clientpositive/multiMapJoin2.q.out     |  133 ++-
 .../results/clientpositive/optimize_nullscan.q.out |  351 ++----
 .../results/clientpositive/ppd_union_view.q.out    |  420 +++----
 .../results/clientpositive/quotedid_skew.q.out     |   45 +-
 .../results/clientpositive/skewjoin_mapjoin1.q.out |  162 +--
 .../clientpositive/skewjoin_mapjoin10.q.out        |  162 +--
 .../clientpositive/skewjoin_mapjoin11.q.out        |   81 +-
 .../results/clientpositive/skewjoin_mapjoin2.q.out |  126 +--
 .../results/clientpositive/skewjoin_mapjoin3.q.out |   81 +-
 .../results/clientpositive/skewjoin_mapjoin4.q.out |  105 +-
 .../results/clientpositive/skewjoin_mapjoin7.q.out |  202 ++--
 .../test/results/clientpositive/skewjoinopt1.q.out |   90 +-
 .../results/clientpositive/skewjoinopt11.q.out     |   80 +-
 .../results/clientpositive/skewjoinopt12.q.out     |   45 +-
 .../results/clientpositive/skewjoinopt15.q.out     |   90 +-
 .../results/clientpositive/skewjoinopt16.q.out     |   45 +-
 .../results/clientpositive/skewjoinopt17.q.out     |   90 +-
 .../results/clientpositive/skewjoinopt19.q.out     |   45 +-
 .../test/results/clientpositive/skewjoinopt2.q.out |   90 +-
 .../results/clientpositive/skewjoinopt20.q.out     |   45 +-
 .../test/results/clientpositive/skewjoinopt3.q.out |   90 +-
 .../test/results/clientpositive/skewjoinopt4.q.out |   90 +-
 .../test/results/clientpositive/skewjoinopt5.q.out |   45 +-
 .../test/results/clientpositive/skewjoinopt6.q.out |   45 +-
 .../test/results/clientpositive/skewjoinopt7.q.out |   45 +-
 .../test/results/clientpositive/skewjoinopt8.q.out |   45 +-
 ql/src/test/results/clientpositive/stats1.q.out    |   97 +-
 .../tez/dynamic_partition_pruning_2.q.out          |   22 +-
 .../results/clientpositive/tez/explainuser_2.q.out |  512 ++++-----
 .../clientpositive/tez/optimize_nullscan.q.out     |   48 +-
 .../results/clientpositive/tez/tez_union.q.out     |   35 +-
 .../test/results/clientpositive/tez/union4.q.out   |   58 +-
 .../test/results/clientpositive/tez/union6.q.out   |   11 +-
 ql/src/test/results/clientpositive/union10.q.out   |  119 +-
 ql/src/test/results/clientpositive/union12.q.out   |  119 +-
 ql/src/test/results/clientpositive/union22.q.out   |  382 +++----
 ql/src/test/results/clientpositive/union24.q.out   | 1176 +++++++++-----------
 ql/src/test/results/clientpositive/union28.q.out   |  198 ++--
 ql/src/test/results/clientpositive/union30.q.out   |  237 ++--
 ql/src/test/results/clientpositive/union32.q.out   |  204 ++--
 ql/src/test/results/clientpositive/union33.q.out   |  276 +++--
 ql/src/test/results/clientpositive/union4.q.out    |   89 +-
 ql/src/test/results/clientpositive/union6.q.out    |  111 +-
 .../results/clientpositive/union_top_level.q.out   |  390 ++-----
 .../clientpositive/vector_multi_insert.q.out       |    5 +-
 57 files changed, 3308 insertions(+), 5029 deletions(-)
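
Note on the change: with hive.optimize.union.remove enabled, a plan in which a UNION ALL feeds
directly into a file sink skips the extra union stage and lets each branch write into its own
sub-directory of the final output; the FetchOperator hunk below appears to be the matching
read-side change, turning on recursive input listing so fetch tasks still see those sub-directories.
A minimal HiveQL sketch of the query shape this default affects, modeled on the groupby_sort tests
updated in this patch (T1 and outputTbl1 are those tests' fixture tables, shown here only for
illustration, not something this patch adds):

    set hive.optimize.union.remove=true;
    -- assumption: sub-directory output also needs this flag and an engine where
    -- MAPREDUCE-1501 is available, as the HiveConf comment below notes
    set hive.mapred.supports.subdirectories=true;

    INSERT OVERWRITE TABLE outputTbl1
    SELECT * FROM (
      SELECT key, count(1) AS cnt FROM T1 GROUP BY key
      UNION ALL
      SELECT key + key AS key, count(1) AS cnt FROM T1 GROUP BY key + key
    ) subq1;

With the optimization on, each branch's group-by writes its result directly under the outputTbl1
target directory instead of being re-scanned by a separate union stage, which is what the plan
changes in the .q.out files below reflect.
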
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7a8517b..2def618 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1298,7 +1298,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
     // whether to optimize union followed by select followed by filesink
     // It creates sub-directories in the final output, so should not be turned on in systems
     // where MAPREDUCE-1501 is not present
-    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false,
+    HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", true,
         "Whether to remove the union and push the operators between union and the filesink above union. \n" +
         "This avoids an extra scan of the output by union. This is independently useful for union\n" +
         "queries, and specially useful when hive.optimize.skewjoin.compiletime is set to true, since an\n" +
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 157115b..5bb618b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -60,6 +60,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -351,7 +352,7 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException
     // not using FileInputFormat.setInputPaths() here because it forces a connection to the
     // default file system - which may or may not be online during pure metadata operations
     job.set("mapred.input.dir", StringUtils.escapeString(currPath.toString()));
-
+    job.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);
     // Fetch operator is not vectorized and as such turn vectorization flag off so that
     // non-vectorized record reader is created below.
HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 501a86f..58bf26f 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -405,16 +405,15 @@ INSERT OVERWRITE TABLE tmptable POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -441,69 +440,22 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) + expressions: 'tst1' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: true - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -519,10 +471,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -534,7 +486,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -546,13 +498,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -576,17 +528,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst2' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -610,15 +564,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst3' (type: string), _col0 (type: bigint) + expressions: 'tst3' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: true + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index d0644f8..cdb054b 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -3105,122 +3105,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-2 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-8 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -3238,110 +3134,45 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) + expressions: UDFToInteger(UDFToDouble(_col0)) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + 
columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3390,12 +3221,11 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] -#### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -3428,11 +3258,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -3516,7 +3346,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -3600,12 +3430,134 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + GatherStats: 
false + Select Operator + expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: t1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 2 + numRows 6 + rawDataSize 24 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 2 + numRows 6 + rawDataSize 24 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + name: default.t1 + Truncated Path -> Alias: + /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -3634,16 +3586,6 @@ POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -1 1 -14 1 -16 2 -2 1 -2 1 -3 1 -4 1 -6 1 -7 1 -8 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -3883,13 +3825,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 + numFiles 0 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -3913,13 +3855,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 + numFiles 0 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index a0c7cc0..8018e47 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -3365,187 +3365,19 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-10 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-9 depends on stages: Stage-8 STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: 
-#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: partials - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -3563,110 +3395,45 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) + expressions: UDFToInteger(UDFToDouble(_col0)) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE 
true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3715,12 +3482,11 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] -#### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -3753,11 +3519,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -3841,7 +3607,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -3925,12 +3691,198 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE + GatherStats: false + Select Operator + expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: t1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 2 + numRows 6 + rawDataSize 24 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,val + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 2 + numRows 6 + rawDataSize 24 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + name: default.t1 + Truncated Path -> Alias: + /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types double,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Path -> 
Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types double,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types double,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -3959,16 +3911,6 @@ POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -1 1 -14 1 -16 2 -2 1 -2 1 -3 1 -4 1 -6 1 -7 1 -8 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -4208,13 +4150,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 + numFiles 0 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -4238,13 +4180,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 32 + numFiles 0 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 
key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 + totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 diff --git a/ql/src/test/results/clientpositive/input25.q.out b/ql/src/test/results/clientpositive/input25.q.out index d0a97fa..82e1015 100644 --- a/ql/src/test/results/clientpositive/input25.q.out +++ b/ql/src/test/results/clientpositive/input25.q.out @@ -36,9 +36,8 @@ select * from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -72,36 +71,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -132,10 +108,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/input26.q.out b/ql/src/test/results/clientpositive/input26.q.out index b917b86..272b71e 100644 --- a/ql/src/test/results/clientpositive/input26.q.out +++ b/ql/src/test/results/clientpositive/input26.q.out @@ -14,9 +14,8 @@ select * from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -43,52 +42,21 @@ STAGE PLANS: Number of rows: 5 Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column 
stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -114,15 +82,16 @@ STAGE PLANS: Number of rows: 5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), '14' (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '14' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/input41.q.out b/ql/src/test/results/clientpositive/input41.q.out index 7cc1007..158c2a0 100644 --- a/ql/src/test/results/clientpositive/input41.q.out +++ b/ql/src/test/results/clientpositive/input41.q.out @@ -35,5 +35,3 @@ POSTHOOK: query: select * from dest_sp x order by x.cnt limit 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_sp #### A masked pattern was here #### -0 -500 diff --git a/ql/src/test/results/clientpositive/lineage1.q.out b/ql/src/test/results/clientpositive/lineage1.q.out index a655c6c..62589f0 100644 --- a/ql/src/test/results/clientpositive/lineage1.q.out +++ b/ql/src/test/results/clientpositive/lineage1.q.out @@ -34,15 +34,14 @@ FROM (SELECT t1.key, p1.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-10 is a root stage + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-9 is a root stage STAGE PLANS: Stage: Stage-1 @@ -83,54 +82,22 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_l1 - TableScan - Union - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_l1 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_l1 - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -146,10 +113,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,7 +128,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -173,13 +140,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -217,15 +184,17 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_l1 PREHOOK: query: INSERT OVERWRITE TABLE dest_l1 SELECT j.* diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out index cec4219..5cd06ac 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out @@ -830,8 +830,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -848,7 +848,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Filter Operator @@ -860,13 +860,13 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Map 3 + Map 2 Map Operator Tree: TableScan alias: 
dim_shops @@ -896,7 +896,7 @@ STAGE PLANS: Target column: dim_shops_id Target Vertex: Map 1 Execution mode: llap - Map 4 + Map 3 Map Operator Tree: TableScan alias: agg_01 @@ -910,7 +910,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Filter Operator @@ -922,13 +922,13 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Map 5 + Map 4 Map Operator Tree: TableScan alias: dim_shops @@ -956,10 +956,8 @@ STAGE PLANS: Partition key expr: dim_shops_id Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id - Target Vertex: Map 4 + Target Vertex: Map 3 Execution mode: llap - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_union.q.out b/ql/src/test/results/clientpositive/llap/tez_union.q.out index b60779a..1fd9de2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union.q.out @@ -16,8 +16,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -40,18 +39,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 775 Data size: 8233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Map 3 + Map 2 Map Operator Tree: TableScan alias: s1 @@ -69,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Map 4 + Map 3 Map Operator Tree: TableScan alias: s1 @@ -80,14 +79,12 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 775 Data size: 8233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -949,8 +946,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 1 <- Map 2 
(BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -973,18 +970,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Map 3 + Map 2 Map Operator Tree: TableScan alias: s2 @@ -1002,7 +999,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Map 4 + Map 3 Map Operator Tree: TableScan alias: s2 @@ -1022,18 +1019,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap - Map 5 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -1051,8 +1048,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index 57a1a93..66c0d25 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -55,16 +55,15 @@ select key, value from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -91,52 +90,17 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - 
TableScan - Union - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - TableScan - Union - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - TableScan - Union - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -154,10 +118,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -169,7 +133,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -181,13 +145,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -211,12 +175,14 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -240,10 +206,12 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 PREHOOK: query: insert overwrite table nzhang_part14 partition(value) select key, value from ( @@ -267,9 +235,7 @@ select key, value from ( POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@nzhang_part14@value= -POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] -POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] PREHOOK: query: show partitions nzhang_part14 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@nzhang_part14 @@ -277,22 +243,15 @@ POSTHOOK: query: show partitions nzhang_part14 POSTHOOK: type: SHOWPARTITIONS POSTHOOK: Input: default@nzhang_part14 value= -value=__HIVE_DEFAULT_PARTITION__ PREHOOK: query: select * from nzhang_part14 where value <> 'a' PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_part14 PREHOOK: Input: default@nzhang_part14@value= -PREHOOK: Input: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### POSTHOOK: query: select * from nzhang_part14 where value <> 'a' POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_part14 POSTHOOK: Input: default@nzhang_part14@value= -POSTHOOK: Input: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -k1 __HIVE_DEFAULT_PARTITION__ -k1 __HIVE_DEFAULT_PARTITION__ -k2 __HIVE_DEFAULT_PARTITION__ -k2 __HIVE_DEFAULT_PARTITION__ k3 k3 diff --git a/ql/src/test/results/clientpositive/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/multiMapJoin2.q.out index 46b717f..750467b 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -2188,16 +2188,18 @@ SELECT * FROM ( ) x POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-15 is a root stage - Stage-4 depends on stages: Stage-15 - Stage-14 depends on stages: Stage-4, Stage-8 - Stage-2 depends on stages: Stage-14 + Stage-14 is a root stage + Stage-3 depends on stages: Stage-14 + Stage-13 depends on stages: Stage-3 + Stage-9 depends on stages: Stage-13 Stage-16 is a root stage - Stage-8 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-16 + Stage-15 depends on stages: Stage-7 + Stage-11 depends on stages: Stage-15 + Stage-0 depends on stages: Stage-9 STAGE PLANS: - Stage: Stage-15 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: null-subquery1:$hdt$_0-subquery1:$hdt$_1:$hdt$_1:a @@ -2220,7 +2222,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2266,15 +2268,12 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: null-subquery1:$hdt$_0-subquery1:$hdt$_0:a Fetch Operator limit: -1 - null-subquery2:$hdt$_0-subquery2:$hdt$_0:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: null-subquery1:$hdt$_0-subquery1:$hdt$_0:a TableScan @@ -2291,23 +2290,8 @@ STAGE PLANS: keys: 0 
_col0 (type: string) 1 _col0 (type: string) - null-subquery2:$hdt$_0-subquery2:$hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Stage: Stage-2 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -2319,33 +2303,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -2372,7 +2336,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2418,6 +2382,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:$hdt$_0-subquery2:$hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:$hdt$_0-subquery2:$hdt$_0:a + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + 
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 @@ -2448,12 +2457,14 @@ POSTHOOK: query: SELECT * FROM ( POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -RUN: Stage-15:MAPREDLOCAL -RUN: Stage-16:MAPREDLOCAL -RUN: Stage-4:MAPRED -RUN: Stage-8:MAPRED RUN: Stage-14:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-16:MAPREDLOCAL +RUN: Stage-3:MAPRED +RUN: Stage-7:MAPRED +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-15:MAPREDLOCAL +RUN: Stage-9:MAPRED +RUN: Stage-11:MAPRED 0 0 0 diff --git a/ql/src/test/results/clientpositive/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/optimize_nullscan.q.out index de85410..4b7bb8e 100644 --- a/ql/src/test/results/clientpositive/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/optimize_nullscan.q.out @@ -770,9 +770,8 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -798,9 +797,9 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Path -> Alias: - -mr-10005default.src{} [null-subquery1:_u1-subquery1:src] + -mr-10003default.src{} [null-subquery1:_u1-subquery1:src] Path -> Partition: - -mr-10005default.src{} + -mr-10003default.src{} Partition input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -844,7 +843,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10005default.src{} [null-subquery1:_u1-subquery1:src] + -mr-10003default.src{} [null-subquery1:_u1-subquery1:src] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -857,15 +856,20 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0 columns.types bigint escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false @@ -874,107 +878,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - 
NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1197,15 +1100,20 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0 columns.types bigint escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false @@ -1758,12 +1666,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1774,32 +1682,36 @@ STAGE PLANS: isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - TableScan - alias: srcpart - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: 1 - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: - -mr-10005default.src{} [null-subquery2:a-subquery2:src] + -mr-10003default.src{} [null-subquery1:a-subquery1:src] Path -> Partition: - -mr-10005default.src{} + -mr-10003default.src{} Partition input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1843,34 +1755,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10005default.src{} [null-subquery2:a-subquery2:src] - Needs Tagging: true - Reduce 
Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + -mr-10003default.src{} [null-subquery1:a-subquery1:src] Stage: Stage-2 Map Reduce @@ -1883,63 +1768,30 @@ STAGE PLANS: isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + tag: 0 + auto parallelism: false TableScan + alias: srcpart + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false - Union - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + 
tag: 1 + auto parallelism: false Path -> Alias: - -mr-10004default.src{} [null-subquery1:a-subquery1:src] -#### A masked pattern was here #### + -mr-10004default.src{} [null-subquery2:a-subquery2:src] Path -> Partition: -mr-10004default.src{} Partition @@ -1984,29 +1836,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: - -mr-10004default.src{} [null-subquery1:a-subquery1:src] + -mr-10004default.src{} [null-subquery2:a-subquery2:src] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ppd_union_view.q.out b/ql/src/test/results/clientpositive/ppd_union_view.q.out index 5dd9ae3..d3cc0b7 100644 --- a/ql/src/test/results/clientpositive/ppd_union_view.q.out +++ b/ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -160,12 +160,50 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_new + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2011-10-13') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2011-10-13' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 
+#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -307,126 +345,32 @@ STAGE PLANS: outputColumnNames: _col1, _col6 Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col6 (type: string), _col1 (type: string), '2011-10-13' (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 - columns.types string,string + columns _col0,_col1,_col2 + columns.types string:string:string escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1_new - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (ds = '2011-10-13') (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Stage: Stage-0 Fetch Operator limit: -1 @@ -499,12 +443,96 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_new + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), '2011-10-15' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2011-10-15 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2011-10-15 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1_new + numFiles 1 + numRows 1 + partition_columns ds + partition_columns.types string + rawDataSize 11 + serialization.ddl struct t1_new { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 12 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.t1_new + partition_columns ds + partition_columns.types string + serialization.ddl struct t1_new { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1_new + name: default.t1_new + Truncated Path -> Alias: + /t1_new/ds=2011-10-15 [null-subquery1:$hdt$_0-subquery1:t1_new] + + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -558,168 +586,32 @@ STAGE PLANS: outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col3 (type: string), _col1 (type: string), '2011-10-15' (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 - columns.types string,string + columns _col0,_col1,_col2 + columns.types string:string:string escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf 
true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t1_new - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 11 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string 
- escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: ds=2011-10-15 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2011-10-15 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1_new - numFiles 1 - numRows 1 - partition_columns ds - partition_columns.types string - rawDataSize 11 - serialization.ddl struct t1_new { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 12 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1_new - partition_columns ds - partition_columns.types string - serialization.ddl struct t1_new { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1_new - name: default.t1_new - Truncated Path -> Alias: - /t1_new/ds=2011-10-15 [null-subquery1:$hdt$_0-subquery1:t1_new] -#### A masked pattern was here #### - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/quotedid_skew.q.out b/ql/src/test/results/clientpositive/quotedid_skew.q.out index 9ac2b62..235af7a 100644 --- a/ql/src/test/results/clientpositive/quotedid_skew.q.out +++ b/ql/src/test/results/clientpositive/quotedid_skew.q.out @@ -48,9 +48,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a. `!@#$%^&*()_q` = b. 
`!@#$%^&*()_q` POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -95,36 +94,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -163,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out index 91d31cd..e90d6d7 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out @@ -51,20 +51,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -77,19 +76,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - 
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -110,15 +98,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -137,15 +148,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -184,20 +193,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE 
PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -210,19 +218,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -243,15 +240,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -270,15 +290,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out 
b/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out index ca966c2..06b4570 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out @@ -91,20 +91,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -117,19 +116,8 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -150,15 +138,38 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -177,15 +188,13 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -224,20 +233,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -250,19 +258,8 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -283,15 +280,38 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -310,15 +330,13 @@ STAGE PLANS: expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 7 Data 
size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out index 51445a5..57e352b 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out @@ -55,20 +55,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -81,19 +80,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -114,15 +102,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 1 Data size: 
30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -141,15 +152,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index f3b5526..98dc686 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out @@ -55,20 +55,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -81,19 +80,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -114,15 +102,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -141,15 +152,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -189,9 +198,8 @@ SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -236,36 +244,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -304,10 +289,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out index 1902c47..bf62a17 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out @@ -55,20 +55,19 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -81,19 +80,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -114,15 +102,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work 
+ Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -141,15 +152,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out index 8101595..40f1639 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out @@ -71,12 +71,14 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: a @@ -85,12 +87,6 @@ STAGE PLANS: c Fetch Operator limit: -1 - subquery1:a - Fetch Operator - limit: -1 - subquery1:c - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a TableScan @@ -116,6 +112,50 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) 2 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: 
string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + subquery1:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: subquery1:a TableScan alias: a @@ -141,42 +181,13 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) - Stage: Stage-2 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - alias: b - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -193,15 +204,13 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out index 25ff20c..8d6dba4 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out @@ -63,25 +63,26 @@ select * from ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-16 is a root stage - Stage-2 depends on stages: Stage-16 - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-15 is a root stage + Stage-12 depends on stages: Stage-15 + Stage-5 depends on stages: Stage-12, Stage-14 Stage-17 is a root stage - Stage-6 depends on stages: Stage-17 - Stage-0 depends on stages: Stage-3 + Stage-13 depends on stages: Stage-17 + Stage-16 depends on stages: Stage-13 + Stage-2 depends on stages: Stage-16 + Stage-18 is a root stage + Stage-14 depends on stages: Stage-18 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-16 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:subq1-subquery1:a - Fetch Operator - limit: -1 - subquery1:a + null-subquery2:subq1-subquery2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:subq1-subquery1:a + null-subquery2:subq1-subquery2:a TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -92,19 +93,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery1:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-2 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -125,14 +115,40 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Map Reduce + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-13 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -151,60 
+167,23 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-17 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery2:subq1-subquery2:a - Fetch Operator - limit: -1 - subquery2:a + null-subquery1:subq1-subquery1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery2:subq1-subquery2:a + null-subquery1:subq1-subquery1:a TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -215,19 +194,8 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - subquery2:a - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Stage: Stage-6 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -250,12 +218,42 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + subquery2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subquery2:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-14 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -274,14 +272,12 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoinopt1.q.out b/ql/src/test/results/clientpositive/skewjoinopt1.q.out index f3aa0f7..e24e824 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt1.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt1.q.out @@ -48,9 +48,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -95,36 +94,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -163,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -204,9 +181,8 @@ SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -251,36 +227,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -319,10 +272,11 @@ STAGE PLANS: 
Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt11.q.out b/ql/src/test/results/clientpositive/skewjoinopt11.q.out index 121054b..8c9909a 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt11.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt11.q.out @@ -60,13 +60,12 @@ select * from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-8 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5, Stage-10 - Stage-8 is a root stage - Stage-10 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-7 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4, Stage-9 + Stage-7 is a root stage + Stage-9 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -122,55 +121,19 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE 
table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -217,29 +180,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-5 Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -283,7 +227,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/skewjoinopt12.q.out b/ql/src/test/results/clientpositive/skewjoinopt12.q.out index 7f5a932..1ff20ee 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt12.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt12.q.out @@ -50,9 +50,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -95,36 +94,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,10 +137,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt15.q.out b/ql/src/test/results/clientpositive/skewjoinopt15.q.out index dbf68f1..1dd370e 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt15.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt15.q.out @@ -88,9 +88,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -135,36 +134,13 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -203,10 +179,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -244,9 +221,8 @@ SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -291,36 +267,13 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -359,10 +312,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt16.q.out b/ql/src/test/results/clientpositive/skewjoinopt16.q.out index 937a65d..7d1c927 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt16.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt16.q.out @@ -50,9 +50,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ 
-95,36 +94,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,10 +137,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt17.q.out b/ql/src/test/results/clientpositive/skewjoinopt17.q.out index 581e914..d0dfe5b 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt17.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt17.q.out @@ -54,9 +54,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,36 +100,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: 
Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -169,10 +145,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -266,9 +243,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -311,36 +287,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: 
Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -377,10 +330,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt19.q.out b/ql/src/test/results/clientpositive/skewjoinopt19.q.out index 91167db..81a1ecb 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt19.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt19.q.out @@ -52,9 +52,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -99,36 +98,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -167,10 +143,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt2.q.out b/ql/src/test/results/clientpositive/skewjoinopt2.q.out index 132633f..d827464 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt2.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt2.q.out @@ -58,9 +58,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -103,36 +102,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -169,10 +145,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -207,9 +184,8 @@ SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.va POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -252,36 +228,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 
Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -318,10 +271,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt20.q.out b/ql/src/test/results/clientpositive/skewjoinopt20.q.out index 15e96fd..077370b 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt20.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt20.q.out @@ -52,9 +52,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -99,36 +98,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: 
false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -167,10 +143,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt3.q.out b/ql/src/test/results/clientpositive/skewjoinopt3.q.out index fad53c3..d60be16 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -52,9 +52,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -99,36 +98,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -167,10 +143,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -208,9 +185,8 @@ SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -255,36 +231,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -323,10 +276,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt4.q.out 
b/ql/src/test/results/clientpositive/skewjoinopt4.q.out index 1d2a5a4..7d9db64 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -48,9 +48,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -95,36 +94,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -163,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -202,9 +179,8 @@ SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -249,36 +225,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -317,10 +270,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt5.q.out b/ql/src/test/results/clientpositive/skewjoinopt5.q.out index f395da8..d22d080 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt5.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt5.q.out @@ -50,9 +50,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -97,36 +96,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -165,10 +141,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt6.q.out b/ql/src/test/results/clientpositive/skewjoinopt6.q.out index ac926f6..849fe6c 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt6.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt6.q.out @@ -52,9 +52,8 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -99,36 +98,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -167,10 +143,11 @@ STAGE 
PLANS: Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt7.q.out b/ql/src/test/results/clientpositive/skewjoinopt7.q.out index 823ac41..50cbf22 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt7.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt7.q.out @@ -68,9 +68,8 @@ SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -129,36 +128,13 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -210,10 +186,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: 
Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt8.q.out b/ql/src/test/results/clientpositive/skewjoinopt8.q.out index bcd4e3a..a8f35eb 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt8.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt8.q.out @@ -66,9 +66,8 @@ SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,36 +126,13 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 132 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -208,10 +184,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/stats1.q.out b/ql/src/test/results/clientpositive/stats1.q.out index ac34bbb..9706fc9 100644 --- a/ql/src/test/results/clientpositive/stats1.q.out +++ b/ql/src/test/results/clientpositive/stats1.q.out @@ -26,9 +26,9 @@ FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1, Stage-3 + Stage-2 depends on stages: 
Stage-0 + Stage-3 is a root stage STAGE PLANS: Stage: Stage-1 @@ -60,42 +60,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - alias: s2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Stage: Stage-0 Move Operator @@ -107,9 +77,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 @@ -138,32 +127,6 @@ POSTHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable #### A masked pattern was here #### - - - - - val_165 - val_193 - val_265 - val_27 - val_409 - val_484 -128 -146 val_146 -150 val_150 -213 val_213 -224 -238 val_238 -255 val_255 -273 val_273 -278 val_278 -311 val_311 -369 -401 val_401 -406 val_406 -66 val_66 -98 val_98 -tst1 500 PREHOOK: query: DESCRIBE FORMATTED tmptable PREHOOK: type: DESCTABLE 
PREHOOK: Input: default@tmptable @@ -184,8 +147,8 @@ Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE true numFiles 2 - numRows 26 - rawDataSize 199 + numRows 1 + rawDataSize 8 totalSize 225 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out index 430d5ad..cf8e8eb 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out @@ -814,8 +814,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -832,7 +832,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Filter Operator @@ -844,12 +844,12 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 3 + Map 2 Map Operator Tree: TableScan alias: dim_shops @@ -878,7 +878,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id Target Vertex: Map 1 - Map 4 + Map 3 Map Operator Tree: TableScan alias: agg_01 @@ -892,7 +892,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Filter Operator @@ -904,12 +904,12 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 5 + Map 4 Map Operator Tree: TableScan alias: dim_shops @@ -937,9 +937,7 @@ STAGE PLANS: Partition key expr: dim_shops_id Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id - Target Vertex: Map 4 - Union 2 - Vertex: Union 2 + Target Vertex: Map 3 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index ff055ea..a514606 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -3048,53 +3048,251 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 12 <- Union 9 (CONTAINS) -Map 13 <- Union 9 (CONTAINS) -Map 17 <- Map 16 (BROADCAST_EDGE) -Map 18 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 20 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 21 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 8 <- Union 9 (CONTAINS) -Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 11 <- Union 8 (CONTAINS) +Map 12 <- Union 8 (CONTAINS) +Map 16 <- Map 15 (BROADCAST_EDGE) +Map 17 <- Map 16 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 19 <- Map 16 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 20 <- Map 16 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 21 <- Map 16 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 7 <- Union 8 (CONTAINS) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Union 4 - |<-Map 18 [CONTAINS] - | File Output Operator [FS_75] + Reducer 10 + File Output Operator [FS_119] + compressed:false + Statistics:Num rows: 620 Data size: 6547 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_44] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 620 Data size: 6547 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_124] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col1 (type: string)","1":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col4"] + | Statistics:Num rows: 620 Data size: 6547 Basic stats: COMPLETE Column stats: NONE + |<-Map 14 [SIMPLE_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_33] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_108] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_32] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_40] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_123] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col0 (type: string)","1":"_col1 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE + |<-Map 13 [SIMPLE_EDGE] + | Reduce Output Operator [RS_37] + | key 
expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_31] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_107] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_30] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 8 [SIMPLE_EDGE] + |<-Map 11 [CONTAINS] + | Reduce Output Operator [RS_35] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_24] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_105] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_23] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 12 [CONTAINS] + | Reduce Output Operator [RS_35] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_28] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_106] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_27] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [CONTAINS] + Reduce Output Operator [RS_35] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_22] + outputColumnNames:["_col0"] + Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_104] + predicate:value is not null (type: boolean) + Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_21] + alias:x + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + File Output Operator [FS_118] + compressed:false + Statistics:Num rows: 317 Data size: 3333 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_20] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 317 Data size: 3333 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_122] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col1 (type: string)","1":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col4"] + | Statistics:Num rows: 317 Data size: 3333 Basic stats: COMPLETE Column stats: NONE + |<-Map 6 [SIMPLE_EDGE] + | 
Reduce Output Operator [RS_18] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_103] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_16] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_121] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | HybridGraceHashJoin:true + | | keys:{"Map 1":"_col0 (type: string)","Map 5":"_col1 (type: string)"} + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE + | |<-Map 5 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_13] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_7] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_102] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_6] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_127] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_7] + | |<-Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [CONTAINS] + Reduce Output Operator [RS_16] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_121] + | condition map:[{"":"Inner Join 0 to 1"}] + | HybridGraceHashJoin:true + | keys:{"Map 4":"_col0 (type: string)","Map 5":"_col1 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE + |<- Please refer to the previous Map 5 [BROADCAST_EDGE] + |<-Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_101] + predicate:value is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union 18 + |<-Map 17 [CONTAINS] + | File Output Operator [FS_120] | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} | Select Operator [SEL_73] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] + | Map Join Operator [MAPJOIN_126] | | condition map:[{"":"Inner Join 0 to 1"}] | | HybridGraceHashJoin:true - | | keys:{"Map 17":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | keys:{"Map 16":"_col1 (type: string)","Map 17":"_col0 (type: string)"} | | outputColumnNames:["_col0","_col3"] | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [BROADCAST_EDGE] + | |<-Map 16 [BROADCAST_EDGE] | | Reduce Output Operator [RS_69] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col0 (type: string), _col3 (type: string) - | | Map Join Operator [MAPJOIN_119] + | | Map Join Operator [MAPJOIN_125] | | | condition map:[{"":"Inner Join 0 to 1"}] | | | HybridGraceHashJoin:true - | | | keys:{"Map 16":"_col0 (type: string)","Map 17":"_col0 (type: string)"} + | | | keys:{"Map 15":"_col0 (type: string)","Map 16":"_col0 (type: string)"} | | | outputColumnNames:["_col0","_col1","_col3"] | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 16 [BROADCAST_EDGE] + | | |<-Map 15 [BROADCAST_EDGE] | | | Reduce Output Operator [RS_64] | | | key expressions:_col0 (type: string) | | | Map-reduce partition columns:_col0 (type: string) @@ -3119,27 +3317,27 @@ Stage-0 | | TableScan [TS_49] | | alias:x | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_125] + | | Reduce Output Operator [RS_131] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] - | | Reduce Output Operator [RS_126] + | | Please refer to the previous Map Join Operator [MAPJOIN_125] + | | Reduce Output Operator [RS_132] | | key expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] - | | Reduce Output Operator [RS_127] + | | Please refer to the previous Map Join Operator [MAPJOIN_125] + | | Reduce Output Operator [RS_133] | | key 
expressions:_col1 (type: string) | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] + | | Please refer to the previous Map Join Operator [MAPJOIN_125] | |<-Select Operator [SEL_52] | outputColumnNames:["_col0"] | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE @@ -3150,20 +3348,20 @@ Stage-0 | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE |<-Map 19 [CONTAINS] - | File Output Operator [FS_75] + | File Output Operator [FS_120] | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} | Select Operator [SEL_73] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] + | Map Join Operator [MAPJOIN_126] | | condition map:[{"":"Inner Join 0 to 1"}] | | HybridGraceHashJoin:true - | | keys:{"Map 17":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | keys:{"Map 16":"_col1 (type: string)","Map 19":"_col0 (type: string)"} | | outputColumnNames:["_col0","_col3"] | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] + | |<- Please refer to the previous Map 16 [BROADCAST_EDGE] | |<-Select Operator [SEL_54] | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -3174,20 +3372,20 @@ Stage-0 | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map 20 [CONTAINS] - | File Output Operator [FS_75] + | File Output Operator [FS_120] | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} | Select Operator [SEL_73] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] + | Map Join Operator [MAPJOIN_126] | | condition map:[{"":"Inner Join 0 to 1"}] | | HybridGraceHashJoin:true - | | keys:{"Map 17":"_col1 (type: string)","Map 20":"_col0 (type: string)"} + | | keys:{"Map 16":"_col1 (type: string)","Map 20":"_col0 (type: string)"} | | outputColumnNames:["_col0","_col3"] | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] + | |<- Please refer to the previous Map 16 [BROADCAST_EDGE] | |<-Select Operator [SEL_58] | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -3198,227 +3396,29 @@ Stage-0 | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE |<-Map 21 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_73] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | HybridGraceHashJoin:true - | | keys:{"Map 17":"_col1 (type: string)","Map 21":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] - | |<-Select Operator [SEL_61] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_114] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_60] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 11 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_44] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 620 Data size: 6547 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_118] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"0":"_col1 (type: string)","1":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col4"] - | | Statistics:Num rows: 620 Data size: 6547 Basic stats: COMPLETE Column stats: NONE - | |<-Map 15 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_42] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_33] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_108] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_32] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_40] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_117] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"0":"_col0 (type: string)","1":"_col1 (type: string)"} - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE - | |<-Map 14 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_37] - 
| | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Select Operator [SEL_31] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_107] - | | predicate:(value is not null and key is not null) (type: boolean) - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_30] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Union 9 [SIMPLE_EDGE] - | |<-Map 12 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_24] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_105] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_23] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 13 [CONTAINS] - | | Reduce Output Operator [RS_35] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_28] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_106] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_27] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map 8 [CONTAINS] - | Reduce Output Operator [RS_35] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_22] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_104] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_21] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [CONTAINS] - File Output Operator [FS_75] + File Output Operator [FS_120] compressed:false - Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Select Operator [SEL_20] + Select Operator [SEL_73] outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 317 Data size: 3333 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator 
[MERGEJOIN_116] + Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_126] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"0":"_col1 (type: string)","1":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col4"] - | Statistics:Num rows: 317 Data size: 3333 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_9] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_103] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Union 2 [SIMPLE_EDGE] - |<-Map 1 [CONTAINS] - | Reduce Output Operator [RS_16] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_115] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | HybridGraceHashJoin:true - | | keys:{"Map 1":"_col0 (type: string)","Map 6":"_col1 (type: string)"} - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE - | |<-Map 6 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_13] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Select Operator [SEL_7] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:(value is not null and key is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_6] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_121] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Please refer to the previous Select Operator [SEL_7] - | |<-Select Operator [SEL_1] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_0] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map 5 [CONTAINS] - Reduce Output Operator [RS_16] - key expressions:_col1 (type: string) - Map-reduce partition columns:_col1 (type: string) - sort order:+ - Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_115] - | condition map:[{"":"Inner 
Join 0 to 1"}] - | HybridGraceHashJoin:true - | keys:{"Map 5":"_col0 (type: string)","Map 6":"_col1 (type: string)"} - | outputColumnNames:["_col1"] - | Statistics:Num rows: 289 Data size: 3030 Basic stats: COMPLETE Column stats: NONE - |<- Please refer to the previous Map 6 [BROADCAST_EDGE] - |<-Select Operator [SEL_3] - outputColumnNames:["_col0"] - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_101] - predicate:value is not null (type: boolean) - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_2] - alias:y - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | HybridGraceHashJoin:true + | keys:{"Map 16":"_col1 (type: string)","Map 21":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col3"] + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + |<- Please refer to the previous Map 16 [BROADCAST_EDGE] + |<-Select Operator [SEL_61] + outputColumnNames:["_col0"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_114] + predicate:value is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_60] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, y.value diff --git a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out index bf9ba9b..1bd6f2f 100644 --- a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out @@ -669,8 +669,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -742,7 +742,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: -mr-10002default.src{} [src] - Map 4 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -968,7 +968,7 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -985,7 +985,7 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -998,7 +998,7 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1015,8 +1015,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -1538,8 +1536,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Union 2 (CONTAINS) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 
@@ -1561,7 +1558,7 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1579,12 +1576,11 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false Path -> Alias: -#### A masked pattern was here #### + -mr-10002default.src{} [src] Path -> Partition: -#### A masked pattern was here #### + -mr-10002default.src{} Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true @@ -1599,10 +1595,10 @@ STAGE PLANS: rawDataSize 5312 serialization.ddl struct src { string key, string value} serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe totalSize 5812 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1626,8 +1622,8 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] - Map 3 + -mr-10002default.src{} [src] + Map 2 Map Operator Tree: TableScan alias: src @@ -1645,9 +1641,9 @@ STAGE PLANS: tag: 0 auto parallelism: true Path -> Alias: - -mr-10002default.src{} [src] + -mr-10003default.src{} [src] Path -> Partition: - -mr-10002default.src{} + -mr-10003default.src{} Partition input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1691,8 +1687,8 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10002default.src{} [src] - Map 5 + -mr-10003default.src{} [src] + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -1709,7 +1705,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE tag: 1 auto parallelism: true - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Merge Join Operator @@ -1726,7 +1722,7 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1743,8 +1739,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_union.q.out b/ql/src/test/results/clientpositive/tez/tez_union.q.out index 5a7d0d6..64306c0 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union.q.out @@ -16,8 +16,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -40,17 +39,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: 
_col0, _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 775 Data size: 8233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 3 + Map 2 Map Operator Tree: TableScan alias: s1 @@ -67,7 +66,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: s1 @@ -78,13 +77,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 775 Data size: 8233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -928,8 +925,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -952,17 +949,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 3 + Map 2 Map Operator Tree: TableScan alias: s2 @@ -979,7 +976,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: s2 @@ -999,17 +996,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 5 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -1026,8 +1023,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch 
Operator diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out index 9d079ad..2e46e97 100644 --- a/ql/src/test/results/clientpositive/tez/union4.q.out +++ b/ql/src/test/results/clientpositive/tez/union4.q.out @@ -39,8 +39,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -59,7 +59,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 + Map 3 Map Operator Tree: TableScan alias: s1 @@ -83,22 +83,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) + expressions: 'tst1' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - Reducer 5 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -106,23 +102,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst2' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - Union 3 - Vertex: Union 3 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out index 4647278..51fbbaf 100644 --- a/ql/src/test/results/clientpositive/tez/union6.q.out +++ b/ql/src/test/results/clientpositive/tez/union6.q.out @@ -37,8 +37,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 4 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -57,7 +56,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 + Map 3 Map Operator Tree: TableScan alias: s2 @@ -68,7 +67,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,14 +86,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Union 3 - Vertex: Union 3 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/union10.q.out b/ql/src/test/results/clientpositive/union10.q.out index 417ae27..3a30436 100644 --- a/ql/src/test/results/clientpositive/union10.q.out +++ b/ql/src/test/results/clientpositive/union10.q.out @@ -32,16 +32,15 @@ insert overwrite table tmptable POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -68,69 +67,22 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) + expressions: 'tst1' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false 
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -146,10 +98,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,7 +113,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -173,13 +125,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -203,17 +155,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst2' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -237,15 +191,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst3' (type: string), _col0 (type: bigint) + expressions: 'tst3' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -275,6 +231,3 @@ POSTHOOK: query: select * from tmptable x sort by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable #### A masked pattern was here #### -tst1 500 -tst2 500 -tst3 500 diff --git a/ql/src/test/results/clientpositive/union12.q.out b/ql/src/test/results/clientpositive/union12.q.out index ee56f15..432a57c 100644 --- a/ql/src/test/results/clientpositive/union12.q.out +++ b/ql/src/test/results/clientpositive/union12.q.out @@ -32,16 +32,15 @@ insert overwrite table tmptable POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -68,69 +67,22 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) + expressions: 'tst1' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -146,10 +98,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,7 +113,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -173,13 +125,13 @@ STAGE PLANS: 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -203,17 +155,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst2' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -237,15 +191,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst3' (type: string), _col0 (type: bigint) + expressions: 'tst3' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -279,6 +235,3 @@ POSTHOOK: query: select * from tmptable x sort by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable #### A masked pattern was here #### -tst1 500 -tst2 25 -tst3 1000 diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out index beb039e..09f1549 100644 --- a/ql/src/test/results/clientpositive/union22.q.out +++ b/ql/src/test/results/clientpositive/union22.q.out @@ -219,19 +219,144 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage , consists of Stage-8, Stage-4 - Stage-8 has a backup stage: Stage-4 - Stage-6 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-4, Stage-6 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1, Stage-3, Stage-5 + Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage , consists of Stage-7, Stage-3 + Stage-7 has a backup stage: Stage-3 + 
Stage-5 depends on stages: Stage-7 + Stage-3 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dst_union22_delta + Statistics: Num rows: 500 Data size: 16936 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (k0 <= 50) (type: boolean) + Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: k1 (type: string), k2 (type: string), k3 (type: string), k4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ + Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.comments + columns.types string:string:string:string:string:string +#### A masked pattern was here #### + name default.dst_union22_delta + numFiles 1 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 16936 + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17436 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.comments + columns.types string:string:string:string:string:string +#### A masked pattern was here #### + name default.dst_union22_delta + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22_delta + name: default.dst_union22_delta + Truncated Path -> Alias: + /dst_union22_delta/ds=1 
[null-subquery1:subq-subquery1:dst_union22_delta] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-6 Conditional Operator - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: null-subquery2:subq-subquery2:b:dst_union22_delta @@ -307,7 +432,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -338,18 +463,30 @@ STAGE PLANS: Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ + Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 TotalFiles: 1 GatherStats: false MultiFileSpray: false @@ -451,188 +588,7 @@ STAGE PLANS: Truncated Path -> Alias: /dst_union22/ds=1 [null-subquery2:subq-subquery2:a] - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: dst_union22_delta - Statistics: Num rows: 500 Data size: 16936 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (k0 <= 50) (type: boolean) - Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: k1 (type: string), k2 (type: string), k3 (type: string), k4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 348 Data size: 
9684 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.comments - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.comments - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: ds=1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns k0,k1,k2,k3,k4,k5 - columns.comments - columns.types 
string:string:string:string:string:string -#### A masked pattern was here #### - name default.dst_union22_delta - numFiles 1 - numRows 500 - partition_columns ds - partition_columns.types string - rawDataSize 16936 - serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 17436 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k0,k1,k2,k3,k4,k5 - columns.comments - columns.types string:string:string:string:string:string -#### A masked pattern was here #### - name default.dst_union22_delta - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22_delta - name: default.dst_union22_delta - Truncated Path -> Alias: - /dst_union22_delta/ds=1 [null-subquery1:subq-subquery1:dst_union22_delta] -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.comments - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -788,18 +744,30 @@ STAGE PLANS: Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ + Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + partition_columns ds + partition_columns.types 
string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 TotalFiles: 1 GatherStats: false MultiFileSpray: false diff --git a/ql/src/test/results/clientpositive/union24.q.out b/ql/src/test/results/clientpositive/union24.q.out index c0f8cd0..5dd2797 100644 --- a/ql/src/test/results/clientpositive/union24.q.out +++ b/ql/src/test/results/clientpositive/union24.q.out @@ -180,16 +180,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-3 is a root stage + Stage-4 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: src5 + alias: src2 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -197,29 +199,37 @@ STAGE PLANS: predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src5 + base file name: src2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -229,11 +239,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src5 + name default.src2 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src5 { string key, i64 count} + serialization.ddl struct src2 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -249,86 +259,25 @@ STAGE PLANS: columns.comments columns.types 
string:bigint #### A masked pattern was here #### - name default.src5 + name default.src2 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src5 { string key, i64 count} + serialization.ddl struct src2 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src5 - name: default.src5 + name: default.src2 + name: default.src2 Truncated Path -> Alias: - /src5 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:src5] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /src2 [null-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1:src2] Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan alias: src3 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -340,118 +289,34 @@ STAGE PLANS: expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 360 Data size: 
1726 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - alias: src4 - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: src2 + base file name: src3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -461,11 +326,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src2 + name default.src3 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} + serialization.ddl struct src3 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -481,21 +346,64 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src2 + name default.src3 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} + serialization.ddl struct src3 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src2 - name: default.src2 + name: default.src3 + name: default.src3 + Truncated Path -> Alias: + /src3 [null-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1-subquery2:$hdt$_0-subquery2:src3] + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src4 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) < 10.0) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: src3 + base file name: src4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -505,11 +413,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src3 + name default.src4 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} + serialization.ddl struct src4 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -525,21 +433,56 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src3 + name default.src4 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} + serialization.ddl struct src4 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src3 - name: default.src3 + name: default.src4 + name: default.src4 + Truncated Path -> Alias: + /src4 [null-subquery1:$hdt$_0-subquery1-subquery2:$hdt$_0-subquery2:src4] + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src5 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) < 10.0) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: src4 + base file name: src5 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -549,11 +492,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src4 + name default.src5 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src4 { string key, i64 count} + serialization.ddl struct src5 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -569,23 +512,50 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src4 + name default.src5 
numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src4 { string key, i64 count} + serialization.ddl struct src5 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src4 - name: default.src4 + name: default.src5 + name: default.src5 Truncated Path -> Alias: - /src2 [null-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1:src2] - /src3 [null-subquery1:$hdt$_0-subquery1-subquery1:$hdt$_0-subquery1-subquery2:$hdt$_0-subquery2:src3] - /src4 [null-subquery1:$hdt$_0-subquery1-subquery2:$hdt$_0-subquery2:src4] + /src5 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:src5] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -777,51 +747,55 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: a + alias: src2 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key < 10) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - TableScan - alias: b - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: count (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was 
here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src4 + base file name: src2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -831,11 +805,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src4 + name default.src2 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src4 { string key, i64 count} + serialization.ddl struct src2 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -851,21 +825,64 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src4 + name default.src2 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src4 { string key, i64 count} + serialization.ddl struct src2 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src4 - name: default.src4 + name: default.src2 + name: default.src2 + Truncated Path -> Alias: + /src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2] + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: 
src5 + base file name: src3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -875,11 +892,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src5 + name default.src3 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src5 { string key, i64 count} + serialization.ddl struct src3 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -895,180 +912,61 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src5 + name default.src3 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src5 { string key, i64 count} + serialization.ddl struct src3 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src5 - name: default.src5 + name: default.src3 + name: default.src3 Truncated Path -> Alias: - /src4 [null-subquery2:s-subquery2:a] - /src5 [null-subquery2:s-subquery2:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + /src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3] - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: a Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key < 10) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - 
columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + tag: 0 + auto parallelism: false TableScan - alias: src3 + alias: b Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key < 10) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + tag: 1 + value expressions: count (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - 
columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: src2 + base file name: src4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1078,11 +976,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src2 + name default.src4 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} + serialization.ddl struct src4 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -1098,21 +996,21 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src2 + name default.src4 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} + serialization.ddl struct src4 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src2 - name: default.src2 + name: default.src4 + name: default.src4 #### A masked pattern was here #### Partition - base file name: src3 + base file name: src5 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1122,11 +1020,11 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src3 + name default.src5 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} + serialization.ddl struct src5 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 @@ -1142,22 +1040,57 @@ STAGE PLANS: columns.comments columns.types string:bigint #### A masked pattern was here #### - name default.src3 + name default.src5 numFiles 1 numRows 309 rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} + serialization.ddl struct src5 { string key, i64 count} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 1791 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src3 - name: default.src3 + name: default.src5 + name: default.src5 Truncated Path -> Alias: - /src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2] - /src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3] + /src4 [null-subquery2:s-subquery2:a] + /src5 [null-subquery2:s-subquery2:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 113 
Data size: 543 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1343,13 +1276,188 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,count + columns.comments + columns.types string:bigint +#### A masked pattern was here #### + name default.src2 + numFiles 1 + numRows 309 + rawDataSize 1482 + serialization.ddl struct src2 { string key, i64 count} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1791 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,count + columns.comments + columns.types string:bigint +#### A masked pattern was here #### + name default.src2 + numFiles 1 + numRows 309 + rawDataSize 1482 + serialization.ddl struct src2 { string key, i64 
count} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1791 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src2 + name: default.src2 + Truncated Path -> Alias: + /src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2] + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), count (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,count + columns.comments + columns.types string:bigint +#### A masked pattern was here #### + name default.src3 + numFiles 1 + numRows 309 + rawDataSize 1482 + serialization.ddl struct src3 { string key, i64 count} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1791 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,count + columns.comments + columns.types string:bigint +#### A masked pattern was here #### + name default.src3 + numFiles 1 + numRows 309 + rawDataSize 1482 + serialization.ddl struct src3 { string key, i64 count} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1791 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src3 + name: default.src3 + Truncated Path -> Alias: + /src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3] + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1510,7 +1618,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1561,236 +1669,24 @@ STAGE PLANS: GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: 
Num rows: 56 Data size: 269 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 - columns.types string,bigint + columns.types string:bigint escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - alias: src3 - Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), count (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan - GatherStats: false - Union - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10005 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: src2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,count - columns.comments - columns.types string:bigint -#### A masked pattern was here #### - name default.src2 - numFiles 1 - numRows 309 - rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1791 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,count - columns.comments - columns.types string:bigint -#### A masked pattern was here #### - name default.src2 - numFiles 1 - numRows 309 - rawDataSize 1482 - serialization.ddl struct src2 { string key, i64 count} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1791 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src2 - name: default.src2 -#### A masked pattern was here #### - Partition - base file name: src3 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,count - columns.comments - columns.types string:bigint -#### A masked pattern was here #### - name default.src3 - numFiles 1 - numRows 309 - rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1791 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,count - columns.comments - columns.types string:bigint -#### A masked pattern was here #### - name default.src3 - numFiles 1 - numRows 309 - rawDataSize 1482 - serialization.ddl struct src3 { string key, i64 count} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1791 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src3 - name: default.src3 - Truncated Path -> Alias: - /src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2] - /src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3] -#### A masked pattern was here #### - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/union28.q.out b/ql/src/test/results/clientpositive/union28.q.out index c3789d0..1e85345 100644 --- a/ql/src/test/results/clientpositive/union28.q.out +++ b/ql/src/test/results/clientpositive/union28.q.out @@ -33,110 +33,41 @@ select * from ( ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-2 depends on stages: Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-10 is a root stage + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-9 is a root stage STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -152,10 +83,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -167,7 +98,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-6 + Stage: Stage-5 Map Reduce 
Map Operator Tree: TableScan @@ -179,13 +110,52 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union + + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -211,12 +181,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union PREHOOK: query: insert overwrite table union_subq_union select * from ( @@ -256,23 +232,3 @@ POSTHOOK: query: select * from union_subq_union order by key, value limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@union_subq_union #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 -2 val_2 -2 val_2 -4 val_4 -4 val_4 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -8 val_8 -8 val_8 -9 val_9 diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out index 26a27c8..d7f0be4 
100644 --- a/ql/src/test/results/clientpositive/union30.q.out +++ b/ql/src/test/results/clientpositive/union30.q.out @@ -47,131 +47,42 @@ select key, value from src ) aa POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-2 depends on stages: Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9, Stage-10 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-9 is a root stage Stage-10 is a root stage STAGE PLANS: - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan - Union - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - 
File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan - Union - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -187,10 +98,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -202,7 +113,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -214,13 +125,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -246,12 +157,76 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union PREHOOK: query: insert overwrite table union_subq_union select * from ( @@ -305,23 +280,3 @@ POSTHOOK: query: select * from union_subq_union order by key, value limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@union_subq_union #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 -2 val_2 -2 val_2 -2 val_2 -4 val_4 -4 val_4 -4 val_4 -4 val_4 -5 
val_5 -5 val_5 -5 val_5 -5 val_5 diff --git a/ql/src/test/results/clientpositive/union32.q.out b/ql/src/test/results/clientpositive/union32.q.out index 22b7bbc..d105507 100644 --- a/ql/src/test/results/clientpositive/union32.q.out +++ b/ql/src/test/results/clientpositive/union32.q.out @@ -149,8 +149,8 @@ SELECT CAST(key AS DOUBLE) AS key FROM t2) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -193,40 +193,29 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -297,12 +286,30 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 70 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -342,40 +349,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -447,8 +425,8 @@ SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -491,40 +469,29 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToDouble(key) (type: double), key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -595,12 +562,30 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -640,40 +625,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - 
Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union33.q.out b/ql/src/test/results/clientpositive/union33.q.out index 17e0844..29c8355 100644 --- a/ql/src/test/results/clientpositive/union33.q.out +++ b/ql/src/test/results/clientpositive/union33.q.out @@ -33,19 +33,93 @@ UNION ALL )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-10 depends on stages: Stage-9 - Stage-2 depends on stages: Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-9 depends on stages: Stage-8 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 0.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '0' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -81,7 +155,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -104,98 +178,13 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '0' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - TableScan - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A 
masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, value FROM src @@ -228,7 +217,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM test_src POSTHOOK: type: QUERY POSTHOOK: Input: default@test_src #### A masked pattern was here #### -312 +0 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, COUNT(*) AS value FROM src @@ -250,14 +239,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-9 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-9 is a root stage STAGE PLANS: Stage: Stage-1 @@ -319,50 +308,17 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 0.0) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '0' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src - Stage: Stage-9 + Stage: Stage-8 Conditional Operator - Stage: Stage-6 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -378,10 +334,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -393,7 +349,7 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -405,12 +361,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Stage: Stage-8 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 0.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '0' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, COUNT(*) AS value FROM src @@ -443,4 +421,4 @@ POSTHOOK: query: SELECT COUNT(*) FROM test_src POSTHOOK: type: QUERY POSTHOOK: Input: default@test_src #### A masked pattern was here #### -312 +0 diff --git a/ql/src/test/results/clientpositive/union4.q.out b/ql/src/test/results/clientpositive/union4.q.out index 41ca811..eff65b2 100644 --- a/ql/src/test/results/clientpositive/union4.q.out +++ b/ql/src/test/results/clientpositive/union4.q.out @@ -30,15 +30,14 @@ insert overwrite table tmptable POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-1 @@ -65,54 +64,22 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst1' (type: string), _col0 (type: bigint) + expressions: 'tst1' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) 
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - Union - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -128,10 +95,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -143,7 +110,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -155,13 +122,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -185,15 +152,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst2' (type: string), UDFToInteger(_col0) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -219,5 +188,3 @@ POSTHOOK: query: select * from tmptable x sort by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable #### A 
masked pattern was here #### -tst1 500 -tst2 500 diff --git a/ql/src/test/results/clientpositive/union6.q.out b/ql/src/test/results/clientpositive/union6.q.out index 0844165..a72431f 100644 --- a/ql/src/test/results/clientpositive/union6.q.out +++ b/ql/src/test/results/clientpositive/union6.q.out @@ -28,14 +28,14 @@ insert overwrite table tmptable POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-1 @@ -67,47 +67,17 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - TableScan - alias: s2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -123,10 +93,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -138,7 +108,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -150,12 +120,31 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL @@ -182,29 +171,3 @@ POSTHOOK: query: select * from tmptable x sort by x.key, x.value POSTHOOK: type: QUERY POSTHOOK: Input: default@tmptable #### A masked pattern was here #### - - - - - val_165 - val_193 - val_265 - val_27 - val_409 - val_484 -128 -146 val_146 -150 val_150 -213 val_213 -224 -238 val_238 -255 val_255 -273 val_273 -278 val_278 -311 val_311 -369 -401 val_401 -406 val_406 -66 val_66 -98 val_98 -tst1 500 diff --git a/ql/src/test/results/clientpositive/union_top_level.q.out b/ql/src/test/results/clientpositive/union_top_level.q.out index 2773ad8..85d69bd 100644 --- a/ql/src/test/results/clientpositive/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/union_top_level.q.out @@ -20,10 +20,9 @@ select * from (select key, 2 as value from src where key % 3 == 2 limit 3)c POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3, Stage-4 + Stage-2 is a root stage Stage-3 is a root stage - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -60,46 +59,13 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -133,12 +99,13 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -172,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -221,10 +189,9 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -302,36 +269,13 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -388,7 +332,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -406,10 +350,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -469,17 +414,16 @@ select * from (select key, 2 as value from src where key % 3 == 2 limit 3)c POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-11 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-11 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-10 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-10 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -516,52 +460,17 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -573,7 +482,7 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-10 Create Table Operator: Create Table columns: key string, value int @@ -582,10 +491,10 @@ STAGE PLANS: serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -597,7 +506,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -609,13 +518,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -649,12 +558,14 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -688,10 +599,12 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top PREHOOK: query: create table union_top as select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -721,15 +634,6 @@ POSTHOOK: query: select * from union_top POSTHOOK: type: QUERY POSTHOOK: Input: default@union_top #### A masked pattern was here #### -165 0 -238 1 -255 0 -27 0 -278 2 -311 2 -409 1 -484 1 -86 2 PREHOOK: query: truncate table union_top PREHOOK: type: TRUNCATETABLE PREHOOK: Output: default@union_top @@ -756,16 +660,15 @@ select * from (select key, 2 as value from src where key % 3 == 2 limit 3)c POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, 
Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -802,52 +705,17 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -863,10 +731,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -878,7 +746,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -890,13 +758,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -930,12 +798,14 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -969,10 +839,12 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -1002,15 +874,6 @@ POSTHOOK: query: select * from union_top POSTHOOK: type: QUERY POSTHOOK: Input: default@union_top #### A masked pattern was here #### -165 0 -238 1 -255 0 -27 0 -278 2 -311 2 -409 1 -484 1 -86 2 PREHOOK: query: explain insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -1029,16 +892,15 @@ select * from (select key, 2 as value from src where key % 3 == 2 limit 3)c POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-9 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage Stage-9 is a root stage - Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -1075,52 +937,17 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_top - TableScan - Union - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -1136,10 +963,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1151,7 +978,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1163,13 +990,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -1203,12 +1030,14 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -1242,10 +1071,12 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_top PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -1275,15 +1106,6 @@ POSTHOOK: query: select * from union_top POSTHOOK: type: QUERY POSTHOOK: Input: default@union_top #### A masked pattern was here #### -165 0 -238 1 -255 0 -27 0 -278 2 -311 2 -409 1 -484 1 -86 2 PREHOOK: query: -- create view explain create view union_top_view as diff --git a/ql/src/test/results/clientpositive/vector_multi_insert.q.out 
b/ql/src/test/results/clientpositive/vector_multi_insert.q.out index 78456c7..f6b58a7 100644 --- a/ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -104,7 +104,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rn < 100) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -337,7 +337,6 @@ POSTHOOK: query: select * from orc_rn1 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_rn1 #### A masked pattern was here #### -1 PREHOOK: query: select * from orc_rn2 PREHOOK: type: QUERY PREHOOK: Input: default@orc_rn2 @@ -346,7 +345,6 @@ POSTHOOK: query: select * from orc_rn2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_rn2 #### A masked pattern was here #### -100 PREHOOK: query: select * from orc_rn3 PREHOOK: type: QUERY PREHOOK: Input: default@orc_rn3 @@ -355,4 +353,3 @@ POSTHOOK: query: select * from orc_rn3 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_rn3 #### A masked pattern was here #### -10000 -- 1.7.12.4 (Apple Git-37)