commit 6fc48bae0e25ef2a3397de7af9446f554c802f26 Author: Andrew Sherman Date: Wed Oct 25 13:50:15 2017 -0700 HIVE-17935: set the default for hive.optimize.sort.dynamic.partition to true diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a26ea2115ff65dc80ff21e1303c4dbb353c514f8..60b5230c703d669fcf7145b8b20c5c610849d513 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1606,7 +1606,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Currently it only works with Apache Tez. This should always be set to true. \n" + "Since it is a new feature, it has been made configurable."), - HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false, + HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true, "When enabled dynamic partitioning column will be globally sorted.\n" + "This way we can keep only one record writer open for each partition value\n" + "in the reducer thereby reducing the memory pressure on reducers."), diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 3a94a6a4e33a715fd54e4a31b38bfb035da4dcc7..643e724674804a15a5aa37a1019ef5b0a981a45e 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 6 Data size: 3060 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 3059 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/autoColumnStats_1.q.out ql/src/test/results/clientpositive/autoColumnStats_1.q.out index e04eaf45308d3a80af2944592fed3f12eadf1b21..89f787737131e6ecb75dbd9f0899d38416dd009a 100644 --- ql/src/test/results/clientpositive/autoColumnStats_1.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_1.q.out @@ -489,11 +489,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: drop table src5 @@ -545,11 +545,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 11 Data size: 2049 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 11 Data size: 2049 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) @@ -1340,11 +1340,11 @@ STAGE PLANS: Processor Tree: TableScan alias: a - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select value from b @@ -1361,11 +1361,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key from b @@ -1382,11 +1382,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select value from c @@ -1403,11 +1403,11 @@ STAGE PLANS: Processor Tree: TableScan alias: c - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key from c @@ -1424,10 +1424,10 @@ STAGE PLANS: Processor Tree: TableScan alias: c - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink diff --git ql/src/test/results/clientpositive/autoColumnStats_2.q.out ql/src/test/results/clientpositive/autoColumnStats_2.q.out index b209ff0dccd76b86f3c0ef7359f7d5d750b713e8..52ce6f76b0ebce2645cff89e632c8389ffcd7b81 100644 --- ql/src/test/results/clientpositive/autoColumnStats_2.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_2.q.out @@ -487,11 +487,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: drop table src5 @@ -543,11 +543,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 11 Data size: 2049 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 11 Data size: 2049 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: drop table alter5 @@ -1464,11 +1464,11 @@ STAGE PLANS: Processor Tree: TableScan alias: a - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select value from b @@ -1485,11 +1485,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key from b @@ -1506,11 +1506,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select value from c @@ -1527,11 +1527,11 @@ STAGE PLANS: Processor Tree: TableScan alias: c - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key from c @@ -1548,10 +1548,10 @@ STAGE PLANS: Processor Tree: TableScan alias: c - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE ListSink diff --git ql/src/test/results/clientpositive/autoColumnStats_6.q.out ql/src/test/results/clientpositive/autoColumnStats_6.q.out index 70788fde9f356bc55a1c95a52aa85c52d367b214..6e3c62be5b6f9ba01b4f87766176c4218a91c9f3 100644 --- ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -26,13 +26,8 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one=' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -45,57 +40,26 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge2a - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) - outputColumnNames: key, value, one, two, three - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: one (type: string), two (type: string), three (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a Stage: Stage-0 Move Operator @@ -119,26 +83,6 @@ STAGE PLANS: Column Types: int, string Table: default.orcfile_merge2a - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - ORC File Merge Operator - merge level: stripe - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - ORC File Merge Operator - merge level: stripe - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, PMOD(HASH(value), 10) as three diff --git ql/src/test/results/clientpositive/autoColumnStats_8.q.out ql/src/test/results/clientpositive/autoColumnStats_8.q.out index 1d7e966a317e7134ae377e5653f73862d97eba4f..3f77cfc32ca54e5692f1af06b77b9f1d5354b32b 100644 --- ql/src/test/results/clientpositive/autoColumnStats_8.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -47,9 +47,9 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1, Stage-5 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -67,54 +67,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: key, value, ds, hr + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), hr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -125,62 +86,22 @@ STAGE PLANS: Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, hr - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: '2008-12-31' (type: string), hr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -383,38 +304,40 @@ STAGE PLANS: /srcpart/ds=2008-04-09/hr=12 [srcpart] Needs Tagging: false Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -449,57 +372,19 @@ STAGE PLANS: Basic Stats Work: #### A masked pattern was here #### - Stage: Stage-1 - Move Operator - tables: - partition: - ds 2008-12-31 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - Stage: Stage-4 - Stats Work - Basic Stats Work: -#### A masked pattern was here #### - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part8 - Is Table Level Stats: false - - Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: '2008-12-31' (type: string), _col1 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col2 (type: struct), _col3 (type: struct) + value expressions: _col0 (type: string), _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -511,8 +396,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns _col0,_col1,_col2 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -521,8 +406,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col1,_col2,_col3 - columns.types string,string,struct,struct + columns _col0,_col1,_col2 + columns.types string,string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -530,38 +415,79 @@ STAGE PLANS: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: '2008-12-31' (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 2008-12-31 + hr + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types struct:struct:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + + Stage: Stage-5 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index 5f74166e894be125729fcacd06045e24fef64c87..31f96ff3a27bee68ea27371a2550c31cd3774786 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -251,7 +251,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 410 + totalSize 409 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -296,7 +296,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 419 + totalSize 420 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -454,7 +454,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 410 + totalSize 409 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -499,7 +499,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 419 + totalSize 420 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -685,7 +685,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 410 + totalSize 409 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -730,7 +730,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 419 + totalSize 420 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -888,7 +888,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 410 + totalSize 409 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -933,7 +933,7 @@ STAGE PLANS: serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 419 + totalSize 420 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index 1fa9e94aa7ef7d23949bf2d48ecd6b6a81dee14a..668400faa09d415277e258238035c12ea619a510 100644 --- ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -39,17 +39,18 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - sort order: + + sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/insert_into6.q.out ql/src/test/results/clientpositive/insert_into6.q.out index 964e259771d619197879410aafde42c0fea242f1..e369831a25440b6a0cc335112452880c7929fb81 100644 --- ql/src/test/results/clientpositive/insert_into6.q.out +++ ql/src/test/results/clientpositive/insert_into6.q.out @@ -134,13 +134,8 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into6b PARTITION (ds) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -153,23 +148,26 @@ STAGE PLANS: expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into6b - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_into6b Stage: Stage-0 Move Operator @@ -187,36 +185,6 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into6b - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into6b - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT INTO TABLE insert_into6b PARTITION (ds) SELECT * FROM insert_into6a PREHOOK: type: QUERY PREHOOK: Input: default@insert_into6a diff --git ql/src/test/results/clientpositive/load_dyn_part1.q.out ql/src/test/results/clientpositive/load_dyn_part1.q.out index b43bd31128aed967f9f4f45bcbe03d07d704c13c..749de0c55d8b5b3219b0e21bd73ab43b04da5a6a 100644 --- ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -53,20 +53,11 @@ insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, v POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -82,14 +73,12 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 + value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -99,21 +88,24 @@ STAGE PLANS: Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part1 Stage: Stage-0 Move Operator @@ -136,40 +128,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part2 Stage: Stage-1 Move Operator @@ -184,40 +162,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 - Stage: Stage-9 + Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git ql/src/test/results/clientpositive/load_dyn_part10.q.out ql/src/test/results/clientpositive/load_dyn_part10.q.out index aea7798d0ba087b66a7d41d69481d924d8e59f0a..a840d7b11ea7c1888093685278475dbffa7a58da 100644 --- ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -57,14 +57,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part10 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part10 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/load_dyn_part14.q.out ql/src/test/results/clientpositive/load_dyn_part14.q.out index 8e951b6e3e8bf6f4b93426cd8a489c0174e3ea8a..bc4aa19b103ef31999a8825cdda498523be93a69 100644 --- ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -44,16 +44,11 @@ select key, value from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage - Stage-10 is a root stage + Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -92,45 +87,44 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + value expressions: _col0 (type: string) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + value expressions: _col0 (type: string) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 Stage: Stage-0 Move Operator @@ -152,36 +146,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -208,7 +172,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/load_dyn_part3.q.out ql/src/test/results/clientpositive/load_dyn_part3.q.out index c63b4519f3ce56b099e42c254ec2699fdfe0ddb8..a1e7e9628d149090ca88bd77a5bc9a470278bf03 100644 --- ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -55,14 +55,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part3 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part3 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/load_dyn_part4.q.out ql/src/test/results/clientpositive/load_dyn_part4.q.out index cf571921fcaec4da811d98f2c1097b08459d1d20..a632127a1c5510f2f4481e77e2e23abe4a79f1ac 100644 --- ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -65,14 +65,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part4 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part4 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/load_dyn_part8.q.out ql/src/test/results/clientpositive/load_dyn_part8.q.out index 9e20fdc921ddde20abaf4c7357cf22c2acc86c5f..cdc2ddf22f0d4652546722bb01dd77b839e686c7 100644 --- ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -47,8 +47,9 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -66,35 +67,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -105,33 +86,21 @@ STAGE PLANS: Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### @@ -333,6 +302,42 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -367,6 +372,85 @@ STAGE PLANS: Basic Stats Work: #### A masked pattern was here #### + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Stage: Stage-1 Move Operator tables: @@ -395,7 +479,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-4 + Stage: Stage-5 Stats Work Basic Stats Work: #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/load_dyn_part9.q.out ql/src/test/results/clientpositive/load_dyn_part9.q.out index 77e689ca2d7c315e91bbe81a5f4063864c665a07..08ad3402163155c343b1ad38b000d99acb8382ea 100644 --- ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -57,14 +57,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part9 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part9 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/merge3.q.out ql/src/test/results/clientpositive/merge3.q.out index 22a2820dd3348e6585b67e84ab2095f4e02f5d8e..02fe3d40535d84105c91b811e37f4575e94a823f 100644 --- ql/src/test/results/clientpositive/merge3.q.out +++ ql/src/test/results/clientpositive/merge3.q.out @@ -2392,35 +2392,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.merge_src_part2 - partition_columns ds - partition_columns.types string - serialization.ddl struct merge_src_part2 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_src_part2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2440,7 +2420,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.merge_src_part - numFiles 2 + numFiles 1 numRows 1000 partition_columns ds partition_columns.types string @@ -2487,7 +2467,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.merge_src_part - numFiles 2 + numFiles 1 numRows 1000 partition_columns ds partition_columns.types string @@ -2521,6 +2501,42 @@ STAGE PLANS: Truncated Path -> Alias: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.merge_src_part2 + partition_columns ds + partition_columns.types string + serialization.ddl struct merge_src_part2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_src_part2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4815,8 +4831,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col2 (type: string) + null sort order: a + sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 @@ -4841,7 +4858,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.merge_src_part - numFiles 2 + numFiles 1 numRows 1000 partition_columns ds partition_columns.types string @@ -4888,7 +4905,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.merge_src_part - numFiles 2 + numFiles 1 numRows 1000 partition_columns ds partition_columns.types string @@ -4932,6 +4949,7 @@ STAGE PLANS: compressed: false GlobalTableId: 1 #### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED NumFilesPerFileSink: 1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/merge4.q.out ql/src/test/results/clientpositive/merge4.q.out index de4c593d5302c85fa6fb38fcca55983e1f43a162..5c5d1c937a88ed13bafe4c02b0a1670973b4d469 100644 --- ql/src/test/results/clientpositive/merge4.q.out +++ ql/src/test/results/clientpositive/merge4.q.out @@ -33,14 +33,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part Stage: Stage-7 Conditional Operator @@ -2761,13 +2773,14 @@ select * from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -2817,17 +2830,40 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part + + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2846,11 +2882,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2862,7 +2898,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2874,7 +2910,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 68d7bbe12237802eee56498d60ebce6f748e5608..f4ae6a935190327ed112a192214750eac552fa55 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -151,14 +151,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.merge_dynamic_part + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_dynamic_part Stage: Stage-7 Conditional Operator @@ -274,9 +286,9 @@ outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat columns:struct columns { string key, string value} partitioned:true partitionColumns:struct partition_columns { string ds, string hr} -totalNumberFiles:6 +totalNumberFiles:4 totalFileSize:34830 -maxFileSize:5812 -minFileSize:5791 +maxFileSize:11603 +minFileSize:5812 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index 0da56cd9fca3bc66f57aa30202380caf5977314e..c4b3fc85a5b8d3a22b4602f7c5ea9e436402c4f9 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -152,14 +152,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 2.0) = 0.0), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.merge_dynamic_part + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.merge_dynamic_part Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 45237a15b94c9913acc4b3735ac828bde2ffcd76..7087e4859c584313b47341d40e74789a4f2885b0 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -128,14 +128,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.merge_dynamic_part + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.merge_dynamic_part Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/orc_int_type_promotion.q.out ql/src/test/results/clientpositive/orc_int_type_promotion.q.out index d3837a3bd898007b686504c7cdc54ad33ca80084..4aabca6f86b809832cd31b88761fafbfd3feac5b 100644 --- ql/src/test/results/clientpositive/orc_int_type_promotion.q.out +++ ql/src/test/results/clientpositive/orc_int_type_promotion.q.out @@ -258,15 +258,15 @@ POSTHOOK: Input: default@src_part_orc@ds=2008-04-08 POSTHOOK: Input: default@src_part_orc@ds=2008-04-09 #### A masked pattern was here #### 238 val_238 2008-04-08 -86 val_86 2008-04-08 -311 val_311 2008-04-08 -27 val_27 2008-04-08 -165 val_165 2008-04-08 -409 val_409 2008-04-08 -255 val_255 2008-04-08 -278 val_278 2008-04-08 -98 val_98 2008-04-08 -484 val_484 2008-04-08 +97 val_97 2008-04-08 +200 val_200 2008-04-08 +400 val_400 2008-04-08 +403 val_403 2008-04-08 +169 val_169 2008-04-08 +90 val_90 2008-04-08 +126 val_126 2008-04-08 +222 val_222 2008-04-08 +477 val_477 2008-04-08 PREHOOK: query: alter table src_part_orc change key key bigint PREHOOK: type: ALTERTABLE_RENAMECOL PREHOOK: Input: default@src_part_orc @@ -288,12 +288,12 @@ POSTHOOK: Input: default@src_part_orc@ds=2008-04-08 POSTHOOK: Input: default@src_part_orc@ds=2008-04-09 #### A masked pattern was here #### 238 val_238 2008-04-08 -86 val_86 2008-04-08 -311 val_311 2008-04-08 -27 val_27 2008-04-08 -165 val_165 2008-04-08 -409 val_409 2008-04-08 -255 val_255 2008-04-08 -278 val_278 2008-04-08 -98 val_98 2008-04-08 -484 val_484 2008-04-08 +97 val_97 2008-04-08 +200 val_200 2008-04-08 +400 val_400 2008-04-08 +403 val_403 2008-04-08 +169 val_169 2008-04-08 +90 val_90 2008-04-08 +126 val_126 2008-04-08 +222 val_222 2008-04-08 +477 val_477 2008-04-08 diff --git ql/src/test/results/clientpositive/orc_merge2.q.out ql/src/test/results/clientpositive/orc_merge2.q.out index 5e8a1f0ccbadc1027b6f9ff2c0b8f64f6907e464..cbf1f9ea1451efe3863c9c308dbb928eff6aae0e 100644 --- ql/src/test/results/clientpositive/orc_merge2.q.out +++ ql/src/test/results/clientpositive/orc_merge2.q.out @@ -26,13 +26,8 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one=' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -45,23 +40,26 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge2a - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a Stage: Stage-0 Move Operator @@ -81,26 +79,6 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - ORC File Merge Operator - merge level: stripe - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - ORC File Merge Operator - merge level: stripe - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, PMOD(HASH(value), 10) as three diff --git ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out index 9c62fb344bc7778565cfb5f665e83e076d94f135..c3bfa2de01a099d351c9136bba793f1a5ff69f86 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out @@ -61,9 +61,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -75,14 +78,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 + value expressions: _col0 (type: string), _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -90,14 +96,42 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part1 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part2 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out index ad4668e14950ac980e7559e1219d4f2b7ddaae6f..304a786811cba75343a5f33d88de649f13efb759 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -60,14 +62,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part10 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part10 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index 08d63506c01d7994e3d0e62b86a996c14e2ff3f1..f59a8ea5972a4d7f3269e546f3541066514a5529 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -52,8 +52,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,15 +80,28 @@ STAGE PLANS: expressions: 'k1' (type: string), null (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - Reducer 4 + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 + Reducer 5 Reduce Operator Tree: Limit Number of rows: 2 @@ -96,15 +110,13 @@ STAGE PLANS: expressions: 'k2' (type: string), '' (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - Reducer 6 + value expressions: _col0 (type: string) + Reducer 7 Reduce Operator Tree: Limit Number of rows: 2 @@ -113,14 +125,12 @@ STAGE PLANS: expressions: 'k3' (type: string), ' ' (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + value expressions: _col0 (type: string) Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out index ca6729aba8d4133c31bd140c53e15fd81f5e1bd7..69c520677e2d6ccbbc182bd1b7d6e1b330218309 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out @@ -47,6 +47,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -58,14 +60,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part3 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part3 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out index 368c08a501dc662844bf3ff9e8c12c7a6280d2a0..e7a2f4fe0c45d445144ce27a74b439d844085974 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out @@ -57,6 +57,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -68,14 +70,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part4 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part4 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out index 4fc0d8c31ac78b855379ff9ce1ea0e9c31006f5c..25679366d8861b2b52feb8e464c343a39c2f5a63 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -45,14 +47,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part5 + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part5 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out index fea2d518acf00bcebd0e4871b3755aba33241158..ddb8c41041d2807f28803d8abcc4224cf7c2a97b 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out @@ -53,9 +53,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -69,35 +72,221 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -106,36 +295,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -336,6 +504,81 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out index 4bb08a05d90f447c4362610e9014c385703e5949..3d7f16506b9c63149960cb19dc3ccf0b17de68d7 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -60,14 +62,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part9 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part9 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/stats2.q.out ql/src/test/results/clientpositive/spark/stats2.q.out index f609702e01b6a249f88272b6482827c3e5d46ac7..2cf462e1a04b2b53b1b52d885bbee0dce0476a19 100644 --- ql/src/test/results/clientpositive/spark/stats2.q.out +++ ql/src/test/results/clientpositive/spark/stats2.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -30,14 +32,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.analyze_t1 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.analyze_t1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_15.q.out ql/src/test/results/clientpositive/spark/union_remove_15.q.out index 8a25ceba73f4bce89cdc1b950f60c2a81c1a7faa..8ecd7a7b45c3908ca3d5107292662577cecfc4d3 100644 --- ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -49,7 +49,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 4 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -85,15 +86,28 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -105,14 +119,12 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Move Operator @@ -172,11 +184,11 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 4 + numFiles 2 numPartitions 2 numRows 0 rawDataSize 0 - totalSize 332 + totalSize 178 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/union_remove_16.q.out ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 8594301cbc92a9876abf6aa301d4fb120bcb1575..abc4376e7c779346b0d767fc49378c0ef5975e84 100644 --- ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -54,7 +54,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 4 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -90,15 +91,28 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -110,14 +124,12 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-6 Conditional Operator @@ -214,11 +226,11 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 4 + numFiles 2 numPartitions 2 numRows 0 rawDataSize 0 - totalSize 332 + totalSize 178 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/union_remove_17.q.out ql/src/test/results/clientpositive/spark/union_remove_17.q.out index cef4255535ef09008ee74872460a6f39b954dcbe..df95041a43e664fcb734a17c48f81ada94aac734 100644 --- ql/src/test/results/clientpositive/spark/union_remove_17.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_17.q.out @@ -47,6 +47,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -62,15 +64,13 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Map 2 + value expressions: _col0 (type: string), _col1 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: inputtbl1 @@ -83,14 +83,27 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_18.q.out ql/src/test/results/clientpositive/spark/union_remove_18.q.out index 51f2f163aa8b3bf7e4e3f268b5b191c663ed1a10..b276a937ecd65397644628d3039d9171889cbcee 100644 --- ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 4 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -85,34 +85,27 @@ STAGE PLANS: expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -184,7 +177,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 12 + numFiles 6 numPartitions 6 numRows 0 rawDataSize 0 diff --git ql/src/test/results/clientpositive/spark/union_remove_25.q.out ql/src/test/results/clientpositive/spark/union_remove_25.q.out index f681428785c1bff7c1a91997c3609c051f3fd2b3..84a9f6d4bda041c067fffcd4d8c47176bab2278f 100644 --- ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -404,7 +404,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -437,35 +437,27 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1000 - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 Stage: Stage-0 Move Operator @@ -538,7 +530,7 @@ Database: default Table: outputtbl3 #### A masked pattern was here #### Partition Parameters: - numFiles 2 + numFiles 1 totalSize 6812 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/stats2.q.out ql/src/test/results/clientpositive/stats2.q.out index 65691d319c3a9425b0011e2031ca0935e574adef..42c91d092c896b1835d68cb8ee905bb885ad86e1 100644 --- ql/src/test/results/clientpositive/stats2.q.out +++ ql/src/test/results/clientpositive/stats2.q.out @@ -27,14 +27,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.analyze_t1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.analyze_t1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/stats4.q.out ql/src/test/results/clientpositive/stats4.q.out index 741e47b9480713b2c7b4d292b07fca3c6d31a88f..c3544f265ffc713c792cafade31254d750dff267 100644 --- ql/src/test/results/clientpositive/stats4.q.out +++ ql/src/test/results/clientpositive/stats4.q.out @@ -44,20 +44,11 @@ insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, v POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -73,14 +64,12 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 + value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -90,21 +79,24 @@ STAGE PLANS: Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part1 Stage: Stage-0 Move Operator @@ -127,40 +119,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part2 Stage: Stage-1 Move Operator @@ -175,40 +153,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 - Stage: Stage-9 + Stage: Stage-5 Stats Work Basic Stats Work: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index 54566546bb4745bd94ef3a82c1a4e3a7a233ba92..faaef749f688f6f16513f5bc82f0caf771223e8d 100644 --- ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -12,13 +12,8 @@ POSTHOOK: query: explain insert overwrite table tmptable partition (part) select POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -34,23 +29,26 @@ STAGE PLANS: expressions: 'no_such_value' (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Stage: Stage-0 Move Operator @@ -68,36 +66,6 @@ STAGE PLANS: Stats Work Basic Stats Work: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: insert overwrite table tmptable partition (part) select key, value from src where key = 'no_such_value' PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/union_remove_15.q.out ql/src/test/results/clientpositive/union_remove_15.q.out index 2934c4c0e9c2bf878d3427bd42e17e4ab5a5891b..5f37c10bc9e05debe3d046e76a595fa73ab081ab 100644 --- ql/src/test/results/clientpositive/union_remove_15.q.out +++ ql/src/test/results/clientpositive/union_remove_15.q.out @@ -42,8 +42,9 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1, Stage-2 - Stage-2 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage STAGE PLANS: Stage: Stage-1 @@ -81,12 +82,46 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -100,7 +135,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -135,12 +170,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: insert overwrite table outputTbl1 partition (ds) SELECT * diff --git ql/src/test/results/clientpositive/union_remove_16.q.out ql/src/test/results/clientpositive/union_remove_16.q.out index 8be5ddd51162dbf691e0112fb314b0d695e957a6..b196c29f7bdf7800b987c21e6ed0bbae548b4b9f 100644 --- ql/src/test/results/clientpositive/union_remove_16.q.out +++ ql/src/test/results/clientpositive/union_remove_16.q.out @@ -42,13 +42,14 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-6 depends on stages: Stage-1, Stage-7 , consists of Stage-3, Stage-2, Stage-4 - Stage-3 - Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 - Stage-2 + Stage-2 depends on stages: Stage-1, Stage-8 + Stage-7 depends on stages: Stage-2 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-5 depends on stages: Stage-4 - Stage-7 is a root stage + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-1 @@ -86,17 +87,51 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + + Stage: Stage-7 Conditional Operator - Stage: Stage-3 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -114,27 +149,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Stage: Stage-2 + Stage: Stage-3 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-4 + Stage: Stage-5 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -169,12 +204,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: insert overwrite table outputTbl1 partition (ds) SELECT * diff --git ql/src/test/results/clientpositive/union_remove_17.q.out ql/src/test/results/clientpositive/union_remove_17.q.out index 4d35596b02e72c232cb5a1a718ef5f381cc8bc64..a05aaaa84df6ee28d9c7a64ba1786d3fc060d7f7 100644 --- ql/src/test/results/clientpositive/union_remove_17.q.out +++ ql/src/test/results/clientpositive/union_remove_17.q.out @@ -61,14 +61,12 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: inputtbl1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -82,14 +80,26 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/union_remove_18.q.out ql/src/test/results/clientpositive/union_remove_18.q.out index 3e181e31e1af341d456b669922996f59abd688a6..0996441f62a33222e81d3411ebac5b9ae4911ed3 100644 --- ql/src/test/results/clientpositive/union_remove_18.q.out +++ ql/src/test/results/clientpositive/union_remove_18.q.out @@ -42,8 +42,9 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1, Stage-2 - Stage-2 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage STAGE PLANS: Stage: Stage-1 @@ -81,12 +82,46 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -100,7 +135,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -135,12 +170,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: insert overwrite table outputTbl1 partition (ds) SELECT * @@ -200,7 +233,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 12 + numFiles 6 numPartitions 6 numRows 0 rawDataSize 0 diff --git ql/src/test/results/clientpositive/union_remove_25.q.out ql/src/test/results/clientpositive/union_remove_25.q.out index 20ab809cb1244617d03767a6f718c82cea1215a2..0634bc62d037af1dec4e489ccb64fe8ccdf16408 100644 --- ql/src/test/results/clientpositive/union_remove_25.q.out +++ ql/src/test/results/clientpositive/union_remove_25.q.out @@ -425,8 +425,9 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1, Stage-2 - Stage-2 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage STAGE PLANS: Stage: Stage-1 @@ -455,18 +456,56 @@ STAGE PLANS: Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 Stage: Stage-0 Move Operator @@ -481,7 +520,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -507,18 +546,12 @@ STAGE PLANS: Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: insert overwrite table outputTbl3 partition(ds, hr) SELECT * @@ -578,7 +611,7 @@ Database: default Table: outputtbl3 #### A masked pattern was here #### Partition Parameters: - numFiles 2 + numFiles 1 totalSize 6812 #### A masked pattern was here ####